lua_vm/string.rs
1//! String table and interned-string operations — port of `lstring.c` + `lstring.h`.
2//!
3//! Provides two key abstractions:
4//!
5//! - [`LuaStringImpl`]: the Lua string value, stored as a reference-counted byte slice.
//!   Short strings (`<= MAX_SHORT_LEN` bytes) are interned in the per-VM
//!   [`StringPool`] owned by `GlobalState`; long strings are heap-allocated on
//!   each creation and never interned.
9//!
10//! - [`StringPool`]: the intern table for short strings, stored on `GlobalState`.
//!   Replaces the C `stringtable` struct, a separate-chaining hash table whose
//!   buckets are intrusive lists threaded through `TString.u.hnext`. In Rust the
//!   intrusive chain is dropped; a `HashMap` provides average-case O(1) lookup
//!   and automatic rehashing.
14//! See PORT NOTE on [`StringPool`] for the full rationale.
15//!
16//! The `lstring.h` header is merged into this module per PORTING.md §1.
17//!
18//! # C source files
19//! - `reference/lua-5.4.7/src/lstring.c` (275 lines, 15 functions)
20//! - `reference/lua-5.4.7/src/lstring.h` (57 lines; merged here)
21
22#[allow(unused_imports)]
23use crate::prelude::*;
24use std::cell::Cell;
25use std::collections::HashMap;
26use std::rc::Rc;
27
28// TODO(port): these import paths will resolve once Phase B wires the crate graph.
29// `LuaState` and `GlobalState` live in crate::state (src/state.rs, from lstate.c).
30// `LuaValue` and `LuaError` live in lua_types (crates/lua-types/src/).
31use crate::state::LuaState;
32
33// PORT NOTE: `GcRef<T>` is the lua-types newtype around `Rc<T>` per PORT_STRATEGY §3.4.
34// Re-imported here so all string-pool entries share identity with state.rs / api.rs.
35use lua_types::GcRef;
36/// Phase-B bridge: converts a lua-vm rich `LuaStringImpl` into a `lua_types::LuaString`.
37/// The two types track different metadata (short/long flag, extra byte) and a real
38/// merge belongs in Phase B once `lua-types::LuaString` grows the needed fields.
39fn impl_to_lt(s: &GcRef<LuaStringImpl>) -> GcRef<lua_types::LuaString> {
40 // TODO(D-1c-bridge): allocation outside state context (free fn)
41 GcRef::new(lua_types::LuaString::from_bytes(s.as_bytes().to_vec()))
42}
43
44// ── Constants (lstring.h macros → macros.tsv) ─────────────────────────────────
45
// macros.tsv: MEMERRMSG → const MEMERR_MSG: &[u8] = b"not enough memory"
/// Pre-allocated OOM error message. Must be created before the allocator
/// can fail so that the GC can always hand back a valid error string.
///
/// Turned into a string object by `init` during VM startup.
pub(crate) const MEMERR_MSG: &[u8] = b"not enough memory";

// macros.tsv: MINSTRTABSIZE → const MIN_STR_TAB_SIZE: usize = 128
/// Initial `HashMap` capacity and initial logical `size` of a new `StringPool`.
const MIN_STR_TAB_SIZE: usize = 128;

// macros.tsv: STRCACHE_N → const STRCACHE_N: usize = 53
/// Outer dimension of the `strcache` array that `init` fills.
const STRCACHE_N: usize = 53;

// macros.tsv: STRCACHE_M → const STRCACHE_M: usize = 2
/// Inner dimension of the `strcache` array that `init` fills.
const STRCACHE_M: usize = 2;

// macros.tsv: LUAI_MAXSHORTLEN → const MAX_SHORT_LEN: usize = 40
/// Largest byte length that `new_lstr` still treats as a short (interned) string.
pub(crate) const MAX_SHORT_LEN: usize = 40;

// macros.tsv: MAX_SIZE → const MAX_SIZE: usize = if size_of::<usize>() < size_of::<i64>() { usize::MAX } else { i64::MAX as usize }
/// Byte-length limit checked by `new_lstr` for long strings: `usize::MAX` when
/// `usize` is narrower than `i64`, otherwise `i64::MAX`.
const MAX_SIZE: usize = if std::mem::size_of::<usize>() < std::mem::size_of::<i64>() {
    usize::MAX
} else {
    i64::MAX as usize
};

// macros.tsv: luaM_limitN → std::cmp::min(n, usize::MAX / std::mem::size_of::<T>())
//             cast_int → x as i32
/// Upper bound on the logical size of the string table: `i32::MAX` (C `MAX_INT`)
/// divided by the size of a pointer-sized word. `grow_str_tab` only doubles the
/// table while its size is at most `MAX_STR_TAB / 2`.
const MAX_STR_TAB: usize = i32::MAX as usize / std::mem::size_of::<usize>();
74
75// macros.tsv: sizelstring → drop — Rust allocates via Box<[u8]> / Rc<[u8]>
76// PORT NOTE: dropped entirely; Rust uses Rc<[u8]> which carries its own length.
77
78// macros.tsv: luaS_newliteral → state.intern_str(b"...")
79// PORT NOTE: translated at call sites as `new_lstr(state, b"literal")`.
80
81// macros.tsv: isreserved → ts.is_reserved_word()
82// PORT NOTE: translated at call sites as the `LuaStringImpl::is_reserved_word()` method.
83
84// macros.tsv: eqshrstr → Rc::ptr_eq(a, b)
85// PORT NOTE: short strings are interned so pointer equality suffices.
86// Translated at call sites as `Rc::ptr_eq(a, b)`.
87
88// ── LuaStringImpl (was TString in lobject.h) ─────────────────────────────────────
89
90// PORT NOTE: `LuaStringImpl` corresponds to `TString` from `lobject.h`, which maps to
91// `src/object.rs` per file_deps.txt. It is defined here (in `string.rs`) because
92// `lstring.c` owns the string-table internals and most of the type's behaviour.
93// Phase B should reconcile: either keep it here and re-export from `object.rs`,
94// or move it there and import it from `string.rs`.
95
/// Whether a Lua string is short (interned) or long (not interned).
///
/// Corresponds to the `LUA_VSHRSTR` / `LUA_VLNGSTR` tags from `lobject.h`.
///
/// # C mapping (types.tsv)
/// ```text
/// LUA_VSHRSTR → StringKind::Short   (C: shrlen holds length 0..=40)
/// LUA_VLNGSTR → StringKind::Long    (C: shrlen = 0xFF sentinel; u.lnglen holds length)
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringKind {
    /// At most `MAX_SHORT_LEN` bytes; created through the intern table.
    Short,
    /// Longer than `MAX_SHORT_LEN` bytes; allocated on every creation.
    Long,
}
110
/// A Lua string: an immutable, reference-counted byte sequence.
///
/// Short strings (`<= MAX_SHORT_LEN = 40` bytes) are interned in the
/// [`StringPool`] on `GlobalState`; two short strings with the same bytes that
/// come from the same pool are the same `GcRef`.
///
/// Long strings are heap-allocated independently and never interned. The
/// `hash` / `extra` pair is laid out so that a long string's hash can be
/// computed lazily and cached via interior mutability (`Cell<u32>`), as
/// `luaS_hashlongstr` does in C.
/// NOTE(review): no hash accessor is defined in this file; `hash` is currently
/// never read here (hence the `#[allow(dead_code)]`).
///
/// # C mapping (types.tsv)
/// ```text
/// TString            → LuaStringImpl
/// TString.extra      → extra: Cell<u8>    (reserved-word idx for Short; hash-ready flag for Long)
/// TString.shrlen     → kind: StringKind   (0xFF sentinel replaced by enum variant)
/// TString.hash       → hash: Cell<u32>
/// TString.u.lnglen   → bytes.len()        (length implicit in Rc<[u8]>)
/// TString.u.hnext    → (removed)          (intrusive chain gone; StringPool uses HashMap)
/// TString.contents   → bytes: Rc<[u8]>
/// ```
pub struct LuaStringImpl {
    /// The string contents. The length is `bytes.len()`; no trailing NUL is stored.
    bytes: Rc<[u8]>,

    /// Short/long discriminator. Replaces the C `shrlen` field and its `0xFF`
    /// long-string sentinel; the length itself is implicit in `bytes.len()`.
    kind: StringKind,

    /// Hash slot, initialised from the `hash` argument of `create_str_obj`.
    /// Held in a `Cell<u32>` so a cached hash can be written through a shared
    /// `&LuaStringImpl` reference (interior mutability, single-threaded).
    #[allow(dead_code)]
    hash: Cell<u32>,

    /// Short strings: reserved-word token index (0 = not a keyword).
    /// Long strings:  0 = hash not yet computed; 1 = hash is valid.
    extra: Cell<u8>,
}
146
147impl LuaStringImpl {
148 /// Returns the string's bytes.
149 ///
150 /// macros.tsv: `getstr` / `getlngstr` / `getshrstr` → `ts.as_bytes()`
151 pub fn as_bytes(&self) -> &[u8] {
152 &self.bytes
153 }
154
155 /// Returns the byte length of the string.
156 ///
157 /// for Long. In Rust both cases are `bytes.len()`.
158 /// macros.tsv: `tsslen` → `ts.len()`
159 pub fn len(&self) -> usize {
160 self.bytes.len()
161 }
162
163 /// Returns `true` if this is a long (non-interned) string.
164 pub fn is_long(&self) -> bool {
165 self.kind == StringKind::Long
166 }
167
168 /// Returns `true` if this is a short (interned) string.
169 pub fn is_short(&self) -> bool {
170 self.kind == StringKind::Short
171 }
172
173 /// Returns `true` if this short string is a Lua reserved word.
174 ///
175 /// macros.tsv: `isreserved` → `ts.is_reserved_word()`
176 pub fn is_reserved_word(&self) -> bool {
177 self.kind == StringKind::Short && self.extra.get() > 0
178 }
179
180 /// GC color predicate. Returns `true` if this object is "white" (unreachable)
181 /// in the GC's current wave.
182 ///
183 /// macros.tsv: `iswhite` → `obj.is_white()`
184 ///
185 /// PORT NOTE: GC color management is deferred to Phase D. In Phases A–C all
186 /// objects are reachable via `Rc` reference counts and this always returns
187 /// `false` (nothing is white / unreachable).
188 pub fn is_white(&self) -> bool {
189 // TODO(port): Phase D — check the GC marked byte; stub returns false (all live)
190 false
191 }
192
193 /// Flip GC color from white to the current non-white (resurrect a dead object).
194 ///
195 /// macros.tsv: `changewhite` → `obj.flip_white()`
196 ///
197 /// PORT NOTE: GC color management deferred to Phase D; no-op in Phases A–C.
198 pub fn flip_white(&self) {
199 // TODO(port): Phase D — update the GC marked byte
200 }
201}
202
203impl PartialEq for LuaStringImpl {
204 /// Equality for Lua strings.
205 ///
206 /// For short strings (interned), pointer equality via `Rc::ptr_eq` is sufficient
207 /// and matches `eqshrstr` in C. For long strings, we fall back to byte
208 /// comparison, matching `luaS_eqlngstr` in C.
209 fn eq(&self, other: &Self) -> bool {
210 if self.kind == StringKind::Short && other.kind == StringKind::Short {
211 Rc::ptr_eq(&self.bytes, &other.bytes)
212 } else {
213 self.bytes == other.bytes
214 }
215 }
216}
217
218impl Eq for LuaStringImpl {}
219
220// ── StringPool (was stringtable in lstate.h) ──────────────────────────────────
221
// PORT NOTE: `StringPool` corresponds to `stringtable` from `lstate.h`, which maps
// to `src/state.rs` per file_deps.txt. It is defined here because `lstring.c`
// owns all of the pool's mutation logic. Phase B should reconcile placement.
//
// The C `stringtable` is a separate-chaining hash table: each bucket is the
// head of an intrusive singly-linked list threaded through `TString.u.hnext`.
// In Rust, `TString.u.hnext` is removed per types.tsv. The `HashMap` replaces
// both the bucket array and the chain: it provides O(1) average-case lookup,
// automatic rehashing, and eliminates the need for `tablerehash`.
//
// `nuse` and `size` are retained for parity with the C invariants that other
// code may check (e.g. `intern_short_str` grows the table when `nuse >= size`).

/// Intern table for short Lua strings. Lives on `GlobalState`.
///
/// # C mapping (types.tsv)
/// ```text
/// stringtable        → StringPool
/// stringtable.hash   → map: HashMap<Box<[u8]>, GcRef<LuaStringImpl>>
/// stringtable.nuse   → nuse: usize
/// stringtable.size   → size: usize
/// ```
pub struct StringPool {
    /// Interned strings, keyed by their content.
    // PORT NOTE: keyed by owned byte slice; lookup by `&[u8]` via Borrow<[u8]>.
    map: HashMap<Box<[u8]>, GcRef<LuaStringImpl>>,

    /// Number of interned strings; incremented by `intern_short_str` on insert.
    // PERF(port): redundant with map.len() in Rust — keep for C-parity; remove in Phase B
    nuse: usize,

    /// Logical table size (C bucket count). In Rust, `HashMap` manages its own
    /// capacity; this tracks the last requested size.
    size: usize,
}
254
255impl StringPool {
256 /// Create an empty pool with `MIN_STR_TAB_SIZE` preallocated capacity.
257 ///
258 /// `tablerehash(tb->hash, 0, MINSTRTABSIZE)` sequence in `luaS_init`.
259 pub fn new() -> Self {
260 StringPool {
261 map: HashMap::with_capacity(MIN_STR_TAB_SIZE),
262 nuse: 0,
263 size: MIN_STR_TAB_SIZE,
264 }
265 }
266}
267
268impl Default for StringPool {
269 fn default() -> Self {
270 Self::new()
271 }
272}
273
274// ── LuaUserData (was Udata in lobject.h) ──────────────────────────────────────
275
// PORT NOTE: `LuaUserDataImpl` corresponds to `Udata` from `lobject.h`, which maps to
// `src/object.rs` per file_deps.txt. Defined here because `luaS_newudata` lives
// in `lstring.c`. Phase B should reconcile placement.

/// Full userdata: a GC-tracked object carrying a raw byte payload plus optional
/// Lua user values and an optional metatable.
///
/// NOTE(review): `metatable` and `uv` currently use `()` placeholders because
/// `LuaTable` / `LuaValue` are not available to this module yet (see the TODOs
/// on the fields); they cannot hold real values until Phase B.
///
/// # C mapping (types.tsv)
/// ```text
/// Udata              → LuaUserDataImpl
/// Udata.len          → len: usize
/// Udata.nuvalue      → nuvalue: u16        (covered by uv.len() but kept for parity)
/// Udata.metatable    → metatable: Option<GcRef<LuaTable>>   (placeholder: Option<()>)
/// Udata.uv           → uv: Vec<LuaValue>                    (placeholder: Vec<()>)
/// (no direct C field) data: Box<[u8]>  — the raw byte payload; C used a flexible
///                                        array member laid out past the Udata header via
///                                        `udatamemoffset` alignment math.
/// ```
pub struct LuaUserDataImpl {
    /// C `Udata.len`. Presumably the payload size in bytes, i.e. `data.len()`;
    /// TODO confirm, no constructor is visible in this file.
    pub len: usize,
    /// C `Udata.nuvalue`: number of user values (kept alongside `uv` for parity).
    pub nuvalue: u16,
    /// Optional metatable.
    // TODO(port): GcRef<LuaTable> — LuaTable not yet defined; Phase B
    pub metatable: Option<()>,
    /// User values attached to the userdata.
    // macros.tsv: setnilvalue → *o = LuaValue::Nil
    // TODO(port): Vec<LuaValue> — LuaValue not yet defined; Phase B
    pub uv: Vec<()>,
    /// Port of the raw byte payload that C accessed via udatamemoffset arithmetic.
    pub data: Box<[u8]>,
}
305
306// ── Public functions ───────────────────────────────────────────────────────────
307
// lstring.h: LUAI_FUNC → pub(crate)
/// Seeded shift-add-xor hash over a byte string — port of `luaS_hash`.
///
/// Pure function, no allocations. The accumulator starts at `seed ^ len`
/// (with `len` truncated to 32 bits, like C's `cast_uint`) and the bytes are
/// folded in from the **last** byte to the first.
///
/// # C source
/// ```c
/// unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
///   unsigned int h = seed ^ cast_uint(l);
///   for (; l > 0; l--)
///     h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
///   return h;
/// }
/// ```
///
/// PORT NOTE: in Rust, as in C, `+` binds tighter than `<<` / `>>`, so the
/// shifts must be grouped explicitly. Here the parenthesised shift is the
/// receiver of `wrapping_add`, which both fixes the grouping and reproduces
/// C's unsigned wrap-around arithmetic.
pub(crate) fn hash_bytes(bytes: &[u8], seed: u32) -> u32 {
    // macros.tsv: cast_uint → x as u32 (truncating on purpose, as in C)
    let start = seed ^ (bytes.len() as u32);

    bytes.iter().rev().fold(start, |acc, &byte| {
        // macros.tsv: cast_byte → x as u8 (widened to u32 for the arithmetic)
        let mix = (acc << 5)
            .wrapping_add(acc >> 2)
            .wrapping_add(u32::from(byte));
        acc ^ mix
    })
}
342
343//
344// PORT NOTE: `tablerehash` walked the intrusive `hnext` chain in each bucket and
345// redistributed `TString *` pointers into new bucket slots. In Rust the
346// `HashMap` in `StringPool` handles its own rehashing automatically whenever its
347// load factor is exceeded or `reserve` / `shrink_to` is called. The entire
348// function is therefore dropped; its effects are subsumed by the HashMap.
349
350// lstring.h: LUAI_FUNC → pub(crate)
351/// Resize the string intern table to approximately `nsize` buckets.
352///
353/// When growing, `HashMap::reserve` hints the desired capacity. When shrinking,
354/// `HashMap::shrink_to` is used as an approximation of the C logic, which
355/// would rehash entries out of the shrinking tail. The C function's graceful
356/// degradation on allocation failure (keep the current size) is preserved:
357/// `HashMap` will simply retain its existing capacity if memory is tight.
358///
359/// # C source
360/// ```c
361///
362/// // stringtable *tb = &G(L)->strt;
363/// // int osize = tb->size;
364/// // TString **newvect;
365/// // if (nsize < osize)
366/// // tablerehash(tb->hash, osize, nsize); /* depopulate shrinking part */
367/// // newvect = luaM_reallocvector(L, tb->hash, osize, nsize, TString*);
368/// // if (l_unlikely(newvect == NULL)) {
369/// // if (nsize < osize)
370/// // tablerehash(tb->hash, nsize, osize); /* restore to original size */
371/// // } else {
372/// // tb->hash = newvect;
373/// // tb->size = nsize;
374/// // if (nsize > osize)
375/// // tablerehash(newvect, osize, nsize);
376/// // }
377/// // }
378/// ```
379///
380/// PORT NOTE: The three calls to `tablerehash` are dropped because `HashMap`
381/// automatically rehashes. The allocation-failure fallback (restore to `osize`)
382/// has no direct analogue; `HashMap` will retain existing capacity on OOM, which
383/// matches the intent.
384// PERF(port): luaS_resize shrink — HashMap::shrink_to() is a hint, not a
385// guarantee; the C code freed exact memory. Profile in Phase B.
386pub(crate) fn resize(state: &mut LuaState, nsize: usize) {
387 let strt = &mut state.global_mut().strt;
388 let osize = strt.size;
389
390 if nsize > osize {
391 let additional = nsize.saturating_sub(strt.map.len());
392 strt.map.reserve(additional);
393 } else if nsize < osize {
394 // PERF(port): shrink_to is a hint; exact shrink not guaranteed in Rust
395 strt.map.shrink_to(nsize);
396 }
397
398 strt.size = nsize;
399}
400
401// lstring.h: LUAI_FUNC → pub(crate)
402/// Initialise the string intern table and the API string cache.
403///
404/// Must be called exactly once during VM startup, before any strings are created.
405/// Pre-creates the memory-error message and fixes it in the GC (so it is never
406/// collected), then fills every cache slot with that same string.
407///
408/// # C source
409/// ```c
410///
411/// // global_State *g = G(L);
412/// // int i, j;
413/// // stringtable *tb = &G(L)->strt;
414/// // tb->hash = luaM_newvector(L, MINSTRTABSIZE, TString*);
415/// // tablerehash(tb->hash, 0, MINSTRTABSIZE);
416/// // tb->size = MINSTRTABSIZE;
417/// // g->memerrmsg = luaS_newliteral(L, MEMERRMSG);
418/// // luaC_fix(L, obj2gco(g->memerrmsg));
419/// // for (i = 0; i < STRCACHE_N; i++)
420/// // for (j = 0; j < STRCACHE_M; j++)
421/// // g->strcache[i][j] = g->memerrmsg;
422/// // }
423/// ```
424pub(crate) fn init(state: &mut LuaState) -> Result<(), LuaError> {
425 // tablerehash(tb->hash, 0, MINSTRTABSIZE);
426 // tb->size = MINSTRTABSIZE;
427 // macros.tsv: luaM_newvector → vec![T::default(); n]
428 // PORT NOTE: StringPool::new() sets the initial capacity to MIN_STR_TAB_SIZE,
429 // replacing both the allocation and the tablerehash clear pass.
430 state.global_mut().strt = StringPool::new();
431
432 // macros.tsv: luaS_newliteral → state.intern_str(b"...")
433 let memerrmsg = new_lstr(state, MEMERR_MSG)?;
434
435 // macros.tsv: luaC_fix — not listed; it marks the object as fixed (non-collectable)
436 // TODO(port): call state.gc().fix(memerrmsg.clone()) when GC is wired in Phase D;
437 // in Phases A–C the Rc keeps it alive as long as GlobalState holds the clone
438 let memerrmsg_lt = impl_to_lt(&memerrmsg);
439 state.global_mut().memerrmsg = memerrmsg_lt.clone();
440
441 // for (j = 0; j < STRCACHE_M; j++)
442 // g->strcache[i][j] = g->memerrmsg;
443 for i in 0..STRCACHE_N {
444 for j in 0..STRCACHE_M {
445 state.global_mut().strcache[i][j] = memerrmsg_lt.clone();
446 }
447 }
448
449 Ok(())
450}
451
452// lstring.h: LUAI_FUNC → pub(crate)
453/// Create or retrieve a Lua string from `bytes`.
454///
455/// If `bytes.len() <= MAX_SHORT_LEN` (40), the string is interned: an existing
456/// identical short string is returned if found, otherwise a new one is created
457/// and inserted into the intern table.
458///
459/// If `bytes.len() > MAX_SHORT_LEN`, a new long string is allocated each time
460/// (long strings are never interned).
461///
462/// # C source
463/// ```c
464///
465/// // if (l <= LUAI_MAXSHORTLEN) /* short string? */
466/// // return internshrstr(L, str, l);
467/// // else {
468/// // TString *ts;
469/// // if (l_unlikely(l * sizeof(char) >= (MAX_SIZE - sizeof(TString))))
470/// // luaM_toobig(L);
471/// // ts = luaS_createlngstrobj(L, l);
472/// // memcpy(getlngstr(ts), str, l * sizeof(char));
473/// // return ts;
474/// // }
475/// // }
476/// ```
477pub(crate) fn new_lstr(
478 state: &mut LuaState,
479 bytes: &[u8],
480) -> Result<GcRef<LuaStringImpl>, LuaError> {
481 if bytes.len() <= MAX_SHORT_LEN {
482 intern_short_str(state, bytes)
483 } else {
484 // luaM_toobig(L);
485 // macros.tsv: luaM_toobig → return Err(LuaError::Memory)
486 // PORT NOTE: sizeof(TString) is a C-specific overhead; in Rust we just
487 // check that the byte count fits within MAX_SIZE.
488 if bytes.len() >= MAX_SIZE {
489 return Err(LuaError::Memory);
490 }
491
492 // memcpy(getlngstr(ts), str, l * sizeof(char));
493 // PORT NOTE: Rather than creating a zeroed buffer and then copying,
494 // we construct the LuaStringImpl directly from `bytes`.
495 let seed = state.global().seed;
496 let h = hash_bytes(bytes, seed);
497 let ts = create_str_obj(state, bytes, StringKind::Long, h);
498 Ok(ts)
499 }
500}
501
502// lstring.h: LUAI_FUNC → pub(crate)
503
504// ── Private helpers ───────────────────────────────────────────────────────────
505
506/// Allocate and initialise a new `LuaStringImpl` with the given bytes, kind, and hash.
507///
508/// In C, `createstrobj` allocated uninitialised memory via `luaC_newobj` and set
509/// the header fields; the caller then filled the content via `memcpy`. In Rust
510/// we construct the string directly from the provided `bytes`, eliminating the
511/// two-step pattern.
512///
513/// # C source
514/// ```c
515///
516/// // TString *ts;
517/// // GCObject *o;
518/// // size_t totalsize = sizelstring(l);
519/// // o = luaC_newobj(L, tag, totalsize);
520/// // ts = gco2ts(o);
521/// // ts->hash = h;
522/// // ts->extra = 0;
523/// // getstr(ts)[l] = '\0'; /* ending 0 */
524/// // return ts;
525/// // }
526/// ```
527///
528/// PORT NOTE: `sizelstring(l)` computed the total allocation size including the
529/// nul terminator. In Rust, `Rc<[u8]>` stores the bytes without a nul; the
530/// nul terminator is dropped. Callers that need a nul-terminated `*const u8`
531/// for FFI must use a temporary `CString` or equivalent.
532fn create_str_obj(
533 state: &mut LuaState,
534 bytes: &[u8],
535 kind: StringKind,
536 hash: u32,
537) -> GcRef<LuaStringImpl> {
538 // macros.tsv: luaM_newobject → state.gc().new_obj(tag, sz)
539 // TODO(port): register with GC tracking list (state.global_mut().allgc)
540 // in Phase D; Phase A–C creates a bare Rc
541 let _ = state; // state needed for GC registration in Phase D
542 // TODO(D-1c-bridge): LuaStringImpl is the rich local type; state helper produces lua_types::LuaString
543 GcRef::new(LuaStringImpl {
544 hash: Cell::new(hash),
545 extra: Cell::new(0),
546 // PORT NOTE: we receive bytes directly; no separate memcpy step needed
547 bytes: Rc::from(bytes),
548 kind,
549 })
550}
551
552/// Grow the string intern table, first attempting a GC collection if the table is
553/// at its absolute maximum size.
554///
555/// # C source
556/// ```c
557///
558/// // if (l_unlikely(tb->nuse == MAX_INT)) { /* too many strings? */
559/// // luaC_fullgc(L, 1); /* try to free some... */
560/// // if (tb->nuse == MAX_INT) /* still too many? */
561/// // luaM_error(L); /* cannot even create a message... */
562/// // }
563/// // if (tb->size <= MAXSTRTB / 2) /* can grow string table? */
564/// // luaS_resize(L, tb->size * 2);
565/// // }
566/// ```
567fn grow_str_tab(state: &mut LuaState) -> Result<(), LuaError> {
568 // macros.tsv: MAX_INT → i32::MAX
569 let nuse = state.global().strt.nuse;
570 if nuse == i32::MAX as usize {
571 // macros.tsv: luaC_fullgc → state.gc().full_collect()
572 // TODO(port): state.gc().full_collect() — GC not yet wired in Phase A–C; no-op
573 // (When GC is live this call may reduce nuse by sweeping dead short strings.)
574
575 // macros.tsv: luaM_error → return Err(LuaError::Memory)
576 if state.global().strt.nuse == i32::MAX as usize {
577 return Err(LuaError::Memory);
578 }
579 }
580
581 let size = state.global().strt.size;
582 if size <= MAX_STR_TAB / 2 {
583 resize(state, size * 2);
584 }
585
586 Ok(())
587}
588
589/// Look up `bytes` in the intern table; create and insert a new short string if
590/// not found.
591///
592/// The `isdead` / `changewhite` resurrection path is elided in Phases A–C because
593/// `Rc`-based reference counting keeps objects alive until all references drop
594/// (there are no dead-but-not-collected strings in Phase A–C).
595///
596/// # C source
597/// ```c
598///
599/// // TString *ts;
600/// // global_State *g = G(L);
601/// // stringtable *tb = &g->strt;
602/// // unsigned int h = luaS_hash(str, l, g->seed);
603/// // TString **list = &tb->hash[lmod(h, tb->size)];
604/// // lua_assert(str != NULL);
605/// // for (ts = *list; ts != NULL; ts = ts->u.hnext) {
606/// // if (l == ts->shrlen && (memcmp(str, getshrstr(ts), l) == 0)) {
607/// // if (isdead(g, ts)) changewhite(ts); /* resurrect it */
608/// // return ts;
609/// // }
610/// // }
611/// // if (tb->nuse >= tb->size) {
612/// // growstrtab(L, tb);
613/// // list = &tb->hash[lmod(h, tb->size)];
614/// // }
615/// // ts = createstrobj(L, l, LUA_VSHRSTR, h);
616/// // ts->shrlen = cast_byte(l);
617/// // memcpy(getshrstr(ts), str, l);
618/// // ts->u.hnext = *list;
619/// // *list = ts;
620/// // tb->nuse++;
621/// // return ts;
622/// // }
623/// ```
624///
625/// PORT NOTE: `lmod(h, tb->size)` (power-of-two bucket modulo via
626/// `macros.tsv: lmod → (s & (size - 1)) as usize`) and the `hnext` chain walk
627/// are both gone. `HashMap::get` replaces the linear bucket scan.
628fn intern_short_str(state: &mut LuaState, bytes: &[u8]) -> Result<GcRef<LuaStringImpl>, LuaError> {
629 // In Rust, &[u8] slices are never null; the assertion is trivially satisfied.
630
631 let seed = state.global().seed;
632 let h = hash_bytes(bytes, seed);
633
634 // PORT NOTE: intrusive hnext chain replaced by HashMap lookup
635 // Clone the existing GcRef<LuaStringImpl> so the immutable borrow on `state` ends
636 // before any mutable access below.
637 let existing = state.global().strt.map.get(bytes).cloned();
638 if let Some(ts) = existing {
639 // macros.tsv: isdead → g.is_dead(obj); changewhite → obj.flip_white()
640 // PORT NOTE: GC color management deferred to Phase D; in Phases A–C all
641 // Rc-held objects are live by definition (Rc keeps them alive).
642 return Ok(ts);
643 }
644
645 let needs_grow = {
646 let strt = &state.global().strt;
647 strt.nuse >= strt.size
648 };
649 if needs_grow {
650 grow_str_tab(state)?;
651 }
652
653 // ts->shrlen = cast_byte(l); — encoded in StringKind::Short
654 // memcpy(getshrstr(ts), str, l); — bytes passed directly to create_str_obj
655 let ts = create_str_obj(state, bytes, StringKind::Short, h);
656
657 state
658 .global_mut()
659 .strt
660 .map
661 .insert(bytes.to_vec().into_boxed_slice(), ts.clone());
662 state.global_mut().strt.nuse += 1;
663
664 Ok(ts)
665}
666
667// ── Re-export marker for type defined here ────────────────────────────────────
668
669// TODO(port): LuaError is used in function signatures above but is not yet defined
670// in lua-types. Phase B must add LuaError to lua-types/src/error.rs per
671// PORTING.md §6 before this file can compile. The expected variants are:
672// LuaError::Runtime(LuaValue)
673// LuaError::Memory
674// LuaError::Syntax(LuaValue)
675// ... (full list in PORTING.md §6)
676// For now, reference LuaError as an opaque import from the future lua-types crate.
677use lua_types::LuaError;
678
679// ──────────────────────────────────────────────────────────────────────────────
680// PORT STATUS
681// source: src/lstring.c (275 lines, 15 functions)
682// src/lstring.h (57 lines; merged)
683// target_crate: lua-vm
684// confidence: medium
685// todos: 14
686// port_notes: 30
687// unsafe_blocks: 0 (must be 0 outside explicit unsafe-budget crates)
688// notes: Logic is faithful to the C. The two largest structural changes
689// are: (1) `tablerehash` + intrusive `hnext` chain replaced by
690// `HashMap` in `StringPool`; (2) `luaS_new`'s `point2uint`
691// pointer-hash replaced by a content hash (safe, same semantics).
692// Key TODOs: GC registration in create_str_obj (Phase D),
693// GC registration in new_userdata (Phase D), luaC_fix in init
694// (Phase D), full_collect stub in grow_str_tab (Phase D),
695// udatamemoffset size check in new_userdata (Phase B),
696// LuaValue in LuaUserData.uv (Phase B), LuaError import path
697// (Phase B), GcRef typedef (Phase B). Phase B priority: wire
698// import paths for LuaState, GlobalState, LuaError, LuaValue,
699// and move LuaStringImpl/StringPool/LuaUserData to their canonical
700// modules (object.rs / state.rs).
701// ──────────────────────────────────────────────────────────────────────────────