lua_types/string.rs
//! `LuaString` — Lua's byte-string (NOT UTF-8). PORT_STRATEGY §3.3.
//!
//! Phase A-C: a simple `Box<[u8]>`-backed struct with a short/long flag.
//! Phase D may revisit for interning + content-hash equality.
5
/// Lua's immutable byte-string value.
///
/// The byte payload is a `Box<[u8]>`, NOT an `Rc<[u8]>`. Strings are immutable
/// and GC-owned: every live `LuaString` is reached through a `GcRef<LuaString>`
/// (the interner stores `GcRef`s, `LuaValue::Str` holds a `GcRef`), and all
/// value-level sharing happens at that `GcRef` layer. An `Rc<[u8]>` would
/// co-locate a 16-byte refcount header (strong + weak counts) with the payload
/// in the string's heap allocation, so every string allocation paid those 16
/// bytes on top of its `GcBox<LuaString>` for a refcount machinery nothing
/// uses. Switching to `Box<[u8]>` drops the 16-byte header per string and the
/// refcount inc/dec traffic; the win is in the heap allocation, not the struct
/// field (both `Rc<[u8]>` and `Box<[u8]>` are 16-byte fat pointers).
///
/// The `#[derive(Clone)]` is retained, but a by-value `LuaString` clone is now
/// a deep copy (alloc + memcpy) rather than a refcount bump. This is acceptable
/// because no hot path clones a `LuaString` by value — hot sharing goes through
/// the `Copy` `GcRef<LuaString>` handle. The only by-value clones are cold
/// (error-message construction, `GlobalState` init).
///
/// All three fields are private and are only ever written by the constructors
/// in this file, so `is_short` and `hash` always describe `bytes`.
#[derive(Debug, Clone)]
pub struct LuaString {
    /// Raw payload: arbitrary bytes, not required to be valid UTF-8.
    bytes: Box<[u8]>,
    /// Short/long classification, fixed at construction: `true` when the
    /// payload is at most 40 bytes long.
    is_short: bool,
    /// Hash of `bytes`, computed once at construction via
    /// `LuaString::hash_bytes` with seed `0` and cached here.
    hash: u32,
}
30
31impl LuaString {
32 pub fn from_bytes(b: Vec<u8>) -> Self {
33 let is_short = b.len() <= 40;
34 let hash = Self::hash_bytes(&b, 0);
35 LuaString {
36 bytes: b.into_boxed_slice(),
37 is_short,
38 hash,
39 }
40 }
41
42 /// Construct directly from a borrowed slice with a single allocating copy.
43 ///
44 /// `from_bytes` takes an owned `Vec<u8>`, but `Vec<u8> -> Box<[u8]>` via
45 /// `into_boxed_slice` only adopts the existing buffer when it is exactly
46 /// full, otherwise it reallocates; a caller holding only a slice would copy
47 /// twice (once into a `Vec`, once into the `Box`). `Box::from(&[u8])` copies
48 /// the slice straight into the final allocation, matching C's single
49 /// `luaS_newlstr` allocation per string. Hash is computed with the same
50 /// algorithm as `from_bytes`.
51 pub fn from_slice(b: &[u8]) -> Self {
52 let is_short = b.len() <= 40;
53 let hash = Self::hash_bytes(b, 0);
54 LuaString {
55 bytes: Box::from(b),
56 is_short,
57 hash,
58 }
59 }
60
61 pub fn placeholder() -> Self {
62 Self::from_bytes(Vec::new())
63 }
64
65 pub fn as_bytes(&self) -> &[u8] {
66 &self.bytes
67 }
68 pub fn len(&self) -> usize {
69 self.bytes.len()
70 }
71 pub fn is_empty(&self) -> bool {
72 self.bytes.is_empty()
73 }
74 pub fn is_short(&self) -> bool {
75 self.is_short
76 }
77 pub fn is_long(&self) -> bool {
78 !self.is_short
79 }
80 pub fn hash(&self) -> u32 {
81 self.hash
82 }
83 pub fn buffer_bytes(&self) -> usize {
84 self.bytes.len() + 2 * std::mem::size_of::<usize>()
85 }
86
87 pub fn is_reserved_word(&self) -> bool {
88 // TODO(port): proper reserved-word check via lexer's token enum.
89 false
90 }
91
92 pub fn hash_bytes(bytes: &[u8], seed: u32) -> u32 {
93 // Stub WyHash. Real impl ports bun_wyhash. Stable for now so
94 // intern-table equality works.
95 let mut h: u32 = seed.wrapping_add(0x9e3779b9);
96 for &b in bytes {
97 h = h.wrapping_mul(31).wrapping_add(b as u32);
98 }
99 h
100 }
101
102 pub fn hash_long(&mut self) -> u32 {
103 self.hash
104 }
105}
106
107impl PartialEq for LuaString {
108 fn eq(&self, other: &Self) -> bool {
109 self.bytes == other.bytes
110 }
111}
112impl Eq for LuaString {}
113
// ──────────────────────────────────────────────────────────────────────────────
// PORT STATUS
// source: src/lstring.h, src/lstring.c (TString)
// target_crate: lua-types
// confidence: high
// todos: 1
// port_notes: 0
// unsafe_blocks: 0
// notes: LuaString interned-string type. Mirrors C's TString with the short/long
// variant distinction and the hash field; uses GcRef-style ptr
// identity for interning. Byte payload is Box<[u8]>, not Rc<[u8]>:
// strings are immutable and shared at the GcRef level, so the Rc
// refcount header (16 B/string) was pure overhead. Box drops it.
// ──────────────────────────────────────────────────────────────────────────────