Skip to main content

lua_types/
string.rs

//! `LuaString` — Lua's byte-string (NOT UTF-8). PORT_STRATEGY §3.3.
//!
//! Phase A-C: a simple `Box<[u8]>`-backed struct with a short/long flag.
//! Phase D may revisit for interning + content-hash equality.
5
/// An immutable Lua string value: an arbitrary byte sequence, not necessarily
/// valid UTF-8.
///
/// # Storage
///
/// The payload is owned through a `Box<[u8]>`, deliberately not an `Rc<[u8]>`.
/// A live `LuaString` is always reached through a `GcRef<LuaString>` (the
/// interner keeps `GcRef`s and `LuaValue::Str` holds one), so sharing between
/// values happens at the `GcRef` layer and the payload itself never needs a
/// reference count. With `Rc<[u8]>` the strong and weak counters (16 bytes)
/// would sit in the same heap allocation as the payload, and each string would
/// pay for them, plus the increment/decrement traffic, on top of its
/// `GcBox<LuaString>`. The saving from `Box<[u8]>` is in that heap allocation
/// only: the field itself is a 16-byte fat pointer either way.
///
/// # Cloning
///
/// `Clone` is derived, so cloning a `LuaString` by value allocates and copies
/// the payload. No hot path does this: hot sharing uses the `Copy`
/// `GcRef<LuaString>` handle. The by-value clones that exist are cold ones
/// (building error messages, `GlobalState` initialisation).
#[derive(Clone, Debug)]
pub struct LuaString {
    /// Raw byte payload; fixed once the string has been built.
    bytes: Box<[u8]>,
    /// Short/long classification, decided from the payload length at
    /// construction time.
    is_short: bool,
    /// Hash of the payload, computed at construction time.
    hash: u32,
}
30
31impl LuaString {
32    pub fn from_bytes(b: Vec<u8>) -> Self {
33        let is_short = b.len() <= 40;
34        let hash = Self::hash_bytes(&b, 0);
35        LuaString {
36            bytes: b.into_boxed_slice(),
37            is_short,
38            hash,
39        }
40    }
41
42    /// Construct directly from a borrowed slice with a single allocating copy.
43    ///
44    /// `from_bytes` takes an owned `Vec<u8>`, but `Vec<u8> -> Box<[u8]>` via
45    /// `into_boxed_slice` only adopts the existing buffer when it is exactly
46    /// full, otherwise it reallocates; a caller holding only a slice would copy
47    /// twice (once into a `Vec`, once into the `Box`). `Box::from(&[u8])` copies
48    /// the slice straight into the final allocation, matching C's single
49    /// `luaS_newlstr` allocation per string. Hash is computed with the same
50    /// algorithm as `from_bytes`.
51    pub fn from_slice(b: &[u8]) -> Self {
52        let is_short = b.len() <= 40;
53        let hash = Self::hash_bytes(b, 0);
54        LuaString {
55            bytes: Box::from(b),
56            is_short,
57            hash,
58        }
59    }
60
61    pub fn placeholder() -> Self {
62        Self::from_bytes(Vec::new())
63    }
64
65    pub fn as_bytes(&self) -> &[u8] {
66        &self.bytes
67    }
68    pub fn len(&self) -> usize {
69        self.bytes.len()
70    }
71    pub fn is_empty(&self) -> bool {
72        self.bytes.is_empty()
73    }
74    pub fn is_short(&self) -> bool {
75        self.is_short
76    }
77    pub fn is_long(&self) -> bool {
78        !self.is_short
79    }
80    pub fn hash(&self) -> u32 {
81        self.hash
82    }
83    pub fn buffer_bytes(&self) -> usize {
84        self.bytes.len() + 2 * std::mem::size_of::<usize>()
85    }
86
87    pub fn is_reserved_word(&self) -> bool {
88        // TODO(port): proper reserved-word check via lexer's token enum.
89        false
90    }
91
92    pub fn hash_bytes(bytes: &[u8], seed: u32) -> u32 {
93        // Stub WyHash. Real impl ports bun_wyhash. Stable for now so
94        // intern-table equality works.
95        let mut h: u32 = seed.wrapping_add(0x9e3779b9);
96        for &b in bytes {
97            h = h.wrapping_mul(31).wrapping_add(b as u32);
98        }
99        h
100    }
101
102    pub fn hash_long(&mut self) -> u32 {
103        self.hash
104    }
105}
106
107impl PartialEq for LuaString {
108    fn eq(&self, other: &Self) -> bool {
109        self.bytes == other.bytes
110    }
111}
112impl Eq for LuaString {}
113
// ──────────────────────────────────────────────────────────────────────────────
// PORT STATUS
//   source:        src/lstring.h, src/lstring.c (TString)
//   target_crate:  lua-types
//   confidence:    high
//   todos:         1
//   port_notes:    0
//   unsafe_blocks: 0
//   notes:         LuaString interned-string type. Mirrors C's TString with the short/long
//                  variant distinction and the hash field; uses GcRef-style ptr
//                  identity for interning. Byte payload is Box<[u8]>, not Rc<[u8]>:
//                  strings are immutable and shared at the GcRef level, so the Rc
//                  refcount header (16 B/string) was pure overhead. Box drops it.
// ──────────────────────────────────────────────────────────────────────────────