luna_core/runtime/
string.rs1use std::alloc::{Layout, alloc, dealloc, handle_alloc_error};
8use std::cell::Cell;
9use std::ptr;
10use std::slice;
11
12use crate::runtime::heap::{GcHeader, ObjTag};
13
14pub const MAX_SHORT_LEN: usize = 40;
17
18#[repr(C)]
22pub struct LuaStr {
23 pub(crate) hdr: GcHeader,
24 hnext: *mut LuaStr,
26 hash: Cell<u32>,
28 hashed: Cell<bool>,
29 short: bool,
30 len: u32,
31 }
33
34impl LuaStr {
35 pub fn len(&self) -> usize {
37 self.len as usize
38 }
39
40 pub fn is_empty(&self) -> bool {
42 self.len == 0
43 }
44
45 pub(crate) fn is_short(&self) -> bool {
46 self.short
47 }
48}
49
50impl crate::runtime::heap::Gc<LuaStr> {
55 pub fn as_bytes(&self) -> &[u8] {
57 unsafe { bytes_of(self.as_ptr()) }
59 }
60
61 pub fn hash(&self) -> u32 {
63 unsafe { hash_of(self.as_ptr()) }
65 }
66}
67
68pub(crate) unsafe fn bytes_of<'a>(p: *const LuaStr) -> &'a [u8] {
70 unsafe { slice::from_raw_parts(p.add(1) as *const u8, (*p).len as usize) }
71}
72
73pub(crate) unsafe fn hash_of(p: *const LuaStr) -> u32 {
75 unsafe {
76 if !(*p).hashed.get() {
77 (*p).hash.set(lua_hash(bytes_of(p), (*p).hash.get()));
78 (*p).hashed.set(true);
79 }
80 (*p).hash.get()
81 }
82}
83
84pub(crate) fn lua_hash(bytes: &[u8], seed: u32) -> u32 {
86 let mut h = seed ^ bytes.len() as u32;
87 for &b in bytes {
88 h ^= h
89 .wrapping_shl(5)
90 .wrapping_add(h.wrapping_shr(2))
91 .wrapping_add(b as u32);
92 }
93 h
94}
95
96fn layout(len: usize) -> Layout {
97 Layout::new::<LuaStr>()
98 .extend(Layout::array::<u8>(len).expect("string size overflows layout"))
99 .expect("string size overflows layout")
100 .0
101 .pad_to_align()
102}
103
104fn alloc_str(bytes: &[u8], short: bool, hash: u32, hashed: bool) -> *mut LuaStr {
105 let layout = layout(bytes.len());
106 unsafe {
108 let p = alloc(layout) as *mut LuaStr;
109 if p.is_null() {
110 handle_alloc_error(layout);
111 }
112 p.write(LuaStr {
113 hdr: GcHeader::new(ObjTag::Str),
114 hnext: ptr::null_mut(),
115 hash: Cell::new(hash),
116 hashed: Cell::new(hashed),
117 short,
118 len: bytes.len() as u32,
119 });
120 ptr::copy_nonoverlapping(bytes.as_ptr(), p.add(1) as *mut u8, bytes.len());
121 p
122 }
123}
124
125pub(crate) fn alloc_long(bytes: &[u8], seed: u32) -> *mut LuaStr {
126 debug_assert!(bytes.len() > MAX_SHORT_LEN);
127 alloc_str(bytes, false, seed, false)
128}
129
130pub(crate) unsafe fn free(p: *mut LuaStr) {
132 unsafe {
133 let l = layout((*p).len as usize);
134 ptr::drop_in_place(p);
135 dealloc(p as *mut u8, l);
136 }
137}
138
139pub(crate) struct StringTable {
141 buckets: Vec<*mut LuaStr>,
142 count: usize,
143}
144
145impl StringTable {
146 pub(crate) fn new() -> StringTable {
147 StringTable {
148 buckets: vec![ptr::null_mut(); 64],
149 count: 0,
150 }
151 }
152
153 pub(crate) fn intern(&mut self, bytes: &[u8], seed: u32) -> (*mut LuaStr, bool) {
155 debug_assert!(bytes.len() <= MAX_SHORT_LEN);
156 let h = lua_hash(bytes, seed);
157 let b = h as usize & (self.buckets.len() - 1);
158 let mut cur = self.buckets[b];
159 unsafe {
161 while !cur.is_null() {
162 if (*cur).len as usize == bytes.len() && bytes_of(cur) == bytes {
163 return (cur, false);
164 }
165 cur = (*cur).hnext;
166 }
167 }
168 if self.count >= self.buckets.len() {
169 self.grow();
170 }
171 let b = h as usize & (self.buckets.len() - 1);
172 let p = alloc_str(bytes, true, h, true);
173 unsafe {
175 (*p).hnext = self.buckets[b];
176 }
177 self.buckets[b] = p;
178 self.count += 1;
179 (p, true)
180 }
181
182 fn grow(&mut self) {
183 let mut nb = vec![ptr::null_mut(); self.buckets.len() * 2];
184 let mask = nb.len() - 1;
185 for &head in &self.buckets {
186 let mut cur = head;
187 while !cur.is_null() {
188 unsafe {
190 let next = (*cur).hnext;
191 let b = (*cur).hash.get() as usize & mask;
192 (*cur).hnext = nb[b];
193 nb[b] = cur;
194 cur = next;
195 }
196 }
197 }
198 self.buckets = nb;
199 }
200
201 pub(crate) fn remove(&mut self, p: *mut LuaStr) {
203 unsafe {
205 let b = (*p).hash.get() as usize & (self.buckets.len() - 1);
206 let mut cur: *mut *mut LuaStr = &mut self.buckets[b];
207 while !(*cur).is_null() {
208 if *cur == p {
209 *cur = (*p).hnext;
210 self.count -= 1;
211 return;
212 }
213 cur = &mut (**cur).hnext;
214 }
215 unreachable!("interned string missing from string table");
216 }
217 }
218}
219
220pub(crate) fn alloc_size(len: usize) -> usize {
222 layout(len).size()
223}
224
225#[cfg(test)]
226mod tests {
227 use crate::runtime::heap::Heap;
228
229 #[test]
230 fn short_strings_are_interned() {
231 let mut heap = Heap::new();
232 let a = heap.intern(b"hello");
233 let b = heap.intern(b"hello");
234 let c = heap.intern(b"world");
235 assert!(a.ptr_eq(b));
236 assert!(!a.ptr_eq(c));
237 assert_eq!(heap.live_objects(), 2);
238 assert_eq!(a.as_bytes(), b"hello");
239 }
240
241 #[test]
242 fn long_strings_are_not_interned() {
243 let mut heap = Heap::new();
244 let bytes = [0xAAu8; 64]; let a = heap.intern(&bytes);
246 let b = heap.intern(&bytes);
247 assert!(!a.ptr_eq(b));
248 assert_eq!(a.as_bytes(), b.as_bytes());
249 assert_eq!(a.hash(), b.hash());
251 }
252
253 #[test]
254 fn arbitrary_bytes_roundtrip() {
255 let mut heap = Heap::new();
256 let bytes: Vec<u8> = (0..=255).collect();
257 let s = heap.intern(&bytes);
258 assert_eq!(s.as_bytes(), &bytes[..]);
259 assert_eq!(s.len(), 256);
260 }
261
262 #[test]
263 fn interning_survives_table_growth() {
264 let mut heap = Heap::new();
265 let first = heap.intern(b"key000");
266 for i in 0..2000 {
268 heap.intern(format!("key{i:03}").as_bytes());
269 }
270 let again = heap.intern(b"key000");
271 assert!(first.ptr_eq(again));
272 }
273}