1use rustc_hash::{FxHashMap, FxHasher};
11use serde::Serialize;
12use std::hash::{Hash, Hasher};
13use std::sync::{Arc, RwLock};
14
15#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Default, PartialOrd, Ord)]
20pub struct Atom(pub u32);
21
22impl Atom {
23 pub const NONE: Self = Self(0);
25
26 #[must_use]
28 #[inline]
29 pub const fn none() -> Self {
30 Self::NONE
31 }
32
33 #[must_use]
35 #[inline]
36 pub const fn is_none(self) -> bool {
37 self.0 == 0
38 }
39
40 #[must_use]
42 #[inline]
43 pub const fn index(self) -> u32 {
44 self.0
45 }
46}
47
/// Number of low bits of a sharded atom's raw value that hold the shard index.
const SHARD_BITS: u32 = 6;
/// Number of shards; derived from [`SHARD_BITS`] so the two cannot drift apart.
const SHARD_COUNT: usize = 1_usize << SHARD_BITS;
/// Mask selecting the shard bits of a raw `u32` atom value.
const SHARD_MASK: u32 = (1_u32 << SHARD_BITS) - 1;
/// The same mask as [`SHARD_MASK`], typed as `u64` for masking 64-bit hashes.
const SHARD_MASK_U64: u64 = (1_u64 << SHARD_BITS) - 1;
/// Strings that `intern_common` feeds to an interner, in this order.
///
/// Every entry is unique: interning the same string twice is a no-op, so a
/// repeated entry would only cost an extra lookup.
const COMMON_STRINGS: &[&str] = &[
    // Keywords and literals (these look like JavaScript/TypeScript reserved
    // and contextual words).
    "break",
    "case",
    "catch",
    "class",
    "const",
    "continue",
    "debugger",
    "default",
    "delete",
    "do",
    "else",
    "enum",
    "export",
    "extends",
    "false",
    "finally",
    "for",
    "function",
    "if",
    "import",
    "in",
    "instanceof",
    "new",
    "null",
    "return",
    "super",
    "switch",
    "this",
    "throw",
    "true",
    "try",
    "typeof",
    "undefined",
    "var",
    "void",
    "while",
    "with",
    "as",
    "implements",
    "interface",
    "let",
    "package",
    "private",
    "protected",
    "public",
    "static",
    "yield",
    "any",
    "boolean",
    "number",
    "string",
    "symbol",
    "type",
    "from",
    "of",
    "async",
    "await",
    // Frequently used identifiers and property names.
    "id",
    "name",
    "value",
    "length",
    "key",
    "index",
    "item",
    "data",
    "error",
    "result",
    "response",
    "request",
    "options",
    "config",
    "props",
    "state",
    "children",
    "onClick",
    "onChange",
    "onSubmit",
    "constructor",
    "prototype",
    "toString",
    "valueOf",
    "hasOwnProperty",
    // Built-in object names.
    "Array",
    "Object",
    "String",
    "Number",
    "Boolean",
    "Function",
    "Promise",
    "Map",
    "Set",
    "Date",
    "RegExp",
    "Error",
    "Symbol",
    // Console, host-environment and module-system names.
    "console",
    "log",
    "warn",
    "info",
    "debug",
    "document",
    "window",
    "global",
    "process",
    "module",
    "exports",
    "require",
    "define",
    "__dirname",
    "__filename",
];
167
168#[derive(Default, Clone, Debug)]
180pub struct Interner {
181 map: FxHashMap<Arc<str>, Atom>,
183 strings: Vec<Arc<str>>,
185}
186
187impl Interner {
188 #[must_use]
190 pub fn new() -> Self {
191 let mut interner = Self {
192 map: FxHashMap::default(),
193 strings: Vec::with_capacity(1024), };
195 let empty: Arc<str> = Arc::from("");
197 interner.strings.push(Arc::clone(&empty));
198 interner.map.insert(empty, Atom::NONE);
199 interner
200 }
201
202 #[must_use]
205 #[inline]
206 pub fn intern(&mut self, s: &str) -> Atom {
207 if let Some(&atom) = self.map.get(s) {
208 return atom;
209 }
210 let atom = Atom(u32::try_from(self.strings.len()).unwrap_or(Atom::NONE.0));
211 let owned: Arc<str> = Arc::from(s);
212 self.strings.push(Arc::clone(&owned));
213 self.map.insert(owned, atom);
214 atom
215 }
216
217 #[must_use]
219 #[inline]
220 pub fn intern_owned(&mut self, s: String) -> Atom {
221 if let Some(&atom) = self.map.get(s.as_str()) {
222 return atom;
223 }
224 let atom = Atom(u32::try_from(self.strings.len()).unwrap_or(Atom::NONE.0));
225 let owned: Arc<str> = Arc::from(s.into_boxed_str());
226 self.strings.push(Arc::clone(&owned));
227 self.map.insert(owned, atom);
228 atom
229 }
230
231 #[must_use]
234 #[inline]
235 pub fn resolve(&self, atom: Atom) -> &str {
236 self.strings.get(atom.0 as usize).map_or("", AsRef::as_ref)
237 }
238
239 #[must_use]
241 #[inline]
242 pub fn try_resolve(&self, atom: Atom) -> Option<&str> {
243 self.strings.get(atom.0 as usize).map(AsRef::as_ref)
244 }
245
246 #[must_use]
248 #[inline]
249 pub const fn len(&self) -> usize {
250 self.strings.len()
251 }
252
253 #[must_use]
255 #[inline]
256 pub const fn is_empty(&self) -> bool {
257 self.strings.len() <= 1
258 }
259
260 pub fn intern_common(&mut self) {
263 for s in COMMON_STRINGS {
264 let _ = self.intern(s);
265 }
266 }
267}
268
269#[derive(Default)]
270struct ShardState {
271 map: FxHashMap<Arc<str>, Atom>,
272 strings: Vec<Arc<str>>,
273}
274
275struct InternerShard {
276 state: RwLock<ShardState>,
277}
278
279impl InternerShard {
280 #[must_use]
281 fn new() -> Self {
282 Self {
283 state: RwLock::new(ShardState::default()),
284 }
285 }
286}
287
288pub struct ShardedInterner {
292 shards: [InternerShard; SHARD_COUNT],
293}
294
295impl ShardedInterner {
296 #[must_use]
298 pub fn new() -> Self {
299 let shards = std::array::from_fn(|_| InternerShard::new());
300
301 if let Ok(mut state) = shards[0].state.write() {
303 let empty: Arc<str> = Arc::from("");
304 state.strings.push(Arc::clone(&empty));
305 state.map.insert(empty, Atom::NONE);
306 }
307 Self { shards }
311 }
312
313 #[must_use]
316 #[inline]
317 pub fn intern(&self, s: &str) -> Atom {
318 if s.is_empty() {
319 return Atom::NONE;
320 }
321
322 let shard_idx = Self::shard_for(s);
323 let shard = &self.shards[shard_idx];
324 let Ok(mut state) = shard.state.write() else {
325 return Atom::NONE;
328 };
329
330 if let Some(&atom) = state.map.get(s) {
331 return atom;
332 }
333
334 let Ok(local_index) = u32::try_from(state.strings.len()) else {
335 return Atom::NONE;
336 };
337 if local_index > (u32::MAX >> SHARD_BITS) {
338 return Atom::NONE;
340 }
341
342 let shard_idx_u32 = u32::try_from(shard_idx).unwrap_or(Atom::NONE.0);
343 let atom = Self::make_atom(local_index, shard_idx_u32);
344 let owned: Arc<str> = Arc::from(s);
345 state.strings.push(Arc::clone(&owned));
346 state.map.insert(owned, atom);
347 atom
348 }
349
350 #[must_use]
352 #[inline]
353 pub fn intern_owned(&self, s: String) -> Atom {
354 if s.is_empty() {
355 return Atom::NONE;
356 }
357
358 let shard_idx = Self::shard_for(&s);
359 let shard = &self.shards[shard_idx];
360 let Ok(mut state) = shard.state.write() else {
361 return Atom::NONE;
363 };
364
365 if let Some(&atom) = state.map.get(s.as_str()) {
366 return atom;
367 }
368
369 let Ok(local_index) = u32::try_from(state.strings.len()) else {
370 return Atom::NONE;
371 };
372 if local_index > (u32::MAX >> SHARD_BITS) {
373 return Atom::NONE;
375 }
376
377 let shard_idx_u32 = u32::try_from(shard_idx).unwrap_or(Atom::NONE.0);
378 let atom = Self::make_atom(local_index, shard_idx_u32);
379 let owned: Arc<str> = Arc::from(s);
380 state.strings.push(Arc::clone(&owned));
381 state.map.insert(owned, atom);
382 atom
383 }
384
385 #[must_use]
388 #[inline]
389 pub fn resolve(&self, atom: Atom) -> Arc<str> {
390 self.try_resolve(atom).unwrap_or_else(|| Arc::from(""))
391 }
392
393 #[must_use]
395 #[inline]
396 pub fn try_resolve(&self, atom: Atom) -> Option<Arc<str>> {
397 let (shard_idx, local_index) = Self::split_atom(atom);
398 let shard = self.shards.get(shard_idx)?;
399 let state = shard.state.read().ok()?; state.strings.get(local_index).cloned()
401 }
402
403 #[must_use]
405 #[inline]
406 pub fn len(&self) -> usize {
407 self.shards
408 .iter()
409 .map(|shard| {
410 shard
412 .state
413 .read()
414 .map(|state| state.strings.len())
415 .unwrap_or(0)
416 })
417 .sum()
418 }
419
420 #[must_use]
422 #[inline]
423 pub fn is_empty(&self) -> bool {
424 self.len() <= 1
425 }
426
427 pub fn intern_common(&self) {
430 for s in COMMON_STRINGS {
431 let _ = self.intern(s);
432 }
433 }
434
435 #[inline]
436 fn shard_for(s: &str) -> usize {
437 let mut hasher = FxHasher::default();
438 s.hash(&mut hasher);
439 usize::try_from(hasher.finish() & SHARD_MASK_U64).unwrap_or(0)
440 }
441
442 #[inline]
443 const fn make_atom(local_index: u32, shard_idx: u32) -> Atom {
444 Atom((local_index << SHARD_BITS) | (shard_idx & SHARD_MASK))
445 }
446
447 #[inline]
448 fn split_atom(atom: Atom) -> (usize, usize) {
449 if atom == Atom::NONE {
450 return (0, 0);
451 }
452
453 let raw = atom.0;
454 let shard_idx = usize::try_from(raw & SHARD_MASK).unwrap_or(0);
455 let local_index = usize::try_from(raw >> SHARD_BITS).unwrap_or(0);
456 (shard_idx, local_index)
457 }
458}
459
460impl Default for ShardedInterner {
461 fn default() -> Self {
462 Self::new()
463 }
464}