1use serde::{Deserialize, Serialize};
3use std::{
4 borrow::Borrow,
5 fmt::{self, Debug},
6 hash::Hash,
7 sync::LazyLock,
8};
9
10use crate::{metrics::increment, position::TermPos};
11
12simple_counter::generate_counter!(GeneratedCounter, usize);
13static INTERNER: LazyLock<interner::Interner> = LazyLock::new(interner::Interner::new);
14
15#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
20#[serde(into = "&'static str", from = "String")]
21pub struct Ident(interner::Symbol);
22
23impl Ident {
24 pub fn new(s: impl AsRef<str>) -> Self {
25 Self(INTERNER.intern(s.as_ref()))
26 }
27
28 pub fn label(&self) -> &'static str {
30 INTERNER.lookup(self.0)
31 }
32
33 pub fn into_label(self) -> String {
34 self.label().to_owned()
35 }
36
37 pub fn fresh() -> Self {
46 increment!("Ident::fresh");
47 Self::new(format!("{}{}", GEN_PREFIX, GeneratedCounter::next()))
48 }
49
50 pub fn spanned(self, pos: TermPos) -> LocIdent {
52 LocIdent {
53 ident: self,
54 pos,
55 generated: self.label().starts_with(GEN_PREFIX),
56 }
57 }
58}
59
60impl fmt::Display for Ident {
61 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 write!(f, "{}", self.label())
63 }
64}
65
66impl fmt::Debug for Ident {
67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68 write!(f, "`{}`", self.label())
69 }
70}
71
72impl From<Ident> for LocIdent {
73 fn from(ident: Ident) -> Self {
74 ident.spanned(TermPos::None)
75 }
76}
77
78impl From<&LocIdent> for Ident {
79 fn from(ident: &LocIdent) -> Self {
80 ident.ident()
81 }
82}
83
84impl PartialOrd for Ident {
85 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
86 Some(self.cmp(other))
87 }
88}
89
90impl Ord for Ident {
91 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
92 self.label().cmp(other.label())
93 }
94}
95
96impl<'a> From<&'a str> for Ident {
97 fn from(s: &'a str) -> Self {
98 Ident::new(s)
99 }
100}
101
102impl From<String> for Ident {
103 fn from(s: String) -> Self {
104 Ident::new(s)
105 }
106}
107
108impl From<Ident> for &'static str {
109 fn from(id: Ident) -> &'static str {
110 id.label()
111 }
112}
113
114#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
119#[serde(into = "String", from = "String")]
120pub struct LocIdent {
121 ident: Ident,
122 pub pos: TermPos,
123 generated: bool,
124}
125
126impl LocIdent {
127 pub fn new_with_pos(label: impl AsRef<str>, pos: TermPos) -> Self {
128 let generated = label.as_ref().starts_with(GEN_PREFIX);
129 Self {
130 ident: Ident::new(label),
131 pos,
132 generated,
133 }
134 }
135
136 pub fn new(label: impl AsRef<str>) -> Self {
137 Self::new_with_pos(label, TermPos::None)
138 }
139
140 pub fn with_pos(self, pos: TermPos) -> LocIdent {
142 LocIdent { pos, ..self }
143 }
144
145 pub fn fresh() -> Self {
147 Ident::fresh().into()
148 }
149
150 pub fn ident(&self) -> Ident {
152 self.ident
153 }
154
155 pub fn label(&self) -> &'static str {
157 self.ident.label()
158 }
159
160 pub fn into_label(self) -> String {
161 self.label().to_owned()
162 }
163}
164
165pub const GEN_PREFIX: char = '%';
168
169impl PartialOrd for LocIdent {
170 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
171 Some(self.cmp(other))
172 }
173}
174
175impl Ord for LocIdent {
176 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
177 self.label().cmp(other.label())
178 }
179}
180
181impl PartialEq for LocIdent {
182 fn eq(&self, other: &Self) -> bool {
183 self.ident == other.ident
184 }
185}
186
187impl Eq for LocIdent {}
188
189impl Hash for LocIdent {
190 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
191 self.ident.hash(state)
192 }
193}
194
195impl Borrow<Ident> for LocIdent {
196 fn borrow(&self) -> &Ident {
197 &self.ident
198 }
199}
200
201impl fmt::Display for LocIdent {
202 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
203 write!(f, "{}", self.label())
204 }
205}
206
207#[derive(Clone, Debug, Eq, PartialEq)]
211pub struct FastOrdIdent(pub Ident);
212
213impl PartialOrd for FastOrdIdent {
214 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
215 Some(self.cmp(other))
216 }
217}
218
219impl Ord for FastOrdIdent {
220 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
221 self.0.0.cmp(&other.0.0)
222 }
223}
224
225impl<'a> From<&'a str> for LocIdent {
226 fn from(s: &'a str) -> Self {
227 LocIdent::new(s)
228 }
229}
230
231impl From<String> for LocIdent {
232 fn from(s: String) -> Self {
233 LocIdent::new(s)
234 }
235}
236
237impl From<LocIdent> for &'static str {
238 fn from(id: LocIdent) -> &'static str {
239 id.label()
240 }
241}
242
243impl From<LocIdent> for String {
248 fn from(id: LocIdent) -> String {
249 id.label().to_owned()
250 }
251}
252
253impl LocIdent {
254 pub fn is_generated(&self) -> bool {
255 self.generated
256 }
257}
258
259impl AsRef<str> for LocIdent {
260 fn as_ref(&self) -> &str {
261 self.label()
262 }
263}
264
265mod interner {
266 use std::collections::HashMap;
267 use std::sync::{Mutex, RwLock};
268
269 use typed_arena::Arena;
270
271 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
274 pub struct Symbol(u32);
275
276 pub(crate) struct Interner(RwLock<InnerInterner>);
280
281 impl Interner {
282 pub(crate) fn new() -> Self {
284 Self(RwLock::new(InnerInterner::empty()))
285 }
286
287 pub(crate) fn intern(&self, string: impl AsRef<str>) -> Symbol {
290 self.0.write().unwrap().intern(string)
291 }
292
293 pub(crate) fn lookup(&self, sym: Symbol) -> &str {
298 unsafe { std::mem::transmute::<&'_ str, &'_ str>(self.0.read().unwrap().lookup(sym)) }
302 }
303 }
304
305 #[ouroboros::self_referencing]
307 struct InnerInterner {
308 arena: Mutex<Arena<u8>>,
310
311 #[borrows(arena)]
313 #[covariant]
314 map: HashMap<&'this str, Symbol>,
315
316 #[borrows(arena)]
318 #[covariant]
319 vec: Vec<&'this str>,
320 }
321
322 impl InnerInterner {
323 fn empty() -> Self {
325 Self::new(
326 Mutex::new(Arena::new()),
327 |_arena| HashMap::new(),
328 |_arena| Vec::new(),
329 )
330 }
331
332 fn intern(&mut self, string: impl AsRef<str>) -> Symbol {
335 if let Some(sym) = self.borrow_map().get(string.as_ref()) {
336 return *sym;
337 }
338 let in_string = unsafe {
342 std::mem::transmute::<&'_ str, &'_ str>(
343 self.borrow_arena()
344 .lock()
345 .unwrap()
346 .alloc_str(string.as_ref()),
347 )
348 };
349 let sym = Symbol(self.borrow_vec().len() as u32);
350 self.with_vec_mut(|v| v.push(in_string));
351 self.with_map_mut(|m| m.insert(in_string, sym));
352 sym
353 }
354 fn lookup(&self, sym: Symbol) -> &str {
362 self.borrow_vec()[sym.0 as usize]
363 }
364 }
365
366 #[cfg(test)]
367 mod tests {
368 use super::*;
369
370 #[test]
371 fn test_intern_then_lookup() {
372 let interner = Interner::new();
373 let test_string = "test_string";
374 let sym = interner.intern(test_string);
375 assert_eq!(interner.lookup(sym), test_string);
376 }
377
378 #[test]
379 fn test_intern_twice_has_same_symbol() {
380 let interner = Interner::new();
381 let test_string = "test_string";
382 let sym1 = interner.intern(test_string);
383 let sym2 = interner.intern(test_string);
384 assert_eq!(sym1, sym2);
385 }
386
387 #[test]
388 fn test_intern_two_different_has_different_symbols() {
389 let interner = Interner::new();
390 let sym1 = interner.intern("a");
391 let sym2 = interner.intern("b");
392 assert_ne!(sym1, sym2);
393 }
394
395 #[test]
396 fn test_large_number_of_interns() {
397 let interner = Interner::new();
398 for i in 0..10000 {
399 let i = i.to_string();
400 let sym = interner.intern(&i);
401 assert_eq!(i, interner.lookup(sym));
402 }
403 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
404 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
405 for i in 0..10000 {
407 let i = i.to_string();
408 let sym = interner.intern(&i);
409 assert_eq!(i, interner.lookup(sym));
410 }
411 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
412 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
413 }
414 }
415}