1use serde::{Deserialize, Serialize};
3use std::{
4 borrow::Borrow,
5 fmt::{self, Debug},
6 hash::Hash,
7 sync::{
8 LazyLock,
9 atomic::{AtomicUsize, Ordering},
10 },
11};
12
13use crate::{metrics::increment, position::TermPos};
14
15static INTERNER: LazyLock<interner::Interner> = LazyLock::new(interner::Interner::new);
16static COUNTER: AtomicUsize = AtomicUsize::new(0);
17
18#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
23#[serde(into = "&'static str", from = "String")]
24pub struct Ident(interner::Symbol);
25
26impl Ident {
27 pub fn new(s: impl AsRef<str>) -> Self {
28 Self(INTERNER.get_or_intern(s.as_ref()))
29 }
30
31 pub fn label(&self) -> &'static str {
33 INTERNER.lookup(self.0)
34 }
35
36 pub fn into_label(self) -> String {
37 self.label().to_owned()
38 }
39
40 #[doc(hidden)]
47 pub fn find_generated(s: &str) -> Self {
48 Self(INTERNER.find_generated(s))
49 }
50
51 pub fn fresh() -> Self {
66 increment!("Ident::fresh");
67 Self(INTERNER.intern_generated(format!(
68 "{}{}",
69 GEN_PREFIX,
70 COUNTER.fetch_add(1, Ordering::Relaxed)
71 )))
72 }
73
74 pub fn spanned(self, pos: TermPos) -> LocIdent {
76 LocIdent { ident: self, pos }
77 }
78
79 pub fn is_generated(&self) -> bool {
81 INTERNER.is_generated(self.0)
82 }
83}
84
85impl fmt::Display for Ident {
86 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
87 write!(f, "{}", self.label())
88 }
89}
90
91impl fmt::Debug for Ident {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 if self.is_generated() {
94 write!(f, "`{}` (generated)", self.label())
95 } else {
96 write!(f, "`{}`", self.label())
97 }
98 }
99}
100
101impl From<Ident> for LocIdent {
102 fn from(ident: Ident) -> Self {
103 ident.spanned(TermPos::None)
104 }
105}
106
107impl From<&LocIdent> for Ident {
108 fn from(ident: &LocIdent) -> Self {
109 ident.ident()
110 }
111}
112
113impl PartialOrd for Ident {
114 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
115 Some(self.cmp(other))
116 }
117}
118
119impl Ord for Ident {
120 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
121 self.label().cmp(other.label())
122 }
123}
124
125impl<'a> From<&'a str> for Ident {
126 fn from(s: &'a str) -> Self {
127 Ident::new(s)
128 }
129}
130
131impl From<String> for Ident {
132 fn from(s: String) -> Self {
133 Ident::new(s)
134 }
135}
136
137impl From<Ident> for &'static str {
138 fn from(id: Ident) -> &'static str {
139 id.label()
140 }
141}
142
143#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
148#[serde(into = "String", from = "String")]
149pub struct LocIdent {
150 ident: Ident,
151 pub pos: TermPos,
152}
153
154impl LocIdent {
155 pub fn new_with_pos(label: impl AsRef<str>, pos: TermPos) -> Self {
156 Self {
157 ident: Ident::new(label),
158 pos,
159 }
160 }
161
162 pub fn new(label: impl AsRef<str>) -> Self {
163 Self::new_with_pos(label, TermPos::None)
164 }
165
166 pub fn with_pos(self, pos: TermPos) -> LocIdent {
168 LocIdent { pos, ..self }
169 }
170
171 pub fn fresh() -> Self {
173 Ident::fresh().into()
174 }
175
176 pub fn ident(&self) -> Ident {
178 self.ident
179 }
180
181 pub fn label(&self) -> &'static str {
183 self.ident.label()
184 }
185
186 pub fn into_label(self) -> String {
187 self.label().to_owned()
188 }
189
190 pub fn is_generated(&self) -> bool {
195 self.ident.is_generated()
196 }
197}
198
199pub const GEN_PREFIX: char = '%';
202
203impl PartialOrd for LocIdent {
204 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
205 Some(self.cmp(other))
206 }
207}
208
209impl Ord for LocIdent {
210 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
211 self.label().cmp(other.label())
212 }
213}
214
215impl PartialEq for LocIdent {
216 fn eq(&self, other: &Self) -> bool {
217 self.ident == other.ident
218 }
219}
220
221impl Eq for LocIdent {}
222
223impl Hash for LocIdent {
224 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
225 self.ident.hash(state)
226 }
227}
228
229impl Borrow<Ident> for LocIdent {
230 fn borrow(&self) -> &Ident {
231 &self.ident
232 }
233}
234
235impl fmt::Display for LocIdent {
236 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237 write!(f, "{}", self.label())
238 }
239}
240
241#[derive(Clone, Debug, Eq, PartialEq)]
245pub struct FastOrdIdent(pub Ident);
246
247impl PartialOrd for FastOrdIdent {
248 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
249 Some(self.cmp(other))
250 }
251}
252
253impl Ord for FastOrdIdent {
254 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
255 self.0.0.cmp(&other.0.0)
256 }
257}
258
259impl<'a> From<&'a str> for LocIdent {
260 fn from(s: &'a str) -> Self {
261 LocIdent::new(s)
262 }
263}
264
265impl From<String> for LocIdent {
266 fn from(s: String) -> Self {
267 LocIdent::new(s)
268 }
269}
270
271impl From<LocIdent> for &'static str {
272 fn from(id: LocIdent) -> &'static str {
273 id.label()
274 }
275}
276
277impl From<LocIdent> for String {
282 fn from(id: LocIdent) -> String {
283 id.label().to_owned()
284 }
285}
286
287impl AsRef<str> for LocIdent {
288 fn as_ref(&self) -> &str {
289 self.label()
290 }
291}
292
293mod interner {
294 use std::collections::HashMap;
295 use std::sync::{Mutex, RwLock};
296
297 use typed_arena::Arena;
298
299 use super::Bitmap;
300
301 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
304 pub struct Symbol(u32);
305
306 pub(crate) struct Interner(RwLock<InnerInterner>);
310
311 impl Interner {
312 pub(crate) fn new() -> Self {
314 Self(RwLock::new(InnerInterner::empty()))
315 }
316
317 pub(crate) fn get_or_intern(&self, string: impl AsRef<str>) -> Symbol {
320 self.0.write().unwrap().get_or_intern(string)
321 }
322
323 pub(crate) fn intern_generated(&self, string: impl AsRef<str>) -> Symbol {
327 self.0.write().unwrap().intern(string, true)
328 }
329
330 pub(crate) fn lookup(&self, sym: Symbol) -> &str {
335 unsafe { std::mem::transmute::<&'_ str, &'_ str>(self.0.read().unwrap().lookup(sym)) }
339 }
340
341 pub(crate) fn find_generated(&self, s: &str) -> Symbol {
346 let inner = self.0.read().unwrap();
347 inner.with(|inner| {
348 let idx = (0..inner.vec.len())
349 .find(|&idx| inner.generated[idx] && inner.vec[idx] == s)
350 .unwrap();
351 Symbol(idx as u32)
352 })
353 }
354
355 pub(crate) fn is_generated(&self, sym: Symbol) -> bool {
356 self.0.read().unwrap().is_generated(sym)
357 }
358 }
359
360 #[ouroboros::self_referencing]
362 struct InnerInterner {
363 arena: Mutex<Arena<u8>>,
365
366 #[borrows(arena)]
368 #[covariant]
369 map: HashMap<&'this str, Symbol>,
370
371 #[borrows(arena)]
373 #[covariant]
374 vec: Vec<&'this str>,
375
376 generated: Bitmap,
378 }
379
380 impl InnerInterner {
381 fn empty() -> Self {
383 Self::new(
384 Mutex::new(Arena::new()),
385 |_arena| HashMap::new(),
386 |_arena| Vec::new(),
387 Bitmap::default(),
388 )
389 }
390
391 fn get_or_intern(&mut self, string: impl AsRef<str>) -> Symbol {
394 if let Some(sym) = self.borrow_map().get(string.as_ref()) {
395 return *sym;
396 }
397 self.intern(string, false)
398 }
399
400 fn intern(&mut self, string: impl AsRef<str>, generated: bool) -> Symbol {
402 let in_string = unsafe {
406 std::mem::transmute::<&'_ str, &'_ str>(
407 self.borrow_arena()
408 .lock()
409 .unwrap()
410 .alloc_str(string.as_ref()),
411 )
412 };
413 let sym = Symbol(self.borrow_vec().len() as u32);
414 self.with_vec_mut(|v| v.push(in_string));
415 self.with_generated_mut(|g| g.push(generated));
416 if !generated {
420 self.with_map_mut(|m| m.insert(in_string, sym));
421 }
422 sym
423 }
424
425 fn lookup(&self, sym: Symbol) -> &str {
433 self.borrow_vec()[sym.0 as usize]
434 }
435
436 fn is_generated(&self, sym: Symbol) -> bool {
437 self.borrow_generated()[sym.0 as usize]
438 }
439 }
440
441 #[cfg(test)]
442 mod tests {
443 use super::*;
444
445 #[test]
446 fn test_intern_then_lookup() {
447 let interner = Interner::new();
448 let test_string = "test_string";
449 let sym = interner.get_or_intern(test_string);
450 assert_eq!(interner.lookup(sym), test_string);
451 }
452
453 #[test]
454 fn test_intern_twice_has_same_symbol() {
455 let interner = Interner::new();
456 let test_string = "test_string";
457 let sym1 = interner.get_or_intern(test_string);
458 let sym2 = interner.get_or_intern(test_string);
459 assert_eq!(sym1, sym2);
460 }
461
462 #[test]
463 fn test_intern_two_different_has_different_symbols() {
464 let interner = Interner::new();
465 let sym1 = interner.get_or_intern("a");
466 let sym2 = interner.get_or_intern("b");
467 assert_ne!(sym1, sym2);
468 }
469
470 #[test]
471 fn test_large_number_of_interns() {
472 let interner = Interner::new();
473 for i in 0..10000 {
474 let i = i.to_string();
475 let sym = interner.get_or_intern(&i);
476 assert_eq!(i, interner.lookup(sym));
477 }
478 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
479 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
480 for i in 0..10000 {
482 let i = i.to_string();
483 let sym = interner.get_or_intern(&i);
484 assert_eq!(i, interner.lookup(sym));
485 }
486 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
487 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
488 }
489 }
490}
491
492#[derive(Default)]
495struct Bitmap {
496 data: Vec<u64>,
499 len: usize,
502}
503
504impl Bitmap {
505 fn location(idx: usize) -> (usize, u64) {
507 let shift = (idx % 64) as u32;
508 (idx / 64, 1 << shift)
509 }
510
511 fn push(&mut self, val: bool) {
512 let (idx, mask) = Bitmap::location(self.len);
513 if idx >= self.data.len() {
514 debug_assert_eq!(idx, self.data.len());
515 self.data.push(0);
516 }
517 if val {
518 self.data[idx] |= mask;
519 }
520 self.len += 1;
521 }
522}
523
524impl std::ops::Index<usize> for Bitmap {
525 type Output = bool;
526
527 fn index(&self, idx: usize) -> &bool {
528 let (idx, mask) = Bitmap::location(idx);
529 if (self.data[idx] & mask) == 0 {
530 &false
531 } else {
532 &true
533 }
534 }
535}
536
537#[cfg(test)]
538mod tests {
539 use super::Bitmap;
540
541 #[test]
542 fn bitmap_basics() {
543 let mut b = Bitmap::default();
544 b.push(true);
545 b.push(false);
546 assert!(b[0]);
547 assert!(!b[1]);
548
549 for _ in 0..64 {
550 b.push(true);
551 }
552 b.push(false);
553
554 assert!(b[63]);
555 assert!(b[64]);
556 assert!(b[65]);
557 assert!(!b[66]);
558 }
559}