1use once_cell::sync::Lazy;
3use serde::{Deserialize, Serialize};
4use std::{
5 borrow::Borrow,
6 fmt::{self, Debug},
7 hash::Hash,
8};
9
10use crate::{metrics::increment, position::TermPos, term::string::NickelString};
11
12simple_counter::generate_counter!(GeneratedCounter, usize);
13static INTERNER: Lazy<interner::Interner> = Lazy::new(interner::Interner::new);
14
15#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
20#[serde(into = "&'static str", from = "String")]
21pub struct Ident(interner::Symbol);
22
23impl Ident {
24 pub fn new(s: impl AsRef<str>) -> Self {
25 Self(INTERNER.intern(s.as_ref()))
26 }
27
28 pub fn label(&self) -> &'static str {
30 INTERNER.lookup(self.0)
31 }
32
33 pub fn into_label(self) -> String {
34 self.label().to_owned()
35 }
36
37 pub fn fresh() -> Self {
46 increment!("Ident::fresh");
47 Self::new(format!("{}{}", GEN_PREFIX, GeneratedCounter::next()))
48 }
49
50 pub fn spanned(self, pos: TermPos) -> LocIdent {
52 LocIdent {
53 ident: self,
54 pos,
55 generated: self.label().starts_with(GEN_PREFIX),
56 }
57 }
58}
59
60impl fmt::Display for Ident {
61 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
62 write!(f, "{}", self.label())
63 }
64}
65
66impl fmt::Debug for Ident {
67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68 write!(f, "`{}`", self.label())
69 }
70}
71
72impl From<Ident> for LocIdent {
73 fn from(ident: Ident) -> Self {
74 ident.spanned(TermPos::None)
75 }
76}
77
78impl From<&LocIdent> for Ident {
79 fn from(ident: &LocIdent) -> Self {
80 ident.ident()
81 }
82}
83
84impl PartialOrd for Ident {
85 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
86 Some(self.cmp(other))
87 }
88}
89
90impl Ord for Ident {
91 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
92 self.label().cmp(other.label())
93 }
94}
95
96impl From<Ident> for NickelString {
97 fn from(sym: Ident) -> Self {
98 sym.to_string().into()
99 }
100}
101
102impl<'a> From<&'a str> for Ident {
103 fn from(s: &'a str) -> Self {
104 Ident::new(s)
105 }
106}
107
108impl From<String> for Ident {
109 fn from(s: String) -> Self {
110 Ident::new(s)
111 }
112}
113
114impl From<Ident> for &'static str {
115 fn from(id: Ident) -> &'static str {
116 id.label()
117 }
118}
119
120#[derive(Clone, Copy, Debug, Deserialize, Serialize)]
125#[serde(into = "String", from = "String")]
126pub struct LocIdent {
127 ident: Ident,
128 pub pos: TermPos,
129 generated: bool,
130}
131
132impl LocIdent {
133 pub fn new_with_pos(label: impl AsRef<str>, pos: TermPos) -> Self {
134 let generated = label.as_ref().starts_with(GEN_PREFIX);
135 Self {
136 ident: Ident::new(label),
137 pos,
138 generated,
139 }
140 }
141
142 pub fn new(label: impl AsRef<str>) -> Self {
143 Self::new_with_pos(label, TermPos::None)
144 }
145
146 pub fn with_pos(self, pos: TermPos) -> LocIdent {
148 LocIdent { pos, ..self }
149 }
150
151 pub fn fresh() -> Self {
153 Ident::fresh().into()
154 }
155
156 pub fn ident(&self) -> Ident {
158 self.ident
159 }
160
161 pub fn label(&self) -> &'static str {
163 self.ident.label()
164 }
165
166 pub fn into_label(self) -> String {
167 self.label().to_owned()
168 }
169}
170
171pub const GEN_PREFIX: char = '%';
174
175impl PartialOrd for LocIdent {
176 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
177 Some(self.cmp(other))
178 }
179}
180
181impl Ord for LocIdent {
182 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
183 self.label().cmp(other.label())
184 }
185}
186
187impl PartialEq for LocIdent {
188 fn eq(&self, other: &Self) -> bool {
189 self.ident == other.ident
190 }
191}
192
193impl Eq for LocIdent {}
194
195impl Hash for LocIdent {
196 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
197 self.ident.hash(state)
198 }
199}
200
201impl Borrow<Ident> for LocIdent {
202 fn borrow(&self) -> &Ident {
203 &self.ident
204 }
205}
206
207impl fmt::Display for LocIdent {
208 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209 write!(f, "{}", self.label())
210 }
211}
212
213#[derive(Clone, Debug, Eq, PartialEq)]
217pub struct FastOrdIdent(pub Ident);
218
219impl PartialOrd for FastOrdIdent {
220 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
221 Some(self.cmp(other))
222 }
223}
224
225impl Ord for FastOrdIdent {
226 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
227 self.0 .0.cmp(&other.0 .0)
228 }
229}
230
231impl<'a> From<&'a str> for LocIdent {
232 fn from(s: &'a str) -> Self {
233 LocIdent::new(s)
234 }
235}
236
237impl From<String> for LocIdent {
238 fn from(s: String) -> Self {
239 LocIdent::new(s)
240 }
241}
242
243impl From<LocIdent> for &'static str {
244 fn from(id: LocIdent) -> &'static str {
245 id.label()
246 }
247}
248
249impl From<LocIdent> for String {
254 fn from(id: LocIdent) -> String {
255 id.label().to_owned()
256 }
257}
258
259impl LocIdent {
260 pub fn is_generated(&self) -> bool {
261 self.generated
262 }
263}
264
265impl AsRef<str> for LocIdent {
266 fn as_ref(&self) -> &str {
267 self.label()
268 }
269}
270
271mod interner {
272 use std::collections::HashMap;
273 use std::sync::{Mutex, RwLock};
274
275 use typed_arena::Arena;
276
277 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
280 pub struct Symbol(u32);
281
282 pub(crate) struct Interner(RwLock<InnerInterner>);
286
287 impl Interner {
288 pub(crate) fn new() -> Self {
290 Self(RwLock::new(InnerInterner::empty()))
291 }
292
293 pub(crate) fn intern(&self, string: impl AsRef<str>) -> Symbol {
296 self.0.write().unwrap().intern(string)
297 }
298
299 pub(crate) fn lookup(&self, sym: Symbol) -> &str {
304 unsafe { std::mem::transmute::<&'_ str, &'_ str>(self.0.read().unwrap().lookup(sym)) }
308 }
309 }
310
311 #[ouroboros::self_referencing]
313 struct InnerInterner {
314 arena: Mutex<Arena<u8>>,
316
317 #[borrows(arena)]
319 #[covariant]
320 map: HashMap<&'this str, Symbol>,
321
322 #[borrows(arena)]
324 #[covariant]
325 vec: Vec<&'this str>,
326 }
327
328 impl InnerInterner {
329 fn empty() -> Self {
331 Self::new(
332 Mutex::new(Arena::new()),
333 |_arena| HashMap::new(),
334 |_arena| Vec::new(),
335 )
336 }
337
338 fn intern(&mut self, string: impl AsRef<str>) -> Symbol {
341 if let Some(sym) = self.borrow_map().get(string.as_ref()) {
342 return *sym;
343 }
344 let in_string = unsafe {
348 std::mem::transmute::<&'_ str, &'_ str>(
349 self.borrow_arena()
350 .lock()
351 .unwrap()
352 .alloc_str(string.as_ref()),
353 )
354 };
355 let sym = Symbol(self.borrow_vec().len() as u32);
356 self.with_vec_mut(|v| v.push(in_string));
357 self.with_map_mut(|m| m.insert(in_string, sym));
358 sym
359 }
360 fn lookup(&self, sym: Symbol) -> &str {
368 self.borrow_vec()[sym.0 as usize]
369 }
370 }
371
372 #[cfg(test)]
373 mod tests {
374 use super::*;
375
376 #[test]
377 fn test_intern_then_lookup() {
378 let interner = Interner::new();
379 let test_string = "test_string";
380 let sym = interner.intern(test_string);
381 assert_eq!(interner.lookup(sym), test_string);
382 }
383
384 #[test]
385 fn test_intern_twice_has_same_symbol() {
386 let interner = Interner::new();
387 let test_string = "test_string";
388 let sym1 = interner.intern(test_string);
389 let sym2 = interner.intern(test_string);
390 assert_eq!(sym1, sym2);
391 }
392
393 #[test]
394 fn test_intern_two_different_has_different_symbols() {
395 let interner = Interner::new();
396 let sym1 = interner.intern("a");
397 let sym2 = interner.intern("b");
398 assert_ne!(sym1, sym2);
399 }
400
401 #[test]
402 fn test_large_number_of_interns() {
403 let interner = Interner::new();
404 for i in 0..10000 {
405 let i = i.to_string();
406 let sym = interner.intern(&i);
407 assert_eq!(i, interner.lookup(sym));
408 }
409 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
410 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
411 for i in 0..10000 {
413 let i = i.to_string();
414 let sym = interner.intern(&i);
415 assert_eq!(i, interner.lookup(sym));
416 }
417 assert_eq!(10000, interner.0.read().unwrap().borrow_map().len());
418 assert_eq!(10000, interner.0.read().unwrap().borrow_vec().len());
419 }
420 }
421}