mimium_lang/
interner.rs

1//! Storage for interning symbols, expressions and types during compilation.
2//!
3//! The global [`SessionGlobals`] instance keeps track of all identifiers and
4//! AST nodes created while parsing.
5
6use std::{
7    borrow::Cow,
8    collections::BTreeMap,
9    fmt::{self, Display},
10    hash::Hash,
11    path::PathBuf,
12    sync::{LazyLock, Mutex},
13};
14
15use slotmap::SlotMap;
16use string_interner::{StringInterner, backend::StringBackend};
17
18use crate::{
19    ast::Expr,
20    dummy_span,
21    types::{PType, RecordTypeField, Type},
22    utils::metadata::{Location, Span},
23};
// Declares the slot-map key newtypes used to address the expression arena
// (`ExprKey`) and the type arena (`TypeKey`) stored in `SessionGlobals`.
slotmap::new_key_type! {
    pub struct ExprKey;
    pub struct TypeKey;
}
28
/// Global storages shared during compilation stages.
///
/// Bundles the string interner behind [`Symbol`], the arenas holding [`Expr`]
/// and [`Type`] values, and a side table that maps node ids to their source
/// [`Location`].
pub struct SessionGlobals {
    /// Interner translating symbol text to `usize` ids and back.
    pub symbol_interner: StringInterner<StringBackend<usize>>,
    /// Arena of expressions, addressed by [`ExprKey`].
    pub expr_storage: SlotMap<ExprKey, Expr>,
    /// Arena of types, addressed by [`TypeKey`].
    pub type_storage: SlotMap<TypeKey, Type>,
    /// Source location of a node; nodes stored without a location have no entry.
    pub loc_storage: BTreeMap<NodeId, Location>,
}
36
37impl SessionGlobals {
38    fn store_expr(&mut self, expr: Expr) -> ExprNodeId {
39        let key = self.expr_storage.insert(expr);
40        ExprNodeId(key)
41    }
42
43    fn store_loc<T: ToNodeId>(&mut self, node_id: T, loc: Location) {
44        self.loc_storage.insert(node_id.to_node_id(), loc);
45    }
46
47    pub fn store_type(&mut self, ty: Type) -> TypeNodeId {
48        let key = self.type_storage.insert(ty);
49        TypeNodeId(key)
50    }
51
52    pub fn store_expr_with_location(&mut self, expr: Expr, loc: Location) -> ExprNodeId {
53        let expr_id = self.store_expr(expr);
54        self.store_loc(expr_id, loc);
55        expr_id
56    }
57
58    pub fn store_type_with_location(&mut self, ty: Type, loc: Location) -> TypeNodeId {
59        let type_id = self.store_type(ty);
60        self.store_loc(type_id, loc);
61        type_id
62    }
63
64    // TODO: in theory, instead of cloning, this can return &Expr with an
65    // extended lifetime by `std::mem::transmute()` just like we do in
66    // Symbol::as_str(). However, we would get segfault for some reason.
67    //
68    // cf. https://github.com/tomoyanonymous/mimium-rs/pull/27#issuecomment-2306226748
69    pub fn get_expr(&self, expr_id: ExprNodeId) -> Expr {
70        if cfg!(test) {
71            self.expr_storage.get(expr_id.0).unwrap().clone()
72        } else {
73            unsafe { self.expr_storage.get_unchecked(expr_id.0) }.clone()
74        }
75    }
76
77    pub fn get_type(&self, type_id: TypeNodeId) -> Type {
78        if cfg!(test) {
79            self.type_storage.get(type_id.0).unwrap().clone()
80        } else {
81            unsafe { self.type_storage.get_unchecked(type_id.0) }.clone()
82        }
83    }
84
85    pub fn get_span<T: ToNodeId>(&self, node_id: T) -> Option<&Location> {
86        self.loc_storage.get(&node_id.to_node_id())
87    }
88}
89
90static SESSION_GLOBALS: LazyLock<Mutex<SessionGlobals>> = LazyLock::new(|| {
91    Mutex::new(SessionGlobals {
92        symbol_interner: StringInterner::new(),
93        expr_storage: SlotMap::with_key(),
94        type_storage: SlotMap::with_key(),
95        loc_storage: BTreeMap::new(),
96    })
97});
98
99pub fn with_session_globals<R, F>(f: F) -> R
100where
101    F: FnOnce(&mut SessionGlobals) -> R,
102{
103    if let Ok(mut guard) = SESSION_GLOBALS.lock() {
104        f(&mut *guard)
105    } else {
106        panic!("Failed to acquire lock on SESSION_GLOBALS");
107    }
108}
109
/// Handle to an interned string.
///
/// Wraps the `usize` id handed out by the session's symbol interner;
/// comparison, ordering and hashing all operate on that id.
#[derive(Default, Copy, Clone, PartialEq, Hash, Eq, PartialOrd, Ord)]
pub struct Symbol(pub usize); //Symbol Trait is implemented on usize
112
/// Conversion of a value into an interned [`Symbol`].
pub trait ToSymbol {
    /// Returns the [`Symbol`] that represents `self`.
    fn to_symbol(&self) -> Symbol;
}
116
117impl<T: AsRef<str>> ToSymbol for T {
118    fn to_symbol(&self) -> Symbol {
119        Symbol(with_session_globals(|session_globals| {
120            session_globals.symbol_interner.get_or_intern(self.as_ref())
121        }))
122    }
123}
124impl<'a> From<Symbol> for Cow<'a, str> {
125    fn from(val: Symbol) -> Self {
126        with_session_globals(|session_globals| {
127            Cow::Owned(
128                session_globals
129                    .symbol_interner
130                    .resolve(val.0)
131                    .unwrap()
132                    .to_string(),
133            )
134        })
135    }
136}
137impl Symbol {
138    pub fn as_str(&self) -> &str {
139        with_session_globals(|session_globals| unsafe {
140            // This transmute is needed to convince the borrow checker. Since
141            // the session_global should exist until the end of the session,
142            // this &str should live sufficiently long.
143            std::mem::transmute::<&str, &str>(
144                session_globals
145                    .symbol_interner
146                    .resolve(self.0)
147                    .expect("invalid symbol"),
148            )
149        })
150    }
151}
152impl AsRef<str> for Symbol {
153    fn as_ref(&self) -> &str {
154        with_session_globals(|session_globals| unsafe {
155            // This transmute is needed to convince the borrow checker. Since
156            // the session_global should exist until the end of the session,
157            // this &str should live sufficiently long.
158            std::mem::transmute::<&str, &str>(
159                session_globals
160                    .symbol_interner
161                    .resolve(self.0)
162                    .expect("invalid symbol"),
163            )
164        })
165    }
166}
167
168// Note: to_string() is auto-implemented by this
169impl Display for Symbol {
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        write!(f, "{}", self.as_str())
172    }
173}
174impl std::fmt::Debug for Symbol {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        write!(f, "{}({})", self.as_str(), self.0)
177    }
178}
179
/// Arena-tagged node identifier, used as the key of the location table so
/// that expression and type keys can share one map.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum NodeId {
    /// Key into the expression arena.
    ExprArena(ExprKey),
    /// Key into the type arena.
    TypeArena(TypeKey),
}
185
/// Copyable handle to an expression stored in the session's expression arena.
#[derive(Clone, Copy, Default)]
pub struct ExprNodeId(pub ExprKey);
188
/// Copyable handle to a type stored in the session's type arena.
#[derive(Clone, Copy, Default)]
pub struct TypeNodeId(pub TypeKey);
191
192// traits required for Key trait
193
194impl PartialEq for ExprNodeId {
195    fn eq(&self, other: &Self) -> bool {
196        self.to_expr() == other.to_expr() && self.to_span() == other.to_span()
197    }
198}
199
200impl PartialEq for TypeNodeId {
201    fn eq(&self, other: &Self) -> bool {
202        self.to_type() == other.to_type() && self.to_span() == other.to_span()
203    }
204}
// Marker impl: declares the content-based `PartialEq` of `TypeNodeId` to be a
// full equivalence relation.
impl Eq for TypeNodeId {}
206impl Hash for TypeNodeId {
207    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
208        self.0.hash(state);
209    }
210}
211
212impl state_tree::tree::SizedType for TypeNodeId {
213    fn word_size(&self) -> u64 {
214        match self.to_type() {
215            Type::Primitive(PType::Unit) => 0,
216            Type::Primitive(PType::String) => 1,
217            Type::Primitive(_) => 1,
218            Type::Array(_) => 1, //array is represented as a pointer to the special storage
219            Type::Tuple(types) => types.iter().map(|t| t.word_size()).sum(),
220            Type::Record(types) => types
221                .iter()
222                .map(|RecordTypeField { ty, .. }| ty.word_size())
223                .sum(),
224            Type::Function { arg: _, ret: _ } => 1,
225            Type::Ref(_) => 1,
226            Type::Code(_) => todo!(),
227            _ => {
228                //todo: this may contain intermediate types
229                1
230            }
231        }
232    }
233}
234
235impl ExprNodeId {
236    pub fn to_expr(&self) -> Expr {
237        with_session_globals(|session_globals| session_globals.get_expr(*self))
238    }
239
240    pub fn to_span(&self) -> Span {
241        with_session_globals(|session_globals| match session_globals.get_span(*self) {
242            Some(loc) => loc.span.clone(),
243            None => dummy_span!(),
244        })
245    }
246    pub fn to_location(&self) -> Location {
247        with_session_globals(|session_globals| match session_globals.get_span(*self) {
248            Some(loc) => loc.clone(),
249            None => Location {
250                span: dummy_span!(),
251                path: PathBuf::new(),
252            },
253        })
254    }
255}
256
257impl TypeNodeId {
258    pub fn to_type(&self) -> Type {
259        with_session_globals(|session_globals| session_globals.get_type(*self))
260    }
261
262    pub fn to_span(&self) -> Span {
263        with_session_globals(|session_globals| match session_globals.get_span(*self) {
264            Some(loc) => loc.span.clone(),
265            None => dummy_span!(),
266        })
267    }
268    pub fn to_loc(&self) -> Location {
269        let dummy_path = PathBuf::new();
270        with_session_globals(|session_globals| match session_globals.get_span(*self) {
271            Some(loc) => loc.clone(),
272            None => Location {
273                span: dummy_span!(),
274                path: dummy_path,
275            },
276        })
277    }
278    // Flatten a composite type into a list of types so that the offset of the
279    // element can be calculated easily. For example:
280    //
281    // original:       Tuple(float, function, Tuple(float, float))
282    // flattened form: [float, function, float, float]
283    pub fn flatten(&self) -> Vec<Self> {
284        match self.to_type() {
285            Type::Tuple(t) => t.iter().flat_map(|t| t.flatten()).collect::<Vec<_>>(),
286            Type::Record(t) => t
287                .iter()
288                .flat_map(|RecordTypeField { ty, .. }| ty.flatten())
289                .collect::<Vec<_>>(),
290            _ => vec![*self],
291        }
292    }
293}
294
/// Conversion of an arena-specific handle into the arena-tagged [`NodeId`]
/// used to key the location table.
pub trait ToNodeId {
    /// Returns the [`NodeId`] corresponding to `self`.
    fn to_node_id(&self) -> NodeId;
}
298
299impl ToNodeId for ExprNodeId {
300    fn to_node_id(&self) -> NodeId {
301        NodeId::ExprArena(self.0)
302    }
303}
304
305impl ToNodeId for TypeNodeId {
306    fn to_node_id(&self) -> NodeId {
307        NodeId::TypeArena(self.0)
308    }
309}
310impl std::fmt::Display for ExprNodeId {
311    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
312        let span = self.to_span();
313        write!(f, "{:?},{}..{}", self.to_expr(), span.start, span.end)
314    }
315}
316impl std::fmt::Debug for ExprNodeId {
317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
318        let span = self.to_span();
319        write!(f, "{:#?},{}..{}", self.to_expr(), span.start, span.end)
320    }
321}
322impl std::fmt::Display for TypeNodeId {
323    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324        let span = self.to_span();
325
326        write!(f, "{:?},{}..{}", self.to_type(), span.start, span.end)
327    }
328}
329impl std::fmt::Debug for TypeNodeId {
330    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
331        let span = self.to_span();
332
333        write!(f, "{:#?},{}..{}", self.to_type(), span.start, span.end)
334    }
335}