Skip to main content

mq_lang/
module.rs

1pub mod error;
2pub mod resolver;
3
4use crate::{
5    Arena, ArenaId, Program, Shared, TokenArena,
6    ast::{node as ast, parser::Parser},
7    lexer::{self, Lexer},
8    module::{
9        error::ModuleError,
10        resolver::{LocalFsModuleResolver, ModuleResolver},
11    },
12};
13use rustc_hash::FxHashMap;
14use smol_str::SmolStr;
15use std::{borrow::Cow, cell::RefCell, path::PathBuf, sync::LazyLock};
16
17use crate::Token;
18
/// Snapshot of a parsed builtin module plus the tokens that parse produced.
///
/// `load_builtin` stores one of these after a parse on pristine arenas and
/// replays it on later pristine loads instead of parsing again.
19struct BuiltinCache {
    /// Tokens copied from the token arena, skipping its first entry (index 0).
20    tokens: Vec<Shared<Token>>,
    /// The `Module` value returned by the parse that filled the cache.
21    module: Module,
22}
23
// Per-thread slot holding the builtin cache. It starts as `None` and is
// written by `load_builtin` after a successful parse on pristine arenas.
24thread_local! {
25    static BUILTIN_CACHE: RefCell<Option<BuiltinCache>> = const { RefCell::new(None) };
26}
27
/// Identifier of a registered module: an arena id into the loader's module-name arena.
28pub type ModuleId = ArenaId<ModuleName>;
29
/// Name under which a module is registered in the loader.
30type ModuleName = SmolStr;
/// Maps a standard-module name to a function returning that module's embedded source text.
31type StandardModules = FxHashMap<SmolStr, fn() -> &'static str>;
32
33impl<T: ModuleResolver> Default for ModuleLoader<T> {
34    fn default() -> Self {
35        Self::new(T::default())
36    }
37}
38
/// Tracks which modules have been registered and turns module source into [`Module`] values.
///
/// `T` is the resolver backend; it defaults to [`LocalFsModuleResolver`].
39#[derive(Debug, Clone)]
40pub struct ModuleLoader<T: ModuleResolver = LocalFsModuleResolver> {
    /// Names of every registered module; `new` allocates the top-level name first.
41    pub(crate) loaded_modules: Arena<ModuleName>,
    /// Top-level source text stored by `set_source_code` (only with the `debugger` feature).
42    #[cfg(feature = "debugger")]
43    pub(crate) source_code: Option<String>,
    /// Backend the loader delegates to for path lookup, search paths and source resolution.
44    resolver: T,
45}
46
/// A loaded module: its name and its top-level nodes grouped by kind.
47#[derive(Debug, Clone, PartialEq)]
48pub struct Module {
    /// Name the module was registered under.
49    pub name: String,
    /// Top-level `Def` nodes.
50    pub functions: Program,
    /// Top-level `Include`, `Module` and `Import` nodes.
51    pub modules: Program,
    /// Top-level `Let` nodes.
52    pub vars: Program,
    /// Top-level `Macro` nodes.
53    pub macros: Program,
54}
55
56impl Module {
    /// Name under which the builtin module is registered.
57    pub const BUILTIN_MODULE: &str = "builtin";
    /// Name under which the top-level module is registered.
58    pub const TOP_LEVEL_MODULE: &str = "top-level";
    /// Id of the top-level module (arena id 0).
59    pub const TOP_LEVEL_MODULE_ID: ArenaId<ModuleName> = ArenaId::new(0);
60}
61
62pub static STANDARD_MODULES: LazyLock<StandardModules> = LazyLock::new(|| {
63    let mut map = FxHashMap::default();
64
65    macro_rules! std_module {
66        ($name:ident) => {
67            fn $name() -> &'static str {
68                include_str!(concat!("../modules/", stringify!($name), ".mq"))
69            }
70            map.insert(SmolStr::new(stringify!($name)), $name as fn() -> &'static str);
71        };
72    }
73
74    std_module!(ast);
75    std_module!(cbor);
76    std_module!(csv);
77    std_module!(fuzzy);
78    std_module!(hcl);
79    std_module!(json);
80    std_module!(section);
81    std_module!(semver);
82    std_module!(test);
83    std_module!(table);
84    std_module!(toml);
85    std_module!(toon);
86    std_module!(xml);
87    std_module!(yaml);
88
89    map
90});
91
/// Source text of the builtin module, embedded at compile time from `../builtin.mq`.
92pub const BUILTIN_FILE: &str = include_str!("../builtin.mq");
93
94impl<T: ModuleResolver> ModuleLoader<T> {
95    pub fn new(resolver: T) -> Self {
96        let mut loaded_modules = Arena::new(10);
97        loaded_modules.alloc(Module::TOP_LEVEL_MODULE.into());
98
99        Self {
100            loaded_modules,
101            #[cfg(feature = "debugger")]
102            source_code: None,
103            resolver,
104        }
105    }
106
107    #[inline(always)]
108    pub fn module_name(&self, module_id: ModuleId) -> Cow<'static, str> {
109        match module_id {
110            Module::TOP_LEVEL_MODULE_ID => Cow::Borrowed(Module::TOP_LEVEL_MODULE),
111            _ => self
112                .loaded_modules
113                .get(module_id)
114                .map(|s| Cow::Owned(s.to_string()))
115                .unwrap_or_else(|| Cow::Borrowed("<unknown>")),
116        }
117    }
118
119    pub fn get_module_path(&self, module_name: &str) -> Result<String, ModuleError> {
120        self.resolver.get_path(module_name)
121    }
122
123    #[cfg(feature = "debugger")]
124    pub fn set_source_code(&mut self, source_code: String) {
125        self.source_code = Some(source_code);
126    }
127
128    pub fn search_paths(&self) -> Vec<PathBuf> {
129        self.resolver.search_paths()
130    }
131
132    pub fn set_search_paths(&mut self, paths: Vec<PathBuf>) {
133        self.resolver.set_search_paths(paths);
134    }
135
136    pub fn load(&mut self, module_name: &str, code: &str, token_arena: TokenArena) -> Result<Module, ModuleError> {
137        if self.loaded_modules.contains(module_name.into()) {
138            return Err(ModuleError::AlreadyLoaded(Cow::Owned(module_name.to_string())));
139        }
140
141        let module_id = self.loaded_modules.len().into();
142        let mut program = Self::parse_program(code, module_id, token_arena)?;
143
144        self.load_from_ast(module_name, &mut program)
145    }
146
147    pub fn load_from_ast(&mut self, module_name: &str, program: &mut Program) -> Result<Module, ModuleError> {
148        if self.loaded_modules.contains(module_name.into()) {
149            return Err(ModuleError::AlreadyLoaded(Cow::Owned(module_name.to_string())));
150        }
151
152        let modules = program
153            .iter()
154            .filter(|node| {
155                matches!(
156                    *node.expr,
157                    ast::Expr::Include(_) | ast::Expr::Module(_, _) | ast::Expr::Import(_)
158                )
159            })
160            .cloned()
161            .collect::<Vec<_>>();
162
163        let functions = program
164            .iter()
165            .filter(|node| matches!(*node.expr, ast::Expr::Def(..)))
166            .cloned()
167            .collect::<Vec<_>>();
168
169        let vars = program
170            .iter()
171            .filter(|node| matches!(*node.expr, ast::Expr::Let(..)))
172            .cloned()
173            .collect::<Vec<_>>();
174
175        let macros = program
176            .iter()
177            .filter(|node| matches!(*node.expr, ast::Expr::Macro(..)))
178            .cloned()
179            .collect::<Vec<_>>();
180
181        let expected_len = functions.len() + modules.len() + vars.len() + macros.len();
182
183        if program.len() != expected_len {
184            return Err(ModuleError::InvalidModule);
185        }
186
187        self.loaded_modules.alloc(module_name.into());
188
189        Ok(Module {
190            name: module_name.to_string(),
191            functions,
192            modules,
193            vars,
194            macros,
195        })
196    }
197
198    pub fn load_from_file(&mut self, module_path: &str, token_arena: TokenArena) -> Result<Module, ModuleError> {
199        let program = self.resolve(module_path)?;
200        self.load(module_path, &program, token_arena)
201    }
202
203    pub fn resolve(&self, module_name: &str) -> Result<String, ModuleError> {
204        if STANDARD_MODULES.contains_key(module_name) {
205            Ok(STANDARD_MODULES.get(module_name).map(|f| f()).unwrap().to_string())
206        } else {
207            self.resolver.resolve(module_name)
208        }
209    }
210
211    pub fn load_builtin(&mut self, token_arena: TokenArena) -> Result<Module, ModuleError> {
212        if self.loaded_modules.contains(Module::BUILTIN_MODULE.into()) {
213            return Err(ModuleError::AlreadyLoaded(Cow::Borrowed(Module::BUILTIN_MODULE)));
214        }
215
216        // Cache is only valid when both arenas are in their initial state (builtin
217        // module_id == 1, tokens right after the dummy EOF). Fall back to full parse otherwise.
218        let pristine = self.loaded_modules.len() == 1 && {
219            #[cfg(not(feature = "sync"))]
220            {
221                token_arena.borrow().len() == 1
222            }
223            #[cfg(feature = "sync")]
224            {
225                token_arena.read().unwrap().len() == 1
226            }
227        };
228
229        if pristine {
230            let cached =
231                BUILTIN_CACHE.with(|cache| cache.borrow().as_ref().map(|c| (c.tokens.clone(), c.module.clone())));
232
233            if let Some((tokens, module)) = cached {
234                {
235                    #[cfg(not(feature = "sync"))]
236                    token_arena.borrow_mut().extend_from_slice(&tokens);
237                    #[cfg(feature = "sync")]
238                    token_arena.write().unwrap().extend_from_slice(&tokens);
239                }
240                self.loaded_modules.alloc(Module::BUILTIN_MODULE.into());
241                return Ok(module);
242            }
243        }
244
245        let module = self.load(Module::BUILTIN_MODULE, BUILTIN_FILE, Shared::clone(&token_arena))?;
246
247        if pristine {
248            let tokens = {
249                #[cfg(not(feature = "sync"))]
250                let arena = token_arena.borrow();
251                #[cfg(feature = "sync")]
252                let arena = token_arena.read().unwrap();
253                arena.as_slice()[1..].iter().map(Shared::clone).collect::<Vec<_>>()
254            };
255
256            BUILTIN_CACHE.with(|cache| {
257                *cache.borrow_mut() = Some(BuiltinCache {
258                    tokens,
259                    module: module.clone(),
260                });
261            });
262        }
263
264        Ok(module)
265    }
266
267    #[cfg(feature = "debugger")]
268    pub fn get_source_code_for_debug(&self, module_id: ModuleId) -> Result<String, ModuleError> {
269        let name = self.module_name(module_id);
270        match name.as_ref() {
271            Module::TOP_LEVEL_MODULE => Ok(self.source_code.clone().unwrap_or_default()),
272            Module::BUILTIN_MODULE => Ok(BUILTIN_FILE.to_string()),
273            module_name => self.resolve(module_name),
274        }
275    }
276
277    pub fn get_source_code(&self, module_id: ModuleId, source_code: String) -> Result<String, ModuleError> {
278        let name = self.module_name(module_id);
279        match name.as_ref() {
280            Module::TOP_LEVEL_MODULE => Ok(source_code),
281            Module::BUILTIN_MODULE => Ok(BUILTIN_FILE.to_string()),
282            module_name => self.resolve(module_name),
283        }
284    }
285
286    /// Returns the display filename for a module (e.g. `"builtin.mq"`, `"csv.mq"`, `""` for top-level).
287    pub fn module_file_name(&self, module_id: ModuleId) -> String {
288        let name = self.module_name(module_id);
289        match name.as_ref() {
290            Module::TOP_LEVEL_MODULE => String::new(),
291            other => resolver::module_name(other).to_string(),
292        }
293    }
294
295    fn parse_program(code: &str, module_id: ModuleId, token_arena: TokenArena) -> Result<Program, ModuleError> {
296        let tokens = Lexer::new(lexer::Options::default()).tokenize(code, module_id)?;
297        let mut token_arena = {
298            #[cfg(not(feature = "sync"))]
299            {
300                token_arena.borrow_mut()
301            }
302
303            #[cfg(feature = "sync")]
304            {
305                token_arena.write().unwrap()
306            }
307        };
308
309        let program = Parser::new(
310            tokens.into_iter().map(Shared::new).collect::<Vec<_>>().iter(),
311            &mut token_arena,
312            module_id,
313        )
314        .parse()?;
315
316        Ok(program)
317    }
318}
319
#[cfg(test)]
mod tests {
    use rstest::{fixture, rstest};
    use smallvec::{SmallVec, smallvec};
    use smol_str::SmolStr;

    use crate::{
        Range, Shared, SharedCell, Token, TokenKind,
        ast::node::{self as ast, IdentWithToken, Param},
        module::LocalFsModuleResolver,
        range::Position,
        token_alloc,
    };

    use super::{Module, ModuleError, ModuleLoader};

    /// Token arena type used by every test in this module.
    type TestTokenArena = Shared<SharedCell<crate::arena::Arena<Shared<Token>>>>;

    /// Builds an arena holding a single dummy EOF token at index 0.
    ///
    /// Kept as a plain function so tests can create additional pristine arenas
    /// besides the one injected by the `pristine_token_arena` fixture.
    fn new_pristine_arena() -> TestTokenArena {
        let arena = Shared::new(SharedCell::new(crate::arena::Arena::new(2048)));
        token_alloc(
            &arena,
            &Shared::new(Token {
                kind: TokenKind::Eof,
                range: Range::default(),
                module_id: Module::TOP_LEVEL_MODULE_ID,
            }),
        );
        arena
    }

    /// Returns the number of tokens currently stored in `arena`.
    fn arena_len(arena: &TestTokenArena) -> usize {
        #[cfg(not(feature = "sync"))]
        let guard = arena.borrow();
        #[cfg(feature = "sync")]
        let guard = arena.read().unwrap();
        guard.len()
    }

    #[fixture]
    fn token_arena() -> TestTokenArena {
        Shared::new(SharedCell::new(crate::arena::Arena::new(10)))
    }

    /// Arena that mirrors the engine's initial state: one dummy EOF token at index 0.
    /// Required to exercise the "pristine" cache path in `load_builtin`.
    #[fixture]
    fn pristine_token_arena() -> TestTokenArena {
        new_pristine_arena()
    }

    #[rstest]
    #[case::load1("test".to_string(), Err(ModuleError::InvalidModule))]
    #[case::load2("let test = \"value\"".to_string(), Ok(Module{
        name: "test".to_string(),
        functions: Vec::new(),
        modules: Vec::new(),
        vars: vec![
            Shared::new(ast::Node{token_id: 0.into(), expr: Shared::new(ast::Expr::Let(
                ast::Pattern::Ident(IdentWithToken::new_with_token("test", Some(Shared::new(Token{
                    kind: TokenKind::Ident(SmolStr::new("test")),
                    range: Range{start: Position{line: 1, column: 5}, end: Position{line: 1, column: 9}},
                    module_id: 1.into()
                })))),
                Shared::new(ast::Node{token_id: 2.into(), expr: Shared::new(ast::Expr::Literal(ast::Literal::String("value".to_string())))})
            ))})],
        macros: Vec::new(),
    }))]
    #[case::load3("def test(): 1;".to_string(), Ok(Module{
        name: "test".to_string(),
        modules: Vec::new(),
        functions: vec![
            Shared::new(ast::Node{token_id: 0.into(), expr: Shared::new(ast::Expr::Def(
            IdentWithToken::new_with_token("test", Some(Shared::new(Token{
                kind: TokenKind::Ident(SmolStr::new("test")),
                range: Range{start: Position{line: 1, column: 5}, end: Position{line: 1, column: 9}},
                module_id: 1.into()
            }))),
            SmallVec::new(),
            vec![
                Shared::new(ast::Node{token_id: 2.into(), expr: Shared::new(ast::Expr::Literal(ast::Literal::Number(1.into())))})
            ]
            ))})],
        vars: Vec::new(),
        macros: Vec::new(),
    }))]
    #[case::load4("def test(a, b): add(a, b);".to_string(), Ok(Module{
        name: "test".to_string(),
        modules: Vec::new(),
        functions: vec![
            Shared::new(ast::Node{token_id: 0.into(), expr: Shared::new(ast::Expr::Def(
                IdentWithToken::new_with_token("test", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("test")), range: Range{start: Position{line: 1, column: 5}, end: Position{line: 1, column: 9}}, module_id: 1.into()}))),
                smallvec![
                    Param::new(IdentWithToken::new_with_token("a", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("a")), range: Range{start: Position{line: 1, column: 10}, end: Position{line: 1, column: 11}}, module_id: 1.into()})))),
                    Param::new(IdentWithToken::new_with_token("b", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("b")), range: Range{start: Position{line: 1, column: 13}, end: Position{line: 1, column: 14}}, module_id: 1.into()})))),
                ],
                vec![
                    Shared::new(ast::Node{token_id: 4.into(), expr: Shared::new(ast::Expr::Call(
                    IdentWithToken::new_with_token("add", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("add")), range: Range{start: Position{line: 1, column: 17}, end: Position{line: 1, column: 20}}, module_id: 1.into()}))),
                    smallvec![
                        Shared::new(ast::Node{token_id: 2.into(),
                            expr: Shared::new(
                                ast::Expr::Ident(IdentWithToken::new_with_token("a", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("a")), range: Range{start: Position{line: 1, column: 21}, end: Position{line: 1, column: 22}}, module_id: 1.into()}))))
                                )}),
                        Shared::new(ast::Node{token_id: 3.into(),
                            expr: Shared::new(
                                ast::Expr::Ident(IdentWithToken::new_with_token("b", Some(Shared::new(Token{kind: TokenKind::Ident(SmolStr::new("b")), range: Range{start: Position{line: 1, column: 24}, end: Position{line: 1, column: 25}}, module_id: 1.into()}))))
                            )})
                    ],
                ))})]
            ))})],
        vars: Vec::new(),
        macros: Vec::new(),
    }))]
    fn test_load(token_arena: TestTokenArena, #[case] program: String, #[case] expected: Result<Module, ModuleError>) {
        assert_eq!(
            ModuleLoader::new(LocalFsModuleResolver::default()).load("test", &program, token_arena),
            expected
        );
    }

    #[rstest]
    #[case::load_standard_csv("csv", Ok(Module {
        name: "csv".to_string(),
        functions: Vec::new(),
        modules: Vec::new(), // Only the module name is compared below; the node lists are placeholders.
        vars: Vec::new(),
        macros: Vec::new(),
    }))]
    fn test_load_standard_module(
        token_arena: TestTokenArena,
        #[case] module_name: &str,
        #[case] expected: Result<Module, ModuleError>,
    ) {
        let mut loader = ModuleLoader::new(LocalFsModuleResolver::default());
        let result = loader.load_from_file(module_name, token_arena.clone());
        // For an expected `Ok`, only success and the module name are checked;
        // for an expected `Err`, the error must match exactly.
        match expected {
            Ok(_) => {
                assert!(result.is_ok(), "Expected Ok, got {:?}", result);
                assert_eq!(result.unwrap().name, module_name);
            }
            Err(ref e) => {
                assert_eq!(result.unwrap_err(), *e);
            }
        }
    }

    #[test]
    fn test_standard_modules_contains_csv() {
        assert!(super::STANDARD_MODULES.contains_key("csv"));
        let csv_content = super::STANDARD_MODULES.get("csv").unwrap()();
        // `contains("")` is true for every string, so check for real content instead.
        assert!(!csv_content.trim().is_empty(), "embedded csv module must not be empty");
    }

    #[test]
    fn test_load_builtin_idempotent() {
        let token_arena = token_arena();
        let mut loader = ModuleLoader::new(LocalFsModuleResolver::default());
        assert!(loader.load_builtin(Shared::clone(&token_arena)).is_ok());
        // Second call on the same loader must return AlreadyLoaded, not corrupt state.
        assert!(matches!(
            loader.load_builtin(Shared::clone(&token_arena)),
            Err(ModuleError::AlreadyLoaded(_))
        ));
    }

    #[test]
    fn test_load_builtin_non_pristine_falls_back_to_parse() {
        // Load another module first so the arenas are no longer in their initial state.
        let token_arena = token_arena();
        let mut loader = ModuleLoader::new(LocalFsModuleResolver::default());
        loader
            .load("other", "def dummy(): 1;", Shared::clone(&token_arena))
            .expect("should load other module");

        // load_builtin must still succeed even though the arenas are non-pristine.
        let result = loader.load_builtin(Shared::clone(&token_arena));
        assert!(result.is_ok(), "load_builtin failed on non-pristine state: {result:?}");

        let module = result.unwrap();
        assert_eq!(module.name, Module::BUILTIN_MODULE);
    }

    /// Token arena size must be the same whether the builtin module was loaded from a fresh
    /// parse or replayed from the thread-local cache.
    #[rstest]
    fn test_load_builtin_cache_arena_size_consistent(pristine_token_arena: TestTokenArena) {
        let arena1 = pristine_token_arena;
        let mut loader1 = ModuleLoader::new(LocalFsModuleResolver::default());
        loader1.load_builtin(Shared::clone(&arena1)).unwrap();
        let size1 = arena_len(&arena1);

        let arena2 = new_pristine_arena();
        let mut loader2 = ModuleLoader::new(LocalFsModuleResolver::default());
        loader2.load_builtin(Shared::clone(&arena2)).unwrap();
        let size2 = arena_len(&arena2);

        assert_eq!(size1, size2, "arena size must match between cache and fresh parse");
        assert!(size1 > 1, "builtin tokens must be added to the arena");
    }

    /// The module returned from cache must have the same function/var/macro counts as a fresh parse.
    #[rstest]
    fn test_load_builtin_cache_module_counts_consistent(pristine_token_arena: TestTokenArena) {
        let mut loader1 = ModuleLoader::new(LocalFsModuleResolver::default());
        let module1 = loader1.load_builtin(pristine_token_arena).unwrap();

        let mut loader2 = ModuleLoader::new(LocalFsModuleResolver::default());
        let module2 = loader2.load_builtin(new_pristine_arena()).unwrap();

        assert_eq!(module1.name, module2.name);
        assert_eq!(module1.functions.len(), module2.functions.len());
        assert_eq!(module1.vars.len(), module2.vars.len());
        assert_eq!(module1.macros.len(), module2.macros.len());
        assert_eq!(module1.modules.len(), module2.modules.len());
    }

    /// After load_builtin, the builtin module must be registered at loaded_modules index 1
    /// (TOP_LEVEL_MODULE is always 0).
    #[rstest]
    fn test_load_builtin_module_registered_at_id_one(pristine_token_arena: TestTokenArena) {
        let mut loader = ModuleLoader::new(LocalFsModuleResolver::default());
        loader.load_builtin(pristine_token_arena).unwrap();

        assert_eq!(loader.loaded_modules.len(), 2);
        assert!(loader.loaded_modules.contains(Module::BUILTIN_MODULE.into()));
    }

    /// All tokens injected from cache must carry module_id == 1 (BUILTIN_MODULE_ID),
    /// so that error diagnostics resolve to the builtin source file rather than garbage.
    #[rstest]
    fn test_load_builtin_cache_tokens_have_builtin_module_id(pristine_token_arena: TestTokenArena) {
        let mut loader1 = ModuleLoader::new(LocalFsModuleResolver::default());
        loader1.load_builtin(pristine_token_arena).unwrap();

        // Second pristine load — this is the cache-hit path.
        let arena2 = new_pristine_arena();
        let mut loader2 = ModuleLoader::new(LocalFsModuleResolver::default());
        loader2.load_builtin(Shared::clone(&arena2)).unwrap();

        let builtin_module_id: crate::ModuleId = 1.into();
        #[cfg(not(feature = "sync"))]
        let arena = arena2.borrow();
        #[cfg(feature = "sync")]
        let arena = arena2.read().unwrap();
        for token in arena.as_slice()[1..].iter() {
            assert_eq!(
                token.module_id, builtin_module_id,
                "cached builtin token must have BUILTIN_MODULE_ID"
            );
        }
    }
}