Skip to main content

tatara_lisp_eval/
module.rs

//! Module system — file-as-module + qualified names + alias imports.
//!
//! Design rationale (researched, see commit history): file = module.
//! No explicit `(namespace foo)` declaration; the file's path IS the
//! module's identifier. Exports are explicit via `(provide ...)`;
//! imports go through `(require "path" :as alias)` or `(require "path"
//! :refer (a b c))`. Qualified names like `foo/bar` resolve via the
//! loaded module table at eval time.
//!
//! Loader injection: the eval crate is filesystem-free. Embedders pass
//! a `Loader` trait object that resolves a module path string into
//! source. `tatara-script` provides a `FilesystemLoader`; tests use an
//! in-memory `MapLoader`.
//!
//! Cycle detection: each `require` pushes the path onto a load stack;
//! re-entering the same path raises `EvalError::User`. This is the
//! simplest sound approach — no need for two-phase resolution.
18
19use std::collections::{HashMap, HashSet};
20use std::sync::{Arc, Mutex};
21
22use thiserror::Error;
23
24use crate::value::Value;
25
26/// One module's contribution to the global symbol table:
27/// every binding it defines, plus the subset that's been
28/// `(provide)`-d as exported.
29#[derive(Debug, Clone, Default)]
30pub struct Module {
31    pub path: Arc<str>,
32    pub exports: HashSet<Arc<str>>,
33    pub bindings: HashMap<Arc<str>, Value>,
34}
35
36impl Module {
37    pub fn new(path: impl Into<Arc<str>>) -> Self {
38        Self {
39            path: path.into(),
40            exports: HashSet::new(),
41            bindings: HashMap::new(),
42        }
43    }
44
45    /// Look up an exported binding. `None` if the name isn't defined
46    /// or isn't in the export set.
47    pub fn get_export(&self, name: &str) -> Option<Value> {
48        if self.exports.contains(name) {
49            self.bindings.get(name).cloned()
50        } else {
51            None
52        }
53    }
54
55    /// Add to the export set. Idempotent.
56    pub fn add_export(&mut self, name: impl Into<Arc<str>>) {
57        self.exports.insert(name.into());
58    }
59
60    /// Bind a value (either from a `define` while loading or from
61    /// embedder pre-population).
62    pub fn define(&mut self, name: impl Into<Arc<str>>, value: Value) {
63        self.bindings.insert(name.into(), value);
64    }
65}
66
67/// Source-loading hook. Resolves a `module path` (the string the user
68/// wrote in `(require "path")`) into its source text. Embedders own
69/// the path semantics — relative-to-cwd, relative-to-caller, search
70/// path with `$TATARA_PATH`, in-memory map for tests, etc.
71pub trait Loader: Send + Sync {
72    fn load(&self, path: &str) -> Result<String, ModuleError>;
73}
74
/// Loader backed by an in-memory table — handy for tests and for
/// source that ships bundled with the embedder. Each key is a module
/// path and each value is that module's source text.
#[derive(Debug, Default, Clone)]
pub struct MapLoader {
    /// Module path → source text.
    pub modules: HashMap<String, String>,
}

impl MapLoader {
    /// Create a loader that knows no modules yet.
    pub fn new() -> Self {
        Self {
            modules: HashMap::new(),
        }
    }

    /// Register `source` under `path`, replacing any earlier entry
    /// for the same path. Returns `&mut Self` so calls can be chained.
    pub fn insert(&mut self, path: impl Into<String>, source: impl Into<String>) -> &mut Self {
        let key: String = path.into();
        let text: String = source.into();
        self.modules.insert(key, text);
        self
    }
}
92
93impl Loader for MapLoader {
94    fn load(&self, path: &str) -> Result<String, ModuleError> {
95        self.modules
96            .get(path)
97            .cloned()
98            .ok_or_else(|| ModuleError::NotFound(path.to_string()))
99    }
100}
101
102/// Default no-op loader for embedders that haven't wired one up yet.
103/// Returns `NotFound` for every path; modules calling `(require ...)`
104/// will surface that error to the user.
105#[derive(Debug, Default, Clone)]
106pub struct NoLoader;
107
108impl Loader for NoLoader {
109    fn load(&self, path: &str) -> Result<String, ModuleError> {
110        Err(ModuleError::NotFound(path.to_string()))
111    }
112}
113
/// Filesystem-backed loader. Turns a module path string into a list of
/// candidate files and reads the first one that can be read.
///
/// Resolution rules:
///
/// 1. A `path` whose extension is `.tlisp` or `.lisp` is used as-is.
/// 2. Any other `path` tries `<path>.tlisp`, then `<path>.lisp`, then
///    `<path>/init.tlisp`, then `<path>/init.lisp`. The suffix is
///    appended to the full file name, so `lib/v1.2` looks for
///    `lib/v1.2.tlisp` rather than `lib/v1.tlisp`.
/// 3. Relative paths resolve against `base_dir` first, then against
///    each entry of `extra_search_paths` in order. Absolute paths are
///    used directly and skip both.
///
/// Both fields are owned paths, so the type meets the `Send + Sync`
/// bound that `Loader` places on its implementors.
#[derive(Debug, Clone)]
pub struct FilesystemLoader {
    /// Directory that relative module paths are resolved against first.
    pub base_dir: std::path::PathBuf,
    /// Further roots tried, in order, after `base_dir`.
    pub extra_search_paths: Vec<std::path::PathBuf>,
}

impl FilesystemLoader {
    /// Create a loader rooted at `base_dir` with no extra search paths.
    pub fn new(base_dir: impl Into<std::path::PathBuf>) -> Self {
        Self {
            base_dir: base_dir.into(),
            extra_search_paths: Vec::new(),
        }
    }

    /// Append `paths` to the list of extra search roots and return the
    /// loader, builder-style.
    pub fn with_search_paths(
        mut self,
        paths: impl IntoIterator<Item = std::path::PathBuf>,
    ) -> Self {
        self.extra_search_paths.extend(paths);
        self
    }

    /// List every file that could satisfy `path`, in the order they
    /// should be tried: all forms under `base_dir` first, then all
    /// forms under each extra search path. An absolute `path` yields
    /// only its own forms.
    fn candidates(&self, path: &str) -> Vec<std::path::PathBuf> {
        let requested = std::path::Path::new(path);
        let has_ext = requested
            .extension()
            .is_some_and(|e| matches!(e.to_str(), Some("tlisp" | "lisp")));

        let bases: Vec<std::path::PathBuf> = if requested.is_absolute() {
            vec![requested.to_path_buf()]
        } else {
            std::iter::once(&self.base_dir)
                .chain(&self.extra_search_paths)
                .map(|root| root.join(requested))
                .collect()
        };

        let mut out = Vec::with_capacity(bases.len() * 4);
        for base in bases {
            if has_ext {
                out.push(base);
            } else {
                // Append rather than `with_extension`: the latter would
                // replace an unrelated suffix (`v1.2` -> `v1.tlisp`).
                out.push(Self::append_extension(&base, "tlisp"));
                out.push(Self::append_extension(&base, "lisp"));
                out.push(base.join("init.tlisp"));
                out.push(base.join("init.lisp"));
            }
        }
        out
    }

    /// Return `base` with `.{ext}` added to the end of its final
    /// component, keeping whatever dots that component already has.
    /// A path with no final component (such as `..`) is returned
    /// unchanged.
    fn append_extension(base: &std::path::Path, ext: &str) -> std::path::PathBuf {
        match base.file_name() {
            Some(name) => {
                let mut file = name.to_os_string();
                file.push(".");
                file.push(ext);
                base.with_file_name(file)
            }
            None => base.to_path_buf(),
        }
    }
}
178
179impl Loader for FilesystemLoader {
180    fn load(&self, path: &str) -> Result<String, ModuleError> {
181        for candidate in self.candidates(path) {
182            if let Ok(s) = std::fs::read_to_string(&candidate) {
183                return Ok(s);
184            }
185        }
186        Err(ModuleError::NotFound(path.to_string()))
187    }
188}
189
190/// Errors specific to the module pipeline. Embedders convert these
191/// to user-facing `EvalError::User { value: Value::Error(...) }`.
192#[derive(Debug, Error, Clone)]
193pub enum ModuleError {
194    #[error("module not found: {0}")]
195    NotFound(String),
196    #[error("circular require: {path} (load stack: {stack})")]
197    Circular {
198        path: String,
199        stack: String,
200    },
201    #[error("name not exported: {1} from module {0}")]
202    NotExported(String, String),
203}
204
205/// Process-global module registry. Holds every module that's been
206/// loaded so far, keyed by path. Two `(require "lib/auth")` calls
207/// from different sites share one Module instance — the file is
208/// loaded + evaluated exactly once.
209#[derive(Debug, Default, Clone)]
210pub struct ModuleRegistry {
211    inner: Arc<Mutex<RegistryInner>>,
212}
213
214#[derive(Debug, Default)]
215pub(crate) struct RegistryInner {
216    pub(crate) modules: HashMap<Arc<str>, Module>,
217    /// Currently-loading paths (for cycle detection).
218    pub(crate) loading: Vec<String>,
219    /// Exports declared via `(provide ...)` inside a still-loading
220    /// module. Drained on `finish_load` and merged into the Module.
221    /// Keyed by module path; value is the set of names provided.
222    pub(crate) exports_staging: HashMap<String, HashSet<Arc<str>>>,
223}
224
225impl ModuleRegistry {
226    pub fn new() -> Self {
227        Self::default()
228    }
229
230    /// Has this path already been fully loaded?
231    pub fn has(&self, path: &str) -> bool {
232        let g = self.inner.lock().unwrap();
233        g.modules.contains_key(path)
234    }
235
236    /// Snapshot a loaded module. Returns `None` if not yet loaded.
237    pub fn get(&self, path: &str) -> Option<Module> {
238        let g = self.inner.lock().unwrap();
239        g.modules.get(path).cloned()
240    }
241
242    /// Begin loading `path`. Pushes onto the load stack and returns
243    /// `Err(Circular)` if the path is already on the stack.
244    pub fn begin_load(&self, path: &str) -> Result<(), ModuleError> {
245        let mut g = self.inner.lock().unwrap();
246        if g.loading.iter().any(|p| p == path) {
247            return Err(ModuleError::Circular {
248                path: path.to_string(),
249                stack: g.loading.join(" → "),
250            });
251        }
252        g.loading.push(path.to_string());
253        Ok(())
254    }
255
256    /// Finish loading `path` — remove from load stack, store final
257    /// module bindings.
258    pub fn finish_load(&self, module: Module) {
259        let mut g = self.inner.lock().unwrap();
260        g.loading.retain(|p| **p != *module.path);
261        g.modules.insert(module.path.clone(), module);
262    }
263
264    /// Abort a load (e.g., after an error during eval). Drops the
265    /// path from the load stack so retries can succeed.
266    pub fn abort_load(&self, path: &str) {
267        let mut g = self.inner.lock().unwrap();
268        g.loading.retain(|p| p != path);
269    }
270
271    /// Number of fully-loaded modules. Useful for tests + tooling.
272    pub fn len(&self) -> usize {
273        self.inner.lock().unwrap().modules.len()
274    }
275
276    pub fn is_empty(&self) -> bool {
277        self.len() == 0
278    }
279
280    /// Internal access to the lock — used by the eval loop to stage
281    /// exports during a module load.
282    pub(crate) fn inner_lock(&self) -> std::sync::MutexGuard<'_, RegistryInner> {
283        self.inner.lock().unwrap()
284    }
285}
286
/// Break a qualified name such as `foo/bar` into `(alias, member)`.
///
/// The split is made at the LAST `/`, so a multi-segment alias stays
/// intact: `lib/auth/validate-token` gives alias `lib/auth` and member
/// `validate-token`.
///
/// Returns `None` when the name has no `/` at all, or when the last
/// `/` is the first or final character (`/foo`, `foo/`) — callers
/// treat those as plain unqualified names.
pub fn split_qualified(name: &str) -> Option<(&str, &str)> {
    let (alias, member) = name.rsplit_once('/')?;
    if alias.is_empty() || member.is_empty() {
        // An empty side means the separator sat at an edge of the name.
        None
    } else {
        Some((alias, member))
    }
}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a scratch directory under the system temp dir, named
    /// after the current time in nanoseconds, and return its path.
    fn tempfile_dir() -> std::path::PathBuf {
        use std::time::{SystemTime, UNIX_EPOCH};
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let scratch = std::env::temp_dir().join(format!("tatara-loader-test-{nanos}"));
        std::fs::create_dir_all(&scratch).unwrap();
        scratch
    }

    #[test]
    fn split_qualified_works() {
        let cases = [
            ("foo/bar", Some(("foo", "bar"))),
            ("lib/auth/validate", Some(("lib/auth", "validate"))),
            ("plain", None),
            ("/leading", None),
            ("trailing/", None),
        ];
        for (input, expected) in cases {
            assert_eq!(split_qualified(input), expected);
        }
    }

    #[test]
    fn map_loader_round_trips() {
        let mut loader = MapLoader::new();
        loader.insert("lib/auth", "(define x 42)");
        assert_eq!(loader.load("lib/auth").unwrap(), "(define x 42)");
        assert!(matches!(
            loader.load("missing"),
            Err(ModuleError::NotFound(_))
        ));
    }

    #[test]
    fn registry_cycle_detection() {
        let registry = ModuleRegistry::new();
        registry.begin_load("a").unwrap();
        registry.begin_load("b").unwrap();
        // "a" is still on the load stack, so re-entering it is a cycle.
        let err = registry.begin_load("a").unwrap_err();
        assert!(matches!(err, ModuleError::Circular { .. }));
    }

    #[test]
    fn registry_finish_load_makes_module_visible() {
        let registry = ModuleRegistry::new();
        registry.begin_load("foo").unwrap();

        let mut module = Module::new("foo");
        module.define("x", Value::Int(42));
        module.add_export("x");
        registry.finish_load(module);

        assert!(registry.has("foo"));
        let stored = registry.get("foo").unwrap();
        assert!(matches!(stored.get_export("x"), Some(Value::Int(42))));
    }

    #[test]
    fn registry_finish_load_removes_from_loading() {
        let registry = ModuleRegistry::new();
        registry.begin_load("foo").unwrap();
        registry.finish_load(Module::new("foo"));
        // Re-loading the same path should now succeed (not cyclic).
        registry.begin_load("foo").unwrap();
        registry.abort_load("foo");
    }

    #[test]
    fn filesystem_loader_resolves_with_extensions() {
        let dir = tempfile_dir();
        // Drop a "lib/util.tlisp" file.
        let lib = dir.join("lib");
        std::fs::create_dir_all(&lib).unwrap();
        std::fs::write(lib.join("util.tlisp"), "(define x 42)\n").unwrap();

        let loader = FilesystemLoader::new(&dir);

        // Bare name → tries `<base>/lib/util.tlisp`.
        let bare = loader.load("lib/util").unwrap();
        assert!(bare.contains("define x 42"));

        // Explicit extension also works.
        let explicit = loader.load("lib/util.tlisp").unwrap();
        assert_eq!(bare, explicit);

        // Missing path errors clearly.
        let missing = loader.load("missing/whatever");
        assert!(matches!(missing, Err(ModuleError::NotFound(_))));

        // Best-effort cleanup; a failure here must not fail the test.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn module_get_export_respects_export_set() {
        let mut module = Module::new("test");
        module.define("public", Value::Int(1));
        module.define("private", Value::Int(2));
        module.add_export("public");

        assert!(matches!(module.get_export("public"), Some(Value::Int(1))));
        // private is bound but not exported.
        assert!(module.get_export("private").is_none());
    }
}