Skip to main content

lex_syntax/
loader.rs

1//! Multi-file loader: resolves `import "./..."`, `import "../..."`, and
2//! `import "/abs/..."` statements relative to the importer, recursively
3//! parses, and produces a single [`Program`] with all stages merged.
4//!
5//! Names that are local to an imported file are mangled with a
6//! **per-file-path** prefix, so the same module imported via multiple
7//! aliases (or from multiple parents in a diamond shape) collapses to
8//! one set of mangled names — same SigId, same nominal identity.
9//! Stdlib imports (`import "std.foo" as bar`) pass through unchanged.
10//!
11//! ## Mangling
12//!
13//! Each loaded file gets a prefix derived from its canonical filesystem
14//! path. The entry file's prefix is empty (so `lex run main.lex
15//! process` works unchanged). Imported files use `<stem>_<hash>`
16//! where `hash` is the first 8 hex chars of SHA-256 of the canonical
17//! path string. The hash disambiguates same-stem files in different
18//! directories without forcing a project manifest.
19//!
20//! Within a file at prefix `P`:
21//!
22//! - `fn foo` declared in this file becomes `<P>.foo` (just `foo` at root).
23//! - `type T` declared in this file becomes `<P>.T`.
24//! - References to a locally-declared name get mangled, **unless** the
25//!   name is shadowed by a binder (let, fn param, lambda param, or
26//!   pattern binder) in scope.
27//! - `m.foo` where `m` is a path-import alias is rewritten to the
28//!   imported file's prefix-qualified name. Two parents importing the
29//!   same file see the same prefix → calls and types unify.
30//! - `m.foo` where `m` is a stdlib alias is unchanged.
31//!
32//! Variant constructors are **not** mangled — they live in a global
33//! namespace, and a collision between two imported types' constructors
34//! surfaces later as a type-check error. Same for record field names.
35//!
36//! ## Diamond imports
37//!
//! `main.lex` imports `./left` and `./right`, both of which import
//! `./shared`. `shared.lex` is read, parsed, and mangled only the first
//! time it is reached. Every later import that resolves to the same
//! path still registers the importer's alias against the shared prefix,
//! but contributes an empty Program, so the file's mangled items are
//! merged into the output exactly once. This is what makes
//! `s.build_report(...)` and `v.read_score(...)` agree on `Report`'s
//! nominal identity.
44//!
45//! ## Limitations (tracked separately)
46//!
47//! The mangling key is the canonical filesystem path. Moving a file
48//! changes its SigId; renaming changes the file-stem half of the
49//! prefix. The eventual fix — content-addressed identity decoupled
50//! from filesystem layout — lives with store-native imports
51//! (`import "stage:..."`); see the corresponding follow-up tracker.
52
53use std::collections::{HashMap, HashSet};
54use std::path::{Path, PathBuf};
55use thiserror::Error;
56
57use sha2::{Digest, Sha256};
58
59use crate::syntax::*;
60use crate::{parse_source, SyntaxError};
61
/// Errors that can occur while loading a Lex program, either from an
/// entry file (with local imports expanded) or from a string source.
#[derive(Debug, Error)]
pub enum LoadError {
    /// A filesystem operation on `path` failed — for example
    /// canonicalizing the entry path or reading a source file.
    #[error("read {path}: {source}")]
    Io {
        /// Display form of the path the operation was attempted on.
        path: String,
        /// The underlying I/O error.
        #[source]
        source: std::io::Error,
    },
    /// `parse_source` rejected the contents of `path`.
    #[error("parse {path}: {source}")]
    Syntax {
        /// Display form of the file that failed to parse, or `<input>`
        /// when the program came from a string source.
        path: String,
        /// The underlying syntax error.
        #[source]
        source: SyntaxError,
    },
    /// A file was imported again while it was still being loaded.
    #[error("import cycle: {chain}")]
    Cycle {
        /// The files being loaded at the time, outermost first, followed
        /// by the re-entered file, joined with ` -> `.
        chain: String,
    },
    /// A local import resolved to a path that does not exist.
    #[error("import \"{reference}\" from {importer}: file not found")]
    NotFound {
        /// Display form of the file containing the import statement.
        importer: String,
        /// The import reference exactly as written in the source.
        reference: String,
    },
    /// A string source contained a local-path import, which cannot be
    /// resolved without a base path.
    #[error("local imports (`./`, `../`, `/`) require a base path; cannot resolve from a string source")]
    LocalImportInStringSource,
}
83
84/// Load a multi-file Lex program, expanding local imports relative to
85/// the entry path. Stdlib imports (`std.*`) pass through unchanged.
86pub fn load_program(entry: &Path) -> Result<Program, LoadError> {
87    let entry_canonical = entry.canonicalize().map_err(|source| LoadError::Io {
88        path: entry.display().to_string(),
89        source,
90    })?;
91    let mut state = LoaderState {
92        in_progress: Vec::new(),
93        loaded: HashSet::new(),
94        prefixes: HashMap::new(),
95    };
96    // Entry file's prefix is empty so `lex run main.lex process` works
97    // without users typing the hashed prefix.
98    state.prefixes.insert(entry_canonical.clone(), String::new());
99    state.load(&entry_canonical)
100}
101
102/// Load a Lex program from a string source. Local-path imports are
103/// rejected up-front since there's no base path to resolve from.
104pub fn load_program_from_str(src: &str) -> Result<Program, LoadError> {
105    let prog = parse_source(src).map_err(|source| LoadError::Syntax {
106        path: "<input>".into(),
107        source,
108    })?;
109    for item in &prog.items {
110        if let Item::Import(imp) = item {
111            if is_path_import(&imp.reference) {
112                return Err(LoadError::LocalImportInStringSource);
113            }
114        }
115    }
116    Ok(prog)
117}
118
119struct LoaderState {
120    in_progress: Vec<PathBuf>,
121    /// Canonical paths that have already been merged into the output.
122    /// A second `import "./shared"` from a different parent skips
123    /// re-merging — the file's mangled items are already there.
124    loaded: HashSet<PathBuf>,
125    /// Stable mangling prefix per canonical path. Computed lazily;
126    /// the entry file is seeded with an empty prefix.
127    prefixes: HashMap<PathBuf, String>,
128}
129
130impl LoaderState {
131    fn prefix_for(&mut self, canonical: &Path) -> String {
132        if let Some(p) = self.prefixes.get(canonical) {
133            return p.clone();
134        }
135        let stem = canonical
136            .file_stem()
137            .and_then(|s| s.to_str())
138            .unwrap_or("module");
139        let mut hasher = Sha256::new();
140        hasher.update(canonical.to_string_lossy().as_bytes());
141        let digest = hasher.finalize();
142        let prefix = format!("{stem}_{:08x}", u32::from_be_bytes([
143            digest[0], digest[1], digest[2], digest[3],
144        ]));
145        self.prefixes.insert(canonical.to_path_buf(), prefix.clone());
146        prefix
147    }
148
149    fn load(&mut self, canonical: &Path) -> Result<Program, LoadError> {
150        if self.in_progress.contains(&canonical.to_path_buf()) {
151            let mut chain: Vec<String> = self
152                .in_progress
153                .iter()
154                .map(|p| p.display().to_string())
155                .collect();
156            chain.push(canonical.display().to_string());
157            return Err(LoadError::Cycle {
158                chain: chain.join(" -> "),
159            });
160        }
161        // Diamond dedupe: if this file was already merged on another
162        // path through the import graph, its items are already in the
163        // output Vec — return an empty Program so the caller's
164        // `merged_children.extend(...)` is a no-op for items, but the
165        // call still resolves so the parent's `path_imports` map gets
166        // populated below.
167        if self.loaded.contains(canonical) {
168            return Ok(Program { items: Vec::new() });
169        }
170        self.in_progress.push(canonical.to_path_buf());
171
172        let src = std::fs::read_to_string(canonical).map_err(|source| LoadError::Io {
173            path: canonical.display().to_string(),
174            source,
175        })?;
176        let prog = parse_source(&src).map_err(|source| LoadError::Syntax {
177            path: canonical.display().to_string(),
178            source,
179        })?;
180
181        let local_names: HashSet<String> = prog
182            .items
183            .iter()
184            .filter_map(|item| match item {
185                Item::FnDecl(fd) => Some(fd.name.clone()),
186                Item::TypeDecl(td) => Some(td.name.clone()),
187                _ => None,
188            })
189            .collect();
190
191        // alias used by this file → mangling prefix of the imported file
192        let mut path_imports: HashMap<String, String> = HashMap::new();
193        let mut merged_children: Vec<Item> = Vec::new();
194        let mut std_imports: Vec<Item> = Vec::new();
195        let mut my_items: Vec<Item> = Vec::new();
196
197        for item in prog.items {
198            match item {
199                Item::Import(ref imp) if is_path_import(&imp.reference) => {
200                    let resolved = resolve_import(canonical, &imp.reference)?;
201                    let child_prefix = self.prefix_for(&resolved);
202                    path_imports.insert(imp.alias.clone(), child_prefix);
203                    let child_prog = self.load(&resolved)?;
204                    merged_children.extend(child_prog.items);
205                }
206                Item::Import(_) => std_imports.push(item),
207                _ => my_items.push(item),
208            }
209        }
210
211        let my_prefix = self.prefix_for(canonical);
212        let mangler = Mangler {
213            prefix: my_prefix,
214            local_names: &local_names,
215            path_imports: &path_imports,
216        };
217        let mangled: Vec<Item> = my_items
218            .into_iter()
219            .map(|i| mangler.mangle_item(i))
220            .collect();
221
222        self.in_progress.pop();
223        self.loaded.insert(canonical.to_path_buf());
224
225        // Output order: std imports first (deduped against children's),
226        // then merged children's items, then this file's items.
227        let mut out: Vec<Item> = Vec::new();
228        for s in std_imports {
229            if !merged_children.iter().any(|m| m == &s) {
230                out.push(s);
231            }
232        }
233        out.extend(merged_children);
234        out.extend(mangled);
235        Ok(Program { items: out })
236    }
237}
238
/// Whether `reference` names a file on disk (`./…`, `../…`, or an
/// absolute `/…` path) rather than a stdlib module such as `std.foo`.
fn is_path_import(reference: &str) -> bool {
    const LOCAL_PREFIXES: [&str; 3] = ["./", "../", "/"];
    LOCAL_PREFIXES
        .iter()
        .any(|prefix| reference.starts_with(*prefix))
}
242
243fn resolve_import(importer: &Path, reference: &str) -> Result<PathBuf, LoadError> {
244    let importer_dir = importer.parent().unwrap_or_else(|| Path::new("."));
245    let mut resolved: PathBuf = if reference.starts_with('/') {
246        PathBuf::from(reference)
247    } else {
248        importer_dir.join(reference)
249    };
250    if resolved.extension().is_none() {
251        resolved.set_extension("lex");
252    }
253    if !resolved.exists() {
254        return Err(LoadError::NotFound {
255            importer: importer.display().to_string(),
256            reference: reference.to_string(),
257        });
258    }
259    Ok(resolved)
260}
261
262struct Mangler<'a> {
263    /// Mangling prefix for items declared in this file. Empty for the
264    /// entry file, `<stem>_<hash8>` for imported files.
265    prefix: String,
266    local_names: &'a HashSet<String>,
267    /// Map from local alias to the imported file's mangling prefix.
268    /// `m.foo` rewrites to `<imported_prefix>.foo` regardless of which
269    /// alias `m` was, so two parents importing the same module agree.
270    path_imports: &'a HashMap<String, String>,
271}
272
273impl<'a> Mangler<'a> {
274    fn qualify(&self, name: &str) -> String {
275        if self.prefix.is_empty() {
276            name.to_string()
277        } else {
278            format!("{}.{}", self.prefix, name)
279        }
280    }
281
282    fn mangle_item(&self, item: Item) -> Item {
283        match item {
284            Item::Import(imp) => Item::Import(imp),
285            Item::TypeDecl(td) => Item::TypeDecl(self.mangle_type_decl(td)),
286            Item::FnDecl(fd) => Item::FnDecl(self.mangle_fn_decl(fd)),
287        }
288    }
289
290    fn mangle_type_decl(&self, td: TypeDecl) -> TypeDecl {
291        TypeDecl {
292            name: self.qualify(&td.name),
293            params: td.params,
294            definition: self.mangle_type_expr(td.definition),
295        }
296    }
297
298    fn mangle_fn_decl(&self, fd: FnDecl) -> FnDecl {
299        let mut shadow = HashSet::new();
300        for p in &fd.params {
301            shadow.insert(p.name.clone());
302        }
303        FnDecl {
304            name: self.qualify(&fd.name),
305            type_params: fd.type_params,
306            params: fd
307                .params
308                .into_iter()
309                .map(|p| Param {
310                    name: p.name,
311                    ty: self.mangle_type_expr(p.ty),
312                })
313                .collect(),
314            effects: fd.effects,
315            return_type: self.mangle_type_expr(fd.return_type),
316            body: self.mangle_block(fd.body, &shadow),
317        }
318    }
319
320    fn mangle_type_expr(&self, te: TypeExpr) -> TypeExpr {
321        match te {
322            TypeExpr::Named { name, args } => TypeExpr::Named {
323                name: self.rewrite_type_name(&name),
324                args: args.into_iter().map(|a| self.mangle_type_expr(a)).collect(),
325            },
326            TypeExpr::Record(fields) => TypeExpr::Record(
327                fields
328                    .into_iter()
329                    .map(|f| TypeField {
330                        name: f.name,
331                        ty: self.mangle_type_expr(f.ty),
332                    })
333                    .collect(),
334            ),
335            TypeExpr::Tuple(items) => {
336                TypeExpr::Tuple(items.into_iter().map(|t| self.mangle_type_expr(t)).collect())
337            }
338            TypeExpr::Function {
339                params,
340                effects,
341                ret,
342            } => TypeExpr::Function {
343                params: params
344                    .into_iter()
345                    .map(|t| self.mangle_type_expr(t))
346                    .collect(),
347                effects,
348                ret: Box::new(self.mangle_type_expr(*ret)),
349            },
350            TypeExpr::Union(variants) => TypeExpr::Union(
351                variants
352                    .into_iter()
353                    .map(|v| UnionVariant {
354                        name: v.name,
355                        payload: v.payload.map(|t| self.mangle_type_expr(t)),
356                    })
357                    .collect(),
358            ),
359            TypeExpr::Refined { base, binding, predicate } => TypeExpr::Refined {
360                base: Box::new(self.mangle_type_expr(*base)),
361                binding,
362                // The predicate is an expression; its names are
363                // resolved during type-check, not loader-time, so
364                // it passes through unchanged here. Slice 2 wires
365                // up discharge through the spec-checker.
366                predicate,
367            },
368        }
369    }
370
371    /// Rewrite a possibly-qualified type name to its mangled form.
372    fn rewrite_type_name(&self, name: &str) -> String {
373        if let Some((alias, rest)) = name.split_once('.') {
374            if let Some(child) = self.path_imports.get(alias) {
375                return format!("{child}.{rest}");
376            }
377            return name.to_string();
378        }
379        if self.local_names.contains(name) {
380            return self.qualify(name);
381        }
382        name.to_string()
383    }
384
385    fn mangle_block(&self, b: Block, shadow: &HashSet<String>) -> Block {
386        let mut shadow = shadow.clone();
387        let statements = b
388            .statements
389            .into_iter()
390            .map(|s| match s {
391                Statement::Let { name, ty, value } => {
392                    let value = self.mangle_expr(value, &shadow);
393                    let ty = ty.map(|t| self.mangle_type_expr(t));
394                    shadow.insert(name.clone());
395                    Statement::Let { name, ty, value }
396                }
397                Statement::Expr(e) => Statement::Expr(self.mangle_expr(e, &shadow)),
398            })
399            .collect();
400        let result = Box::new(self.mangle_expr(*b.result, &shadow));
401        Block { statements, result }
402    }
403
404    fn mangle_expr(&self, e: Expr, shadow: &HashSet<String>) -> Expr {
405        match e {
406            Expr::Lit(_) => e,
407            Expr::Var(name) => {
408                if !shadow.contains(&name) && self.local_names.contains(&name) {
409                    Expr::Var(self.qualify(&name))
410                } else {
411                    Expr::Var(name)
412                }
413            }
414            Expr::Block(b) => Expr::Block(self.mangle_block(b, shadow)),
415            Expr::Call { callee, args } => {
416                let mangled_args: Vec<Expr> = args
417                    .into_iter()
418                    .map(|a| self.mangle_expr(a, shadow))
419                    .collect();
420                if let Expr::Field { value, field } = (*callee).clone() {
421                    if let Expr::Var(alias) = *value {
422                        if !shadow.contains(&alias) {
423                            if let Some(child) = self.path_imports.get(&alias) {
424                                return Expr::Call {
425                                    callee: Box::new(Expr::Var(format!("{child}.{field}"))),
426                                    args: mangled_args,
427                                };
428                            }
429                        }
430                    }
431                }
432                Expr::Call {
433                    callee: Box::new(self.mangle_expr(*callee, shadow)),
434                    args: mangled_args,
435                }
436            }
437            Expr::Pipe { left, right } => Expr::Pipe {
438                left: Box::new(self.mangle_expr(*left, shadow)),
439                right: Box::new(self.mangle_expr(*right, shadow)),
440            },
441            Expr::Try(inner) => Expr::Try(Box::new(self.mangle_expr(*inner, shadow))),
442            Expr::Field { value, field } => {
443                if let Expr::Var(alias) = (*value).clone() {
444                    if !shadow.contains(&alias) {
445                        if let Some(child) = self.path_imports.get(&alias) {
446                            return Expr::Var(format!("{child}.{field}"));
447                        }
448                    }
449                }
450                Expr::Field {
451                    value: Box::new(self.mangle_expr(*value, shadow)),
452                    field,
453                }
454            }
455            Expr::BinOp { op, lhs, rhs } => Expr::BinOp {
456                op,
457                lhs: Box::new(self.mangle_expr(*lhs, shadow)),
458                rhs: Box::new(self.mangle_expr(*rhs, shadow)),
459            },
460            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
461                op,
462                expr: Box::new(self.mangle_expr(*expr, shadow)),
463            },
464            Expr::If {
465                cond,
466                then_block,
467                else_block,
468            } => Expr::If {
469                cond: Box::new(self.mangle_expr(*cond, shadow)),
470                then_block: self.mangle_block(then_block, shadow),
471                else_block: self.mangle_block(else_block, shadow),
472            },
473            Expr::Match { scrutinee, arms } => Expr::Match {
474                scrutinee: Box::new(self.mangle_expr(*scrutinee, shadow)),
475                arms: arms
476                    .into_iter()
477                    .map(|a| {
478                        let mut arm_shadow = shadow.clone();
479                        collect_pattern_binders(&a.pattern, &mut arm_shadow);
480                        Arm {
481                            pattern: self.mangle_pattern(a.pattern),
482                            body: self.mangle_expr(a.body, &arm_shadow),
483                        }
484                    })
485                    .collect(),
486            },
487            Expr::RecordLit(fields) => Expr::RecordLit(
488                fields
489                    .into_iter()
490                    .map(|f| RecordLitField {
491                        name: f.name,
492                        value: self.mangle_expr(f.value, shadow),
493                    })
494                    .collect(),
495            ),
496            Expr::TupleLit(items) => Expr::TupleLit(
497                items
498                    .into_iter()
499                    .map(|i| self.mangle_expr(i, shadow))
500                    .collect(),
501            ),
502            Expr::ListLit(items) => Expr::ListLit(
503                items
504                    .into_iter()
505                    .map(|i| self.mangle_expr(i, shadow))
506                    .collect(),
507            ),
508            Expr::Constructor { name, args } => Expr::Constructor {
509                name,
510                args: args
511                    .into_iter()
512                    .map(|a| self.mangle_expr(a, shadow))
513                    .collect(),
514            },
515            Expr::Lambda(lambda) => {
516                let mut lam_shadow = shadow.clone();
517                for p in &lambda.params {
518                    lam_shadow.insert(p.name.clone());
519                }
520                Expr::Lambda(Box::new(Lambda {
521                    params: lambda
522                        .params
523                        .into_iter()
524                        .map(|p| Param {
525                            name: p.name,
526                            ty: self.mangle_type_expr(p.ty),
527                        })
528                        .collect(),
529                    return_type: self.mangle_type_expr(lambda.return_type),
530                    effects: lambda.effects,
531                    body: self.mangle_block(lambda.body, &lam_shadow),
532                }))
533            }
534        }
535    }
536
537    fn mangle_pattern(&self, p: Pattern) -> Pattern {
538        match p {
539            Pattern::Constructor { name, args } => Pattern::Constructor {
540                name,
541                args: args.into_iter().map(|a| self.mangle_pattern(a)).collect(),
542            },
543            Pattern::Record { fields, rest } => Pattern::Record {
544                fields: fields
545                    .into_iter()
546                    .map(|f| RecordPatField {
547                        name: f.name,
548                        pattern: f.pattern.map(|p| self.mangle_pattern(p)),
549                    })
550                    .collect(),
551                rest,
552            },
553            Pattern::Tuple(items) => {
554                Pattern::Tuple(items.into_iter().map(|p| self.mangle_pattern(p)).collect())
555            }
556            Pattern::Lit(_) | Pattern::Var(_) | Pattern::Wild => p,
557        }
558    }
559}
560
561fn collect_pattern_binders(p: &Pattern, out: &mut HashSet<String>) {
562    match p {
563        Pattern::Var(name) => {
564            out.insert(name.clone());
565        }
566        Pattern::Constructor { args, .. } => {
567            for a in args {
568                collect_pattern_binders(a, out);
569            }
570        }
571        Pattern::Record { fields, .. } => {
572            for f in fields {
573                match &f.pattern {
574                    Some(p) => collect_pattern_binders(p, out),
575                    // `{ name }` shorthand binds `name`.
576                    None => {
577                        out.insert(f.name.clone());
578                    }
579                }
580            }
581        }
582        Pattern::Tuple(items) => {
583            for p in items {
584                collect_pattern_binders(p, out);
585            }
586        }
587        Pattern::Lit(_) | Pattern::Wild => {}
588    }
589}