Skip to main content

lex_syntax/
loader.rs

//! Multi-file loader: resolves `import "./..."`, `import "../..."`,
//! `import "/abs/..."`, and package (`import "pkg/module"`) statements
//! starting from the importer, recursively parses, and produces a single
//! [`Program`] with all stages merged.
4//!
5//! Names that are local to an imported file are mangled with a
6//! **per-file-path** prefix, so the same module imported via multiple
7//! aliases (or from multiple parents in a diamond shape) collapses to
8//! one set of mangled names — same SigId, same nominal identity.
9//! Stdlib imports (`import "std.foo" as bar`) pass through unchanged.
10//!
11//! ## Mangling
12//!
13//! Each loaded file gets a prefix derived from its canonical filesystem
14//! path. The entry file's prefix is empty (so `lex run main.lex
15//! process` works unchanged). Imported files use `<stem>_<hash>`
16//! where `hash` is the first 8 hex chars of SHA-256 of the canonical
17//! path string. The hash disambiguates same-stem files in different
18//! directories without forcing a project manifest.
19//!
20//! Within a file at prefix `P`:
21//!
22//! - `fn foo` declared in this file becomes `<P>.foo` (just `foo` at root).
23//! - `type T` declared in this file becomes `<P>.T`.
24//! - References to a locally-declared name get mangled, **unless** the
25//!   name is shadowed by a binder (let, fn param, lambda param, or
26//!   pattern binder) in scope.
27//! - `m.foo` where `m` is a path-import alias is rewritten to the
28//!   imported file's prefix-qualified name. Two parents importing the
29//!   same file see the same prefix → calls and types unify.
30//! - `m.foo` where `m` is a stdlib alias is unchanged.
31//!
32//! Variant constructors are **not** mangled — they live in a global
33//! namespace, and a collision between two imported types' constructors
34//! surfaces later as a type-check error. Same for record field names.
35//!
36//! ## Diamond imports
37//!
//! `main.lex` imports `./left` and `./right`, both of which import
//! `./shared`. `shared.lex` is read, parsed, and merged into the
//! output only the first time it is reached; every later load of the
//! same canonical path returns an empty Program without touching the
//! file again. This is what makes `s.build_report(...)` and
//! `v.read_score(...)` agree on `Report`'s nominal identity.
44//!
45//! ## Limitations (tracked separately)
46//!
47//! The mangling key is the canonical filesystem path. Moving a file
48//! changes its SigId; renaming changes the file-stem half of the
49//! prefix. The eventual fix — content-addressed identity decoupled
50//! from filesystem layout — lives with store-native imports
51//! (`import "stage:..."`); see the corresponding follow-up tracker.
52
53use std::collections::{HashMap, HashSet};
54use std::path::{Path, PathBuf};
55use thiserror::Error;
56
57use sha2::{Digest, Sha256};
58
59use crate::syntax::*;
60use crate::workspace::{resolve_package_import, PackageError};
61use crate::{parse_source, SyntaxError};
62
/// Errors produced while loading a (possibly multi-file) Lex program.
#[derive(Debug, Error)]
pub enum LoadError {
    /// Canonicalizing or reading a file failed. `path` is the path (or,
    /// for package imports, the import reference) that was being accessed.
    #[error("read {path}: {source}")]
    Io {
        path: String,
        #[source]
        source: std::io::Error,
    },
    /// The file at `path` was read but `parse_source` rejected it. For
    /// string sources `path` is the placeholder `<input>`.
    #[error("parse {path}: {source}")]
    Syntax {
        path: String,
        #[source]
        source: SyntaxError,
    },
    /// A file was reached again while it was still being loaded. `chain`
    /// lists the files currently in progress, outermost first, followed by
    /// the re-entered file, joined with ` -> `.
    #[error("import cycle: {chain}")]
    Cycle { chain: String },
    /// A path import from `importer` named a file that does not exist.
    #[error("import \"{reference}\" from {importer}: file not found")]
    NotFound { importer: String, reference: String },
    /// Returned by `load_program_from_str` when the source contains a
    /// path import or a package import, neither of which can be resolved
    /// without a file to resolve from.
    #[error("local imports (`./`, `../`, `/`) require a base path; cannot resolve from a string source")]
    LocalImportInStringSource,
    /// Resolving a package import failed.
    #[error("package import error: {0}")]
    Package(#[from] PackageError),
}
86
87/// Load a multi-file Lex program, expanding local imports relative to
88/// the entry path. Stdlib imports (`std.*`) pass through unchanged.
89pub fn load_program(entry: &Path) -> Result<Program, LoadError> {
90    let entry_canonical = entry.canonicalize().map_err(|source| LoadError::Io {
91        path: entry.display().to_string(),
92        source,
93    })?;
94    let mut state = LoaderState {
95        in_progress: Vec::new(),
96        loaded: HashSet::new(),
97        prefixes: HashMap::new(),
98    };
99    // Entry file's prefix is empty so `lex run main.lex process` works
100    // without users typing the hashed prefix.
101    state.prefixes.insert(entry_canonical.clone(), String::new());
102    state.load(&entry_canonical)
103}
104
105/// Load a Lex program from a string source. Local-path imports are
106/// rejected up-front since there's no base path to resolve from.
107pub fn load_program_from_str(src: &str) -> Result<Program, LoadError> {
108    let prog = parse_source(src).map_err(|source| LoadError::Syntax {
109        path: "<input>".into(),
110        source,
111    })?;
112    for item in &prog.items {
113        if let Item::Import(imp) = item {
114            if is_path_import(&imp.reference)
115                || split_package_import(&imp.reference).is_some()
116            {
117                return Err(LoadError::LocalImportInStringSource);
118            }
119        }
120    }
121    Ok(prog)
122}
123
/// Mutable bookkeeping shared by every recursive `load` call made while
/// loading one program.
struct LoaderState {
    /// Stack of canonical paths whose load has started but not finished,
    /// outermost first. Reaching a path that is already on this stack is
    /// reported as an import cycle.
    in_progress: Vec<PathBuf>,
    /// Canonical paths that have already been merged into the output.
    /// A second `import "./shared"` from a different parent skips
    /// re-merging — the file's mangled items are already there.
    loaded: HashSet<PathBuf>,
    /// Stable mangling prefix per canonical path. Computed lazily;
    /// the entry file is seeded with an empty prefix.
    prefixes: HashMap<PathBuf, String>,
}
134
135impl LoaderState {
136    fn prefix_for(&mut self, canonical: &Path) -> String {
137        if let Some(p) = self.prefixes.get(canonical) {
138            return p.clone();
139        }
140        let stem = canonical
141            .file_stem()
142            .and_then(|s| s.to_str())
143            .unwrap_or("module");
144        let mut hasher = Sha256::new();
145        hasher.update(canonical.to_string_lossy().as_bytes());
146        let digest = hasher.finalize();
147        let prefix = format!("{stem}_{:08x}", u32::from_be_bytes([
148            digest[0], digest[1], digest[2], digest[3],
149        ]));
150        self.prefixes.insert(canonical.to_path_buf(), prefix.clone());
151        prefix
152    }
153
154    fn load(&mut self, canonical: &Path) -> Result<Program, LoadError> {
155        if self.in_progress.contains(&canonical.to_path_buf()) {
156            let mut chain: Vec<String> = self
157                .in_progress
158                .iter()
159                .map(|p| p.display().to_string())
160                .collect();
161            chain.push(canonical.display().to_string());
162            return Err(LoadError::Cycle {
163                chain: chain.join(" -> "),
164            });
165        }
166        // Diamond dedupe: if this file was already merged on another
167        // path through the import graph, its items are already in the
168        // output Vec — return an empty Program so the caller's
169        // `merged_children.extend(...)` is a no-op for items, but the
170        // call still resolves so the parent's `path_imports` map gets
171        // populated below.
172        if self.loaded.contains(canonical) {
173            return Ok(Program {
174                items: Vec::new(),
175                leading_comments: Vec::new(),
176                trailing_comments: Vec::new(),
177            });
178        }
179        self.in_progress.push(canonical.to_path_buf());
180
181        let src = std::fs::read_to_string(canonical).map_err(|source| LoadError::Io {
182            path: canonical.display().to_string(),
183            source,
184        })?;
185        let prog = parse_source(&src).map_err(|source| LoadError::Syntax {
186            path: canonical.display().to_string(),
187            source,
188        })?;
189
190        let local_names: HashSet<String> = prog
191            .items
192            .iter()
193            .filter_map(|item| match item {
194                Item::FnDecl(fd) => Some(fd.name.clone()),
195                Item::TypeDecl(td) => Some(td.name.clone()),
196                _ => None,
197            })
198            .collect();
199
200        // alias used by this file → mangling prefix of the imported file
201        let mut path_imports: HashMap<String, String> = HashMap::new();
202        let mut merged_children: Vec<Item> = Vec::new();
203        let mut std_imports: Vec<Item> = Vec::new();
204        let mut my_items: Vec<Item> = Vec::new();
205
206        for item in prog.items {
207            match item {
208                Item::Import(ref imp) if is_path_import(&imp.reference) => {
209                    let resolved = resolve_import(canonical, &imp.reference)?;
210                    let child_prefix = self.prefix_for(&resolved);
211                    path_imports.insert(imp.alias.clone(), child_prefix);
212                    let child_prog = self.load(&resolved)?;
213                    merged_children.extend(child_prog.items);
214                }
215                Item::Import(ref imp)
216                    if split_package_import(&imp.reference).is_some() =>
217                {
218                    let (pkg, module) =
219                        split_package_import(&imp.reference).unwrap();
220                    let resolved =
221                        resolve_package_import(canonical, pkg, module)
222                            .map_err(LoadError::Package)?
223                            .canonicalize()
224                            .map_err(|source| LoadError::Io {
225                                path: imp.reference.clone(),
226                                source,
227                            })?;
228                    let child_prefix = self.prefix_for(&resolved);
229                    path_imports.insert(imp.alias.clone(), child_prefix);
230                    let child_prog = self.load(&resolved)?;
231                    merged_children.extend(child_prog.items);
232                }
233                Item::Import(_) => std_imports.push(item),
234                _ => my_items.push(item),
235            }
236        }
237
238        let my_prefix = self.prefix_for(canonical);
239        let mangler = Mangler {
240            prefix: my_prefix,
241            local_names: &local_names,
242            path_imports: &path_imports,
243        };
244        let mangled: Vec<Item> = my_items
245            .into_iter()
246            .map(|i| mangler.mangle_item(i))
247            .collect();
248
249        self.in_progress.pop();
250        self.loaded.insert(canonical.to_path_buf());
251
252        // Output order: std imports first (deduped against children's),
253        // then merged children's items, then this file's items.
254        let mut out: Vec<Item> = Vec::new();
255        for s in std_imports {
256            if !merged_children.iter().any(|m| m == &s) {
257                out.push(s);
258            }
259        }
260        out.extend(merged_children);
261        out.extend(mangled);
262        // Top-of-file comments live on each source file independently;
263        // after import merging the merged Program represents many
264        // files at once, and there is no obvious single "top of file"
265        // to attribute them to. Drop here — they're preserved by
266        // `lex fmt` (which operates per-file) but not by the loader's
267        // import-merging path. Same rationale for trailing_comments.
268        Ok(Program {
269            items: out,
270            leading_comments: Vec::new(),
271            trailing_comments: Vec::new(),
272        })
273    }
274}
275
/// Whether `reference` names a file by path: it starts with `./`,
/// `../`, or `/`.
fn is_path_import(reference: &str) -> bool {
    ["./", "../", "/"]
        .iter()
        .any(|prefix| reference.starts_with(*prefix))
}
279
/// Split a package import such as `"lex-schema/validate"` into
/// `(package_name, module_path)` at its first `/`.
///
/// Returns `None` for path imports (`./`, `../`, `/`), for references
/// starting with `std.`, and for references that contain no `/`.
fn split_package_import(reference: &str) -> Option<(&str, &str)> {
    const NON_PACKAGE_PREFIXES: [&str; 4] = ["./", "../", "/", "std."];

    let excluded = NON_PACKAGE_PREFIXES
        .iter()
        .any(|prefix| reference.starts_with(*prefix));
    if excluded {
        None
    } else {
        reference.split_once('/')
    }
}
293
294fn resolve_import(importer: &Path, reference: &str) -> Result<PathBuf, LoadError> {
295    let importer_dir = importer.parent().unwrap_or_else(|| Path::new("."));
296    let mut resolved: PathBuf = if reference.starts_with('/') {
297        PathBuf::from(reference)
298    } else {
299        importer_dir.join(reference)
300    };
301    if resolved.extension().is_none() {
302        resolved.set_extension("lex");
303    }
304    if !resolved.exists() {
305        return Err(LoadError::NotFound {
306            importer: importer.display().to_string(),
307            reference: reference.to_string(),
308        });
309    }
310    // Canonicalize so that `../../shared/foo` and `../other/../shared/foo`
311    // resolve to the same HashMap key, preventing duplicate loads and
312    // mismatched mangling prefixes in diamond-import graphs (#358).
313    resolved.canonicalize().map_err(|source| LoadError::Io {
314        path: resolved.display().to_string(),
315        source,
316    })
317}
318
/// Rewrites the names in one file's items: declarations and references
/// to this file's own top-level names get the file's prefix, and
/// `alias.name` references through a path-import alias get the imported
/// file's prefix.
struct Mangler<'a> {
    /// Mangling prefix for items declared in this file. Empty for the
    /// entry file, `<stem>_<hash8>` for imported files.
    prefix: String,
    /// Names of the top-level fns and types declared in this file; only
    /// these bare names are prefix-qualified.
    local_names: &'a HashSet<String>,
    /// Map from local alias to the imported file's mangling prefix.
    /// `m.foo` rewrites to `<imported_prefix>.foo` regardless of which
    /// alias `m` was, so two parents importing the same module agree.
    path_imports: &'a HashMap<String, String>,
}
329
330impl<'a> Mangler<'a> {
331    fn qualify(&self, name: &str) -> String {
332        if self.prefix.is_empty() {
333            name.to_string()
334        } else {
335            format!("{}.{}", self.prefix, name)
336        }
337    }
338
339    fn mangle_item(&self, item: Item) -> Item {
340        match item {
341            Item::Import(imp) => Item::Import(imp),
342            Item::TypeDecl(td) => Item::TypeDecl(self.mangle_type_decl(td)),
343            Item::FnDecl(fd) => Item::FnDecl(self.mangle_fn_decl(fd)),
344        }
345    }
346
347    fn mangle_type_decl(&self, td: TypeDecl) -> TypeDecl {
348        TypeDecl {
349            name: self.qualify(&td.name),
350            params: td.params,
351            definition: self.mangle_type_expr(td.definition),
352            leading_comments: td.leading_comments,
353        }
354    }
355
356    fn mangle_fn_decl(&self, fd: FnDecl) -> FnDecl {
357        let mut shadow = HashSet::new();
358        for p in &fd.params {
359            shadow.insert(p.name.clone());
360        }
361        // Example args/expected sit outside the body's parameter scope:
362        // they're top-level expressions evaluated against the function
363        // signature, so the only names they can see are the file's
364        // top-level fns/types and any path-import aliases — i.e., an
365        // empty shadow set (#391).
366        let empty_shadow = HashSet::new();
367        let examples = fd
368            .examples
369            .into_iter()
370            .map(|ex| Example {
371                args: ex
372                    .args
373                    .into_iter()
374                    .map(|a| self.mangle_expr(a, &empty_shadow))
375                    .collect(),
376                expected: self.mangle_expr(ex.expected, &empty_shadow),
377            })
378            .collect();
379        FnDecl {
380            name: self.qualify(&fd.name),
381            type_params: fd.type_params,
382            params: fd
383                .params
384                .into_iter()
385                .map(|p| Param {
386                    name: p.name,
387                    ty: self.mangle_type_expr(p.ty),
388                })
389                .collect(),
390            effects: fd.effects,
391            effect_row_var: fd.effect_row_var,
392            return_type: self.mangle_type_expr(fd.return_type),
393            body: self.mangle_block(fd.body, &shadow),
394            examples,
395            leading_comments: fd.leading_comments,
396        }
397    }
398
399    fn mangle_type_expr(&self, te: TypeExpr) -> TypeExpr {
400        match te {
401            TypeExpr::Named { name, args } => TypeExpr::Named {
402                name: self.rewrite_type_name(&name),
403                args: args.into_iter().map(|a| self.mangle_type_expr(a)).collect(),
404            },
405            TypeExpr::Record(fields) => TypeExpr::Record(
406                fields
407                    .into_iter()
408                    .map(|f| TypeField {
409                        name: f.name,
410                        ty: self.mangle_type_expr(f.ty),
411                    })
412                    .collect(),
413            ),
414            TypeExpr::RecordWithSpreads { spreads, fields } => TypeExpr::RecordWithSpreads {
415                spreads: spreads.into_iter().map(|s| self.rewrite_type_name(&s)).collect(),
416                fields: fields
417                    .into_iter()
418                    .map(|f| TypeField {
419                        name: f.name,
420                        ty: self.mangle_type_expr(f.ty),
421                    })
422                    .collect(),
423            },
424            TypeExpr::Tuple(items) => {
425                TypeExpr::Tuple(items.into_iter().map(|t| self.mangle_type_expr(t)).collect())
426            }
427            TypeExpr::Function {
428                params,
429                effects,
430                effect_row_var,
431                ret,
432            } => TypeExpr::Function {
433                params: params
434                    .into_iter()
435                    .map(|t| self.mangle_type_expr(t))
436                    .collect(),
437                effects,
438                effect_row_var,
439                ret: Box::new(self.mangle_type_expr(*ret)),
440            },
441            TypeExpr::Union(variants) => TypeExpr::Union(
442                variants
443                    .into_iter()
444                    .map(|v| UnionVariant {
445                        name: v.name,
446                        payload: v.payload.map(|t| self.mangle_type_expr(t)),
447                    })
448                    .collect(),
449            ),
450            TypeExpr::Refined { base, binding, predicate } => TypeExpr::Refined {
451                base: Box::new(self.mangle_type_expr(*base)),
452                binding,
453                // The predicate is an expression; its names are
454                // resolved during type-check, not loader-time, so
455                // it passes through unchanged here. Slice 2 wires
456                // up discharge through the spec-checker.
457                predicate,
458            },
459        }
460    }
461
462    /// Rewrite a possibly-qualified type name to its mangled form.
463    fn rewrite_type_name(&self, name: &str) -> String {
464        if let Some((alias, rest)) = name.split_once('.') {
465            if let Some(child) = self.path_imports.get(alias) {
466                return format!("{child}.{rest}");
467            }
468            return name.to_string();
469        }
470        if self.local_names.contains(name) {
471            return self.qualify(name);
472        }
473        name.to_string()
474    }
475
476    fn mangle_block(&self, b: Block, shadow: &HashSet<String>) -> Block {
477        let mut shadow = shadow.clone();
478        let statements = b
479            .statements
480            .into_iter()
481            .map(|s| match s {
482                Statement::Let { name, ty, value } => {
483                    let value = self.mangle_expr(value, &shadow);
484                    let ty = ty.map(|t| self.mangle_type_expr(t));
485                    shadow.insert(name.clone());
486                    Statement::Let { name, ty, value }
487                }
488                Statement::Expr(e) => Statement::Expr(self.mangle_expr(e, &shadow)),
489            })
490            .collect();
491        let result = Box::new(self.mangle_expr(*b.result, &shadow));
492        Block { statements, result }
493    }
494
495    fn mangle_expr(&self, e: Expr, shadow: &HashSet<String>) -> Expr {
496        match e {
497            Expr::Lit(_) => e,
498            Expr::Var(name) => {
499                if !shadow.contains(&name) && self.local_names.contains(&name) {
500                    Expr::Var(self.qualify(&name))
501                } else {
502                    Expr::Var(name)
503                }
504            }
505            Expr::Block(b) => Expr::Block(self.mangle_block(b, shadow)),
506            Expr::Call { callee, args } => {
507                let mangled_args: Vec<Expr> = args
508                    .into_iter()
509                    .map(|a| self.mangle_expr(a, shadow))
510                    .collect();
511                if let Expr::Field { value, field } = (*callee).clone() {
512                    if let Expr::Var(alias) = *value {
513                        if !shadow.contains(&alias) {
514                            if let Some(child) = self.path_imports.get(&alias) {
515                                return Expr::Call {
516                                    callee: Box::new(Expr::Var(format!("{child}.{field}"))),
517                                    args: mangled_args,
518                                };
519                            }
520                        }
521                    }
522                }
523                Expr::Call {
524                    callee: Box::new(self.mangle_expr(*callee, shadow)),
525                    args: mangled_args,
526                }
527            }
528            Expr::Pipe { left, right } => Expr::Pipe {
529                left: Box::new(self.mangle_expr(*left, shadow)),
530                right: Box::new(self.mangle_expr(*right, shadow)),
531            },
532            Expr::Try(inner) => Expr::Try(Box::new(self.mangle_expr(*inner, shadow))),
533            Expr::Field { value, field } => {
534                if let Expr::Var(alias) = (*value).clone() {
535                    if !shadow.contains(&alias) {
536                        if let Some(child) = self.path_imports.get(&alias) {
537                            return Expr::Var(format!("{child}.{field}"));
538                        }
539                    }
540                }
541                Expr::Field {
542                    value: Box::new(self.mangle_expr(*value, shadow)),
543                    field,
544                }
545            }
546            Expr::BinOp { op, lhs, rhs } => Expr::BinOp {
547                op,
548                lhs: Box::new(self.mangle_expr(*lhs, shadow)),
549                rhs: Box::new(self.mangle_expr(*rhs, shadow)),
550            },
551            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
552                op,
553                expr: Box::new(self.mangle_expr(*expr, shadow)),
554            },
555            Expr::If {
556                cond,
557                then_block,
558                else_block,
559            } => Expr::If {
560                cond: Box::new(self.mangle_expr(*cond, shadow)),
561                then_block: self.mangle_block(then_block, shadow),
562                else_block: self.mangle_block(else_block, shadow),
563            },
564            Expr::Match { scrutinee, arms } => Expr::Match {
565                scrutinee: Box::new(self.mangle_expr(*scrutinee, shadow)),
566                arms: arms
567                    .into_iter()
568                    .map(|a| {
569                        let mut arm_shadow = shadow.clone();
570                        collect_pattern_binders(&a.pattern, &mut arm_shadow);
571                        Arm {
572                            pattern: self.mangle_pattern(a.pattern),
573                            body: self.mangle_expr(a.body, &arm_shadow),
574                        }
575                    })
576                    .collect(),
577            },
578            Expr::RecordLit(fields) => Expr::RecordLit(
579                fields
580                    .into_iter()
581                    .map(|f| RecordLitField {
582                        name: f.name,
583                        value: self.mangle_expr(f.value, shadow),
584                    })
585                    .collect(),
586            ),
587            Expr::TupleLit(items) => Expr::TupleLit(
588                items
589                    .into_iter()
590                    .map(|i| self.mangle_expr(i, shadow))
591                    .collect(),
592            ),
593            Expr::ListLit(items) => Expr::ListLit(
594                items
595                    .into_iter()
596                    .map(|i| self.mangle_expr(i, shadow))
597                    .collect(),
598            ),
599            Expr::Constructor { name, args } => Expr::Constructor {
600                name,
601                args: args
602                    .into_iter()
603                    .map(|a| self.mangle_expr(a, shadow))
604                    .collect(),
605            },
606            Expr::Ascription { value, ty } => Expr::Ascription {
607                value: Box::new(self.mangle_expr(*value, shadow)),
608                ty: self.mangle_type_expr(ty),
609            },
610            Expr::Lambda(lambda) => {
611                let mut lam_shadow = shadow.clone();
612                for p in &lambda.params {
613                    lam_shadow.insert(p.name.clone());
614                }
615                Expr::Lambda(Box::new(Lambda {
616                    params: lambda
617                        .params
618                        .into_iter()
619                        .map(|p| Param {
620                            name: p.name,
621                            ty: self.mangle_type_expr(p.ty),
622                        })
623                        .collect(),
624                    return_type: self.mangle_type_expr(lambda.return_type),
625                    effects: lambda.effects,
626                    effect_row_var: lambda.effect_row_var,
627                    body: self.mangle_block(lambda.body, &lam_shadow),
628                }))
629            }
630        }
631    }
632
633    fn mangle_pattern(&self, p: Pattern) -> Pattern {
634        match p {
635            Pattern::Constructor { name, args } => Pattern::Constructor {
636                name,
637                args: args.into_iter().map(|a| self.mangle_pattern(a)).collect(),
638            },
639            Pattern::Record { fields, rest } => Pattern::Record {
640                fields: fields
641                    .into_iter()
642                    .map(|f| RecordPatField {
643                        name: f.name,
644                        pattern: f.pattern.map(|p| self.mangle_pattern(p)),
645                    })
646                    .collect(),
647                rest,
648            },
649            Pattern::Tuple(items) => {
650                Pattern::Tuple(items.into_iter().map(|p| self.mangle_pattern(p)).collect())
651            }
652            Pattern::Lit(_) | Pattern::Var(_) | Pattern::Wild => p,
653        }
654    }
655}
656
657fn collect_pattern_binders(p: &Pattern, out: &mut HashSet<String>) {
658    match p {
659        Pattern::Var(name) => {
660            out.insert(name.clone());
661        }
662        Pattern::Constructor { args, .. } => {
663            for a in args {
664                collect_pattern_binders(a, out);
665            }
666        }
667        Pattern::Record { fields, .. } => {
668            for f in fields {
669                match &f.pattern {
670                    Some(p) => collect_pattern_binders(p, out),
671                    // `{ name }` shorthand binds `name`.
672                    None => {
673                        out.insert(f.name.clone());
674                    }
675                }
676            }
677        }
678        Pattern::Tuple(items) => {
679            for p in items {
680                collect_pattern_binders(p, out);
681            }
682        }
683        Pattern::Lit(_) | Pattern::Wild => {}
684    }
685}