Skip to main content

lex_syntax/
loader.rs

1//! Multi-file loader: resolves `import "./..."`, `import "../..."`, and
2//! `import "/abs/..."` statements relative to the importer, recursively
3//! parses, and produces a single [`Program`] with all stages merged.
4//!
5//! Names that are local to an imported file are mangled with a
6//! **per-file-path** prefix, so the same module imported via multiple
7//! aliases (or from multiple parents in a diamond shape) collapses to
8//! one set of mangled names — same SigId, same nominal identity.
9//! Stdlib imports (`import "std.foo" as bar`) pass through unchanged.
10//!
11//! ## Mangling
12//!
13//! Each loaded file gets a prefix derived from its canonical filesystem
14//! path. The entry file's prefix is empty (so `lex run main.lex
15//! process` works unchanged). Imported files use `<stem>_<hash>`
16//! where `hash` is the first 8 hex chars of SHA-256 of the canonical
17//! path string. The hash disambiguates same-stem files in different
18//! directories without forcing a project manifest.
19//!
20//! Within a file at prefix `P`:
21//!
22//! - `fn foo` declared in this file becomes `<P>.foo` (just `foo` at root).
23//! - `type T` declared in this file becomes `<P>.T`.
24//! - References to a locally-declared name get mangled, **unless** the
25//!   name is shadowed by a binder (let, fn param, lambda param, or
26//!   pattern binder) in scope.
27//! - `m.foo` where `m` is a path-import alias is rewritten to the
28//!   imported file's prefix-qualified name. Two parents importing the
29//!   same file see the same prefix → calls and types unify.
30//! - `m.foo` where `m` is a stdlib alias is unchanged.
31//!
32//! Variant constructors are **not** mangled — they live in a global
33//! namespace, and a collision between two imported types' constructors
34//! surfaces later as a type-check error. Same for record field names.
35//!
36//! ## Diamond imports
37//!
//! `main.lex` imports `./left` and `./right`, both of which import
//! `./shared`. The import is resolved once per importer, but
//! `shared.lex` is read, parsed, and merged into the output exactly
//! once (subsequent loads of the same canonical path return an empty
//! Program without touching the file again). This is what makes
//! `s.build_report(...)` and `v.read_score(...)` agree on `Report`'s
//! nominal identity.
44//!
45//! ## Limitations (tracked separately)
46//!
47//! The mangling key is the canonical filesystem path. Moving a file
48//! changes its SigId; renaming changes the file-stem half of the
49//! prefix. The eventual fix — content-addressed identity decoupled
50//! from filesystem layout — lives with store-native imports
51//! (`import "stage:..."`); see the corresponding follow-up tracker.
52
53use std::collections::{HashMap, HashSet};
54use std::path::{Path, PathBuf};
55use thiserror::Error;
56
57use sha2::{Digest, Sha256};
58
59use crate::syntax::*;
60use crate::workspace::{resolve_package_import, PackageError};
61use crate::{parse_source, SyntaxError};
62
/// Everything that can go wrong while loading and merging a
/// multi-file program.
#[derive(Debug, Error)]
pub enum LoadError {
    /// A filesystem operation failed: canonicalizing a path or reading
    /// a source file. `path` is the path (or, for package imports, the
    /// import reference) that was being processed.
    #[error("read {path}: {source}")]
    Io {
        path: String,
        #[source]
        source: std::io::Error,
    },
    /// A source file (or the `<input>` string) failed to parse.
    #[error("parse {path}: {source}")]
    Syntax {
        path: String,
        #[source]
        source: SyntaxError,
    },
    /// A file was imported while it was still being loaded. `chain`
    /// lists the files on the import stack, joined with ` -> `, ending
    /// with the file that closed the cycle.
    #[error("import cycle: {chain}")]
    Cycle { chain: String },
    /// A path import resolved to a location that does not exist.
    #[error("import \"{reference}\" from {importer}: file not found")]
    NotFound { importer: String, reference: String },
    /// A string source contained a path or package import, which
    /// cannot be resolved without a file to resolve against.
    #[error("local imports (`./`, `../`, `/`) require a base path; cannot resolve from a string source")]
    LocalImportInStringSource,
    /// Resolving a package import failed.
    #[error("package import error: {0}")]
    Package(#[from] PackageError),
}
86
87/// Load a multi-file Lex program, expanding local imports relative to
88/// the entry path. Stdlib imports (`std.*`) pass through unchanged.
89pub fn load_program(entry: &Path) -> Result<Program, LoadError> {
90    let entry_canonical = entry.canonicalize().map_err(|source| LoadError::Io {
91        path: entry.display().to_string(),
92        source,
93    })?;
94    let mut state = LoaderState {
95        in_progress: Vec::new(),
96        loaded: HashSet::new(),
97        prefixes: HashMap::new(),
98    };
99    // Entry file's prefix is empty so `lex run main.lex process` works
100    // without users typing the hashed prefix.
101    state.prefixes.insert(entry_canonical.clone(), String::new());
102    state.load(&entry_canonical)
103}
104
105/// Load a Lex program from a string source. Local-path imports are
106/// rejected up-front since there's no base path to resolve from.
107pub fn load_program_from_str(src: &str) -> Result<Program, LoadError> {
108    let prog = parse_source(src).map_err(|source| LoadError::Syntax {
109        path: "<input>".into(),
110        source,
111    })?;
112    for item in &prog.items {
113        if let Item::Import(imp) = item {
114            if is_path_import(&imp.reference)
115                || split_package_import(&imp.reference).is_some()
116            {
117                return Err(LoadError::LocalImportInStringSource);
118            }
119        }
120    }
121    Ok(prog)
122}
123
/// Bookkeeping shared by every recursive `load` call of one
/// `load_program` run.
struct LoaderState {
    /// Canonical paths of the files currently being loaded, outermost
    /// importer first. A path is pushed when its load starts and
    /// popped when it finishes; finding a path here again means an
    /// import cycle, and the contents become the reported chain.
    in_progress: Vec<PathBuf>,
    /// Canonical paths that have already been merged into the output.
    /// A second `import "./shared"` from a different parent skips
    /// re-merging — the file's mangled items are already there.
    loaded: HashSet<PathBuf>,
    /// Stable mangling prefix per canonical path. Computed lazily;
    /// the entry file is seeded with an empty prefix.
    prefixes: HashMap<PathBuf, String>,
}
134
135impl LoaderState {
136    fn prefix_for(&mut self, canonical: &Path) -> String {
137        if let Some(p) = self.prefixes.get(canonical) {
138            return p.clone();
139        }
140        let stem = canonical
141            .file_stem()
142            .and_then(|s| s.to_str())
143            .unwrap_or("module");
144        let mut hasher = Sha256::new();
145        hasher.update(canonical.to_string_lossy().as_bytes());
146        let digest = hasher.finalize();
147        let prefix = format!("{stem}_{:08x}", u32::from_be_bytes([
148            digest[0], digest[1], digest[2], digest[3],
149        ]));
150        self.prefixes.insert(canonical.to_path_buf(), prefix.clone());
151        prefix
152    }
153
154    fn load(&mut self, canonical: &Path) -> Result<Program, LoadError> {
155        if self.in_progress.contains(&canonical.to_path_buf()) {
156            let mut chain: Vec<String> = self
157                .in_progress
158                .iter()
159                .map(|p| p.display().to_string())
160                .collect();
161            chain.push(canonical.display().to_string());
162            return Err(LoadError::Cycle {
163                chain: chain.join(" -> "),
164            });
165        }
166        // Diamond dedupe: if this file was already merged on another
167        // path through the import graph, its items are already in the
168        // output Vec — return an empty Program so the caller's
169        // `merged_children.extend(...)` is a no-op for items, but the
170        // call still resolves so the parent's `path_imports` map gets
171        // populated below.
172        if self.loaded.contains(canonical) {
173            return Ok(Program {
174                items: Vec::new(),
175                leading_comments: Vec::new(),
176                trailing_comments: Vec::new(),
177            });
178        }
179        self.in_progress.push(canonical.to_path_buf());
180
181        let src = std::fs::read_to_string(canonical).map_err(|source| LoadError::Io {
182            path: canonical.display().to_string(),
183            source,
184        })?;
185        let prog = parse_source(&src).map_err(|source| LoadError::Syntax {
186            path: canonical.display().to_string(),
187            source,
188        })?;
189
190        let local_names: HashSet<String> = prog
191            .items
192            .iter()
193            .filter_map(|item| match item {
194                Item::FnDecl(fd) => Some(fd.name.clone()),
195                Item::TypeDecl(td) => Some(td.name.clone()),
196                _ => None,
197            })
198            .collect();
199
200        // alias used by this file → mangling prefix of the imported file
201        let mut path_imports: HashMap<String, String> = HashMap::new();
202        let mut merged_children: Vec<Item> = Vec::new();
203        let mut std_imports: Vec<Item> = Vec::new();
204        let mut my_items: Vec<Item> = Vec::new();
205
206        for item in prog.items {
207            match item {
208                Item::Import(ref imp) if is_path_import(&imp.reference) => {
209                    let resolved = resolve_import(canonical, &imp.reference)?;
210                    let child_prefix = self.prefix_for(&resolved);
211                    path_imports.insert(imp.alias.clone(), child_prefix);
212                    let child_prog = self.load(&resolved)?;
213                    merged_children.extend(child_prog.items);
214                }
215                Item::Import(ref imp)
216                    if split_package_import(&imp.reference).is_some() =>
217                {
218                    let (pkg, module) =
219                        split_package_import(&imp.reference).unwrap();
220                    let resolved =
221                        resolve_package_import(canonical, pkg, module)
222                            .map_err(LoadError::Package)?
223                            .canonicalize()
224                            .map_err(|source| LoadError::Io {
225                                path: imp.reference.clone(),
226                                source,
227                            })?;
228                    let child_prefix = self.prefix_for(&resolved);
229                    path_imports.insert(imp.alias.clone(), child_prefix);
230                    let child_prog = self.load(&resolved)?;
231                    merged_children.extend(child_prog.items);
232                }
233                Item::Import(_) => std_imports.push(item),
234                _ => my_items.push(item),
235            }
236        }
237
238        let my_prefix = self.prefix_for(canonical);
239        let mangler = Mangler {
240            prefix: my_prefix,
241            local_names: &local_names,
242            path_imports: &path_imports,
243        };
244        let mangled: Vec<Item> = my_items
245            .into_iter()
246            .map(|i| mangler.mangle_item(i))
247            .collect();
248
249        self.in_progress.pop();
250        self.loaded.insert(canonical.to_path_buf());
251
252        // Output order: std imports first (deduped against children's),
253        // then merged children's items, then this file's items.
254        let mut out: Vec<Item> = Vec::new();
255        for s in std_imports {
256            if !merged_children.iter().any(|m| m == &s) {
257                out.push(s);
258            }
259        }
260        out.extend(merged_children);
261        out.extend(mangled);
262        // Top-of-file comments live on each source file independently;
263        // after import merging the merged Program represents many
264        // files at once, and there is no obvious single "top of file"
265        // to attribute them to. Drop here — they're preserved by
266        // `lex fmt` (which operates per-file) but not by the loader's
267        // import-merging path. Same rationale for trailing_comments.
268        Ok(Program {
269            items: out,
270            leading_comments: Vec::new(),
271            trailing_comments: Vec::new(),
272        })
273    }
274}
275
/// Whether `reference` names a file on disk: relative to the importer
/// (`./`, `../`) or rooted (`/`).
fn is_path_import(reference: &str) -> bool {
    const PATH_LEADS: [&str; 3] = ["./", "../", "/"];
    PATH_LEADS.iter().any(|lead| reference.starts_with(*lead))
}
279
/// Split a package import such as `"lex-schema/validate"` at its first
/// `/` into `(pkg_name, module_path)`.
///
/// Yields `None` for path imports (`./`, `../`, `/`), for stdlib
/// references (`std.*`), and for references that contain no `/`.
fn split_package_import(reference: &str) -> Option<(&str, &str)> {
    const NOT_A_PACKAGE: [&str; 4] = ["./", "../", "/", "std."];
    let excluded = NOT_A_PACKAGE
        .iter()
        .any(|lead| reference.starts_with(*lead));
    if excluded {
        None
    } else {
        reference.split_once('/')
    }
}
293
294fn resolve_import(importer: &Path, reference: &str) -> Result<PathBuf, LoadError> {
295    let importer_dir = importer.parent().unwrap_or_else(|| Path::new("."));
296    let mut resolved: PathBuf = if reference.starts_with('/') {
297        PathBuf::from(reference)
298    } else {
299        importer_dir.join(reference)
300    };
301    if resolved.extension().is_none() {
302        resolved.set_extension("lex");
303    }
304    if !resolved.exists() {
305        return Err(LoadError::NotFound {
306            importer: importer.display().to_string(),
307            reference: reference.to_string(),
308        });
309    }
310    // Canonicalize so that `../../shared/foo` and `../other/../shared/foo`
311    // resolve to the same HashMap key, preventing duplicate loads and
312    // mismatched mangling prefixes in diamond-import graphs (#358).
313    resolved.canonicalize().map_err(|source| LoadError::Io {
314        path: resolved.display().to_string(),
315        source,
316    })
317}
318
/// Rewrites the declarations and name references of one source file
/// into their merged-program (prefix-qualified) form.
struct Mangler<'a> {
    /// Mangling prefix for items declared in this file. Empty for the
    /// entry file, `<stem>_<hash8>` for imported files.
    prefix: String,
    /// Names of the fns and types declared at the top level of this
    /// file. Only references to these names (when not shadowed by a
    /// binder) are qualified with `prefix`.
    local_names: &'a HashSet<String>,
    /// Map from local alias to the imported file's mangling prefix.
    /// `m.foo` rewrites to `<imported_prefix>.foo` regardless of which
    /// alias `m` was, so two parents importing the same module agree.
    path_imports: &'a HashMap<String, String>,
}
329
330impl<'a> Mangler<'a> {
331    fn qualify(&self, name: &str) -> String {
332        if self.prefix.is_empty() {
333            name.to_string()
334        } else {
335            format!("{}.{}", self.prefix, name)
336        }
337    }
338
339    fn mangle_item(&self, item: Item) -> Item {
340        match item {
341            Item::Import(imp) => Item::Import(imp),
342            Item::TypeDecl(td) => Item::TypeDecl(self.mangle_type_decl(td)),
343            Item::FnDecl(fd) => Item::FnDecl(self.mangle_fn_decl(fd)),
344        }
345    }
346
347    fn mangle_type_decl(&self, td: TypeDecl) -> TypeDecl {
348        TypeDecl {
349            name: self.qualify(&td.name),
350            params: td.params,
351            definition: self.mangle_type_expr(td.definition),
352            leading_comments: td.leading_comments,
353        }
354    }
355
356    fn mangle_fn_decl(&self, fd: FnDecl) -> FnDecl {
357        let mut shadow = HashSet::new();
358        for p in &fd.params {
359            shadow.insert(p.name.clone());
360        }
361        // Example args/expected sit outside the body's parameter scope:
362        // they're top-level expressions evaluated against the function
363        // signature, so the only names they can see are the file's
364        // top-level fns/types and any path-import aliases — i.e., an
365        // empty shadow set (#391).
366        let empty_shadow = HashSet::new();
367        let examples = fd
368            .examples
369            .into_iter()
370            .map(|ex| Example {
371                args: ex
372                    .args
373                    .into_iter()
374                    .map(|a| self.mangle_expr(a, &empty_shadow))
375                    .collect(),
376                expected: self.mangle_expr(ex.expected, &empty_shadow),
377            })
378            .collect();
379        FnDecl {
380            name: self.qualify(&fd.name),
381            type_params: fd.type_params,
382            params: fd
383                .params
384                .into_iter()
385                .map(|p| Param {
386                    name: p.name,
387                    ty: self.mangle_type_expr(p.ty),
388                })
389                .collect(),
390            effects: fd.effects,
391            return_type: self.mangle_type_expr(fd.return_type),
392            body: self.mangle_block(fd.body, &shadow),
393            examples,
394            leading_comments: fd.leading_comments,
395        }
396    }
397
398    fn mangle_type_expr(&self, te: TypeExpr) -> TypeExpr {
399        match te {
400            TypeExpr::Named { name, args } => TypeExpr::Named {
401                name: self.rewrite_type_name(&name),
402                args: args.into_iter().map(|a| self.mangle_type_expr(a)).collect(),
403            },
404            TypeExpr::Record(fields) => TypeExpr::Record(
405                fields
406                    .into_iter()
407                    .map(|f| TypeField {
408                        name: f.name,
409                        ty: self.mangle_type_expr(f.ty),
410                    })
411                    .collect(),
412            ),
413            TypeExpr::RecordWithSpreads { spreads, fields } => TypeExpr::RecordWithSpreads {
414                spreads: spreads.into_iter().map(|s| self.rewrite_type_name(&s)).collect(),
415                fields: fields
416                    .into_iter()
417                    .map(|f| TypeField {
418                        name: f.name,
419                        ty: self.mangle_type_expr(f.ty),
420                    })
421                    .collect(),
422            },
423            TypeExpr::Tuple(items) => {
424                TypeExpr::Tuple(items.into_iter().map(|t| self.mangle_type_expr(t)).collect())
425            }
426            TypeExpr::Function {
427                params,
428                effects,
429                ret,
430            } => TypeExpr::Function {
431                params: params
432                    .into_iter()
433                    .map(|t| self.mangle_type_expr(t))
434                    .collect(),
435                effects,
436                ret: Box::new(self.mangle_type_expr(*ret)),
437            },
438            TypeExpr::Union(variants) => TypeExpr::Union(
439                variants
440                    .into_iter()
441                    .map(|v| UnionVariant {
442                        name: v.name,
443                        payload: v.payload.map(|t| self.mangle_type_expr(t)),
444                    })
445                    .collect(),
446            ),
447            TypeExpr::Refined { base, binding, predicate } => TypeExpr::Refined {
448                base: Box::new(self.mangle_type_expr(*base)),
449                binding,
450                // The predicate is an expression; its names are
451                // resolved during type-check, not loader-time, so
452                // it passes through unchanged here. Slice 2 wires
453                // up discharge through the spec-checker.
454                predicate,
455            },
456        }
457    }
458
459    /// Rewrite a possibly-qualified type name to its mangled form.
460    fn rewrite_type_name(&self, name: &str) -> String {
461        if let Some((alias, rest)) = name.split_once('.') {
462            if let Some(child) = self.path_imports.get(alias) {
463                return format!("{child}.{rest}");
464            }
465            return name.to_string();
466        }
467        if self.local_names.contains(name) {
468            return self.qualify(name);
469        }
470        name.to_string()
471    }
472
473    fn mangle_block(&self, b: Block, shadow: &HashSet<String>) -> Block {
474        let mut shadow = shadow.clone();
475        let statements = b
476            .statements
477            .into_iter()
478            .map(|s| match s {
479                Statement::Let { name, ty, value } => {
480                    let value = self.mangle_expr(value, &shadow);
481                    let ty = ty.map(|t| self.mangle_type_expr(t));
482                    shadow.insert(name.clone());
483                    Statement::Let { name, ty, value }
484                }
485                Statement::Expr(e) => Statement::Expr(self.mangle_expr(e, &shadow)),
486            })
487            .collect();
488        let result = Box::new(self.mangle_expr(*b.result, &shadow));
489        Block { statements, result }
490    }
491
492    fn mangle_expr(&self, e: Expr, shadow: &HashSet<String>) -> Expr {
493        match e {
494            Expr::Lit(_) => e,
495            Expr::Var(name) => {
496                if !shadow.contains(&name) && self.local_names.contains(&name) {
497                    Expr::Var(self.qualify(&name))
498                } else {
499                    Expr::Var(name)
500                }
501            }
502            Expr::Block(b) => Expr::Block(self.mangle_block(b, shadow)),
503            Expr::Call { callee, args } => {
504                let mangled_args: Vec<Expr> = args
505                    .into_iter()
506                    .map(|a| self.mangle_expr(a, shadow))
507                    .collect();
508                if let Expr::Field { value, field } = (*callee).clone() {
509                    if let Expr::Var(alias) = *value {
510                        if !shadow.contains(&alias) {
511                            if let Some(child) = self.path_imports.get(&alias) {
512                                return Expr::Call {
513                                    callee: Box::new(Expr::Var(format!("{child}.{field}"))),
514                                    args: mangled_args,
515                                };
516                            }
517                        }
518                    }
519                }
520                Expr::Call {
521                    callee: Box::new(self.mangle_expr(*callee, shadow)),
522                    args: mangled_args,
523                }
524            }
525            Expr::Pipe { left, right } => Expr::Pipe {
526                left: Box::new(self.mangle_expr(*left, shadow)),
527                right: Box::new(self.mangle_expr(*right, shadow)),
528            },
529            Expr::Try(inner) => Expr::Try(Box::new(self.mangle_expr(*inner, shadow))),
530            Expr::Field { value, field } => {
531                if let Expr::Var(alias) = (*value).clone() {
532                    if !shadow.contains(&alias) {
533                        if let Some(child) = self.path_imports.get(&alias) {
534                            return Expr::Var(format!("{child}.{field}"));
535                        }
536                    }
537                }
538                Expr::Field {
539                    value: Box::new(self.mangle_expr(*value, shadow)),
540                    field,
541                }
542            }
543            Expr::BinOp { op, lhs, rhs } => Expr::BinOp {
544                op,
545                lhs: Box::new(self.mangle_expr(*lhs, shadow)),
546                rhs: Box::new(self.mangle_expr(*rhs, shadow)),
547            },
548            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
549                op,
550                expr: Box::new(self.mangle_expr(*expr, shadow)),
551            },
552            Expr::If {
553                cond,
554                then_block,
555                else_block,
556            } => Expr::If {
557                cond: Box::new(self.mangle_expr(*cond, shadow)),
558                then_block: self.mangle_block(then_block, shadow),
559                else_block: self.mangle_block(else_block, shadow),
560            },
561            Expr::Match { scrutinee, arms } => Expr::Match {
562                scrutinee: Box::new(self.mangle_expr(*scrutinee, shadow)),
563                arms: arms
564                    .into_iter()
565                    .map(|a| {
566                        let mut arm_shadow = shadow.clone();
567                        collect_pattern_binders(&a.pattern, &mut arm_shadow);
568                        Arm {
569                            pattern: self.mangle_pattern(a.pattern),
570                            body: self.mangle_expr(a.body, &arm_shadow),
571                        }
572                    })
573                    .collect(),
574            },
575            Expr::RecordLit(fields) => Expr::RecordLit(
576                fields
577                    .into_iter()
578                    .map(|f| RecordLitField {
579                        name: f.name,
580                        value: self.mangle_expr(f.value, shadow),
581                    })
582                    .collect(),
583            ),
584            Expr::TupleLit(items) => Expr::TupleLit(
585                items
586                    .into_iter()
587                    .map(|i| self.mangle_expr(i, shadow))
588                    .collect(),
589            ),
590            Expr::ListLit(items) => Expr::ListLit(
591                items
592                    .into_iter()
593                    .map(|i| self.mangle_expr(i, shadow))
594                    .collect(),
595            ),
596            Expr::Constructor { name, args } => Expr::Constructor {
597                name,
598                args: args
599                    .into_iter()
600                    .map(|a| self.mangle_expr(a, shadow))
601                    .collect(),
602            },
603            Expr::Ascription { value, ty } => Expr::Ascription {
604                value: Box::new(self.mangle_expr(*value, shadow)),
605                ty: self.mangle_type_expr(ty),
606            },
607            Expr::Lambda(lambda) => {
608                let mut lam_shadow = shadow.clone();
609                for p in &lambda.params {
610                    lam_shadow.insert(p.name.clone());
611                }
612                Expr::Lambda(Box::new(Lambda {
613                    params: lambda
614                        .params
615                        .into_iter()
616                        .map(|p| Param {
617                            name: p.name,
618                            ty: self.mangle_type_expr(p.ty),
619                        })
620                        .collect(),
621                    return_type: self.mangle_type_expr(lambda.return_type),
622                    effects: lambda.effects,
623                    body: self.mangle_block(lambda.body, &lam_shadow),
624                }))
625            }
626        }
627    }
628
629    fn mangle_pattern(&self, p: Pattern) -> Pattern {
630        match p {
631            Pattern::Constructor { name, args } => Pattern::Constructor {
632                name,
633                args: args.into_iter().map(|a| self.mangle_pattern(a)).collect(),
634            },
635            Pattern::Record { fields, rest } => Pattern::Record {
636                fields: fields
637                    .into_iter()
638                    .map(|f| RecordPatField {
639                        name: f.name,
640                        pattern: f.pattern.map(|p| self.mangle_pattern(p)),
641                    })
642                    .collect(),
643                rest,
644            },
645            Pattern::Tuple(items) => {
646                Pattern::Tuple(items.into_iter().map(|p| self.mangle_pattern(p)).collect())
647            }
648            Pattern::Lit(_) | Pattern::Var(_) | Pattern::Wild => p,
649        }
650    }
651}
652
653fn collect_pattern_binders(p: &Pattern, out: &mut HashSet<String>) {
654    match p {
655        Pattern::Var(name) => {
656            out.insert(name.clone());
657        }
658        Pattern::Constructor { args, .. } => {
659            for a in args {
660                collect_pattern_binders(a, out);
661            }
662        }
663        Pattern::Record { fields, .. } => {
664            for f in fields {
665                match &f.pattern {
666                    Some(p) => collect_pattern_binders(p, out),
667                    // `{ name }` shorthand binds `name`.
668                    None => {
669                        out.insert(f.name.clone());
670                    }
671                }
672            }
673        }
674        Pattern::Tuple(items) => {
675            for p in items {
676                collect_pattern_binders(p, out);
677            }
678        }
679        Pattern::Lit(_) | Pattern::Wild => {}
680    }
681}