Skip to main content

lex_syntax/
loader.rs

1//! Multi-file loader: resolves `import "./..."`, `import "../..."`, and
2//! `import "/abs/..."` statements relative to the importer, recursively
3//! parses, and produces a single [`Program`] with all stages merged.
4//!
5//! Names that are local to an imported file are mangled with a
6//! **per-file-path** prefix, so the same module imported via multiple
7//! aliases (or from multiple parents in a diamond shape) collapses to
8//! one set of mangled names — same SigId, same nominal identity.
9//! Stdlib imports (`import "std.foo" as bar`) pass through unchanged.
10//!
11//! ## Mangling
12//!
13//! Each loaded file gets a prefix derived from its canonical filesystem
14//! path. The entry file's prefix is empty (so `lex run main.lex
15//! process` works unchanged). Imported files use `<stem>_<hash>`
16//! where `hash` is the first 8 hex chars of SHA-256 of the canonical
17//! path string. The hash disambiguates same-stem files in different
18//! directories without forcing a project manifest.
19//!
20//! Within a file at prefix `P`:
21//!
22//! - `fn foo` declared in this file becomes `<P>.foo` (just `foo` at root).
23//! - `type T` declared in this file becomes `<P>.T`.
24//! - References to a locally-declared name get mangled, **unless** the
25//!   name is shadowed by a binder (let, fn param, lambda param, or
26//!   pattern binder) in scope.
27//! - `m.foo` where `m` is a path-import alias is rewritten to the
28//!   imported file's prefix-qualified name. Two parents importing the
29//!   same file see the same prefix → calls and types unify.
30//! - `m.foo` where `m` is a stdlib alias is unchanged.
31//!
32//! Variant constructors are **not** mangled — they live in a global
33//! namespace, and a collision between two imported types' constructors
34//! surfaces later as a type-check error. Same for record field names.
35//!
36//! ## Diamond imports
37//!
//! `main.lex` imports `./left` and `./right`, both of which import
//! `./shared`. Both imports resolve to the same canonical path, and
//! `shared.lex` is read, parsed, and merged into the output exactly
//! once (subsequent loads of that canonical path return an empty
//! Program without re-reading the file). This is what makes
//! `s.build_report(...)` and `v.read_score(...)` agree on `Report`'s
//! nominal identity.
44//!
45//! ## Limitations (tracked separately)
46//!
47//! The mangling key is the canonical filesystem path. Moving a file
48//! changes its SigId; renaming changes the file-stem half of the
49//! prefix. The eventual fix — content-addressed identity decoupled
50//! from filesystem layout — lives with store-native imports
51//! (`import "stage:..."`); see the corresponding follow-up tracker.
52
53use std::collections::{HashMap, HashSet};
54use std::path::{Path, PathBuf};
55use thiserror::Error;
56
57use sha2::{Digest, Sha256};
58
59use crate::syntax::*;
60use crate::workspace::{resolve_package_import, PackageError};
61use crate::{parse_source, SyntaxError};
62
63#[derive(Debug, Error)]
64pub enum LoadError {
65    #[error("read {path}: {source}")]
66    Io {
67        path: String,
68        #[source]
69        source: std::io::Error,
70    },
71    #[error("parse {path}: {source}")]
72    Syntax {
73        path: String,
74        #[source]
75        source: SyntaxError,
76    },
77    #[error("import cycle: {chain}")]
78    Cycle { chain: String },
79    #[error("import \"{reference}\" from {importer}: file not found")]
80    NotFound { importer: String, reference: String },
81    #[error("local imports (`./`, `../`, `/`) require a base path; cannot resolve from a string source")]
82    LocalImportInStringSource,
83    #[error("package import error: {0}")]
84    Package(#[from] PackageError),
85}
86
87/// Load a multi-file Lex program, expanding local imports relative to
88/// the entry path. Stdlib imports (`std.*`) pass through unchanged.
89pub fn load_program(entry: &Path) -> Result<Program, LoadError> {
90    let entry_canonical = entry.canonicalize().map_err(|source| LoadError::Io {
91        path: entry.display().to_string(),
92        source,
93    })?;
94    let mut state = LoaderState {
95        in_progress: Vec::new(),
96        loaded: HashSet::new(),
97        prefixes: HashMap::new(),
98    };
99    // Entry file's prefix is empty so `lex run main.lex process` works
100    // without users typing the hashed prefix.
101    state.prefixes.insert(entry_canonical.clone(), String::new());
102    state.load(&entry_canonical)
103}
104
105/// Load a Lex program from a string source. Local-path imports are
106/// rejected up-front since there's no base path to resolve from.
107pub fn load_program_from_str(src: &str) -> Result<Program, LoadError> {
108    let prog = parse_source(src).map_err(|source| LoadError::Syntax {
109        path: "<input>".into(),
110        source,
111    })?;
112    for item in &prog.items {
113        if let Item::Import(imp) = item {
114            if is_path_import(&imp.reference)
115                || split_package_import(&imp.reference).is_some()
116            {
117                return Err(LoadError::LocalImportInStringSource);
118            }
119        }
120    }
121    Ok(prog)
122}
123
/// Mutable bookkeeping shared across one recursive load.
struct LoaderState {
    /// Stack of canonical paths whose load has started but not yet
    /// finished. Re-entering a path that is on this stack is an import
    /// cycle; the stack also supplies the chain shown in the error.
    in_progress: Vec<PathBuf>,
    /// Canonical paths whose items have already been merged into the
    /// output. Importing one of these again (e.g. `import "./shared"`
    /// from a second parent) contributes no further items.
    loaded: HashSet<PathBuf>,
    /// Mangling prefix per canonical path, filled in on first request.
    /// The entry file is seeded with an empty prefix.
    prefixes: HashMap<PathBuf, String>,
}
134
135impl LoaderState {
136    fn prefix_for(&mut self, canonical: &Path) -> String {
137        if let Some(p) = self.prefixes.get(canonical) {
138            return p.clone();
139        }
140        let stem = canonical
141            .file_stem()
142            .and_then(|s| s.to_str())
143            .unwrap_or("module");
144        let mut hasher = Sha256::new();
145        hasher.update(canonical.to_string_lossy().as_bytes());
146        let digest = hasher.finalize();
147        let prefix = format!("{stem}_{:08x}", u32::from_be_bytes([
148            digest[0], digest[1], digest[2], digest[3],
149        ]));
150        self.prefixes.insert(canonical.to_path_buf(), prefix.clone());
151        prefix
152    }
153
154    fn load(&mut self, canonical: &Path) -> Result<Program, LoadError> {
155        if self.in_progress.contains(&canonical.to_path_buf()) {
156            let mut chain: Vec<String> = self
157                .in_progress
158                .iter()
159                .map(|p| p.display().to_string())
160                .collect();
161            chain.push(canonical.display().to_string());
162            return Err(LoadError::Cycle {
163                chain: chain.join(" -> "),
164            });
165        }
166        // Diamond dedupe: if this file was already merged on another
167        // path through the import graph, its items are already in the
168        // output Vec — return an empty Program so the caller's
169        // `merged_children.extend(...)` is a no-op for items, but the
170        // call still resolves so the parent's `path_imports` map gets
171        // populated below.
172        if self.loaded.contains(canonical) {
173            return Ok(Program { items: Vec::new() });
174        }
175        self.in_progress.push(canonical.to_path_buf());
176
177        let src = std::fs::read_to_string(canonical).map_err(|source| LoadError::Io {
178            path: canonical.display().to_string(),
179            source,
180        })?;
181        let prog = parse_source(&src).map_err(|source| LoadError::Syntax {
182            path: canonical.display().to_string(),
183            source,
184        })?;
185
186        let local_names: HashSet<String> = prog
187            .items
188            .iter()
189            .filter_map(|item| match item {
190                Item::FnDecl(fd) => Some(fd.name.clone()),
191                Item::TypeDecl(td) => Some(td.name.clone()),
192                _ => None,
193            })
194            .collect();
195
196        // alias used by this file → mangling prefix of the imported file
197        let mut path_imports: HashMap<String, String> = HashMap::new();
198        let mut merged_children: Vec<Item> = Vec::new();
199        let mut std_imports: Vec<Item> = Vec::new();
200        let mut my_items: Vec<Item> = Vec::new();
201
202        for item in prog.items {
203            match item {
204                Item::Import(ref imp) if is_path_import(&imp.reference) => {
205                    let resolved = resolve_import(canonical, &imp.reference)?;
206                    let child_prefix = self.prefix_for(&resolved);
207                    path_imports.insert(imp.alias.clone(), child_prefix);
208                    let child_prog = self.load(&resolved)?;
209                    merged_children.extend(child_prog.items);
210                }
211                Item::Import(ref imp)
212                    if split_package_import(&imp.reference).is_some() =>
213                {
214                    let (pkg, module) =
215                        split_package_import(&imp.reference).unwrap();
216                    let resolved =
217                        resolve_package_import(canonical, pkg, module)
218                            .map_err(LoadError::Package)?
219                            .canonicalize()
220                            .map_err(|source| LoadError::Io {
221                                path: imp.reference.clone(),
222                                source,
223                            })?;
224                    let child_prefix = self.prefix_for(&resolved);
225                    path_imports.insert(imp.alias.clone(), child_prefix);
226                    let child_prog = self.load(&resolved)?;
227                    merged_children.extend(child_prog.items);
228                }
229                Item::Import(_) => std_imports.push(item),
230                _ => my_items.push(item),
231            }
232        }
233
234        let my_prefix = self.prefix_for(canonical);
235        let mangler = Mangler {
236            prefix: my_prefix,
237            local_names: &local_names,
238            path_imports: &path_imports,
239        };
240        let mangled: Vec<Item> = my_items
241            .into_iter()
242            .map(|i| mangler.mangle_item(i))
243            .collect();
244
245        self.in_progress.pop();
246        self.loaded.insert(canonical.to_path_buf());
247
248        // Output order: std imports first (deduped against children's),
249        // then merged children's items, then this file's items.
250        let mut out: Vec<Item> = Vec::new();
251        for s in std_imports {
252            if !merged_children.iter().any(|m| m == &s) {
253                out.push(s);
254            }
255        }
256        out.extend(merged_children);
257        out.extend(mangled);
258        Ok(Program { items: out })
259    }
260}
261
/// Whether `reference` names a file on disk: it starts with `./`,
/// `../`, or `/`.
fn is_path_import(reference: &str) -> bool {
    ["./", "../", "/"]
        .iter()
        .any(|lead| reference.starts_with(*lead))
}
265
/// Split a package import such as `"lex-schema/validate"` into
/// `(pkg_name, module_path)` at its first `/`.
///
/// Returns `None` for references that start with `./`, `../`, `/` or
/// `std.` — those are handled elsewhere — and for references that
/// contain no `/` at all.
fn split_package_import(reference: &str) -> Option<(&str, &str)> {
    const NON_PACKAGE_LEADS: [&str; 4] = ["./", "../", "/", "std."];

    let excluded = NON_PACKAGE_LEADS
        .iter()
        .any(|lead| reference.starts_with(*lead));
    if excluded {
        None
    } else {
        reference.split_once('/')
    }
}
279
280fn resolve_import(importer: &Path, reference: &str) -> Result<PathBuf, LoadError> {
281    let importer_dir = importer.parent().unwrap_or_else(|| Path::new("."));
282    let mut resolved: PathBuf = if reference.starts_with('/') {
283        PathBuf::from(reference)
284    } else {
285        importer_dir.join(reference)
286    };
287    if resolved.extension().is_none() {
288        resolved.set_extension("lex");
289    }
290    if !resolved.exists() {
291        return Err(LoadError::NotFound {
292            importer: importer.display().to_string(),
293            reference: reference.to_string(),
294        });
295    }
296    // Canonicalize so that `../../shared/foo` and `../other/../shared/foo`
297    // resolve to the same HashMap key, preventing duplicate loads and
298    // mismatched mangling prefixes in diamond-import graphs (#358).
299    resolved.canonicalize().map_err(|source| LoadError::Io {
300        path: resolved.display().to_string(),
301        source,
302    })
303}
304
/// Rewrites the names in one file's items: locally declared names are
/// qualified with the file's prefix, and references through a
/// path-import alias are qualified with the imported file's prefix.
struct Mangler<'a> {
    /// Prefix applied to items declared in this file: empty for the
    /// entry file, `<stem>_<hash8>` for imported files.
    prefix: String,
    /// Names of the functions and types declared at the top level of
    /// this file.
    local_names: &'a HashSet<String>,
    /// Local alias → mangling prefix of the imported file. `m.foo`
    /// becomes `<imported_prefix>.foo` whatever the alias `m` was
    /// called, so two parents importing the same module agree.
    path_imports: &'a HashMap<String, String>,
}
315
316impl<'a> Mangler<'a> {
317    fn qualify(&self, name: &str) -> String {
318        if self.prefix.is_empty() {
319            name.to_string()
320        } else {
321            format!("{}.{}", self.prefix, name)
322        }
323    }
324
325    fn mangle_item(&self, item: Item) -> Item {
326        match item {
327            Item::Import(imp) => Item::Import(imp),
328            Item::TypeDecl(td) => Item::TypeDecl(self.mangle_type_decl(td)),
329            Item::FnDecl(fd) => Item::FnDecl(self.mangle_fn_decl(fd)),
330        }
331    }
332
333    fn mangle_type_decl(&self, td: TypeDecl) -> TypeDecl {
334        TypeDecl {
335            name: self.qualify(&td.name),
336            params: td.params,
337            definition: self.mangle_type_expr(td.definition),
338        }
339    }
340
341    fn mangle_fn_decl(&self, fd: FnDecl) -> FnDecl {
342        let mut shadow = HashSet::new();
343        for p in &fd.params {
344            shadow.insert(p.name.clone());
345        }
346        // Example args/expected sit outside the body's parameter scope:
347        // they're top-level expressions evaluated against the function
348        // signature, so the only names they can see are the file's
349        // top-level fns/types and any path-import aliases — i.e., an
350        // empty shadow set (#391).
351        let empty_shadow = HashSet::new();
352        let examples = fd
353            .examples
354            .into_iter()
355            .map(|ex| Example {
356                args: ex
357                    .args
358                    .into_iter()
359                    .map(|a| self.mangle_expr(a, &empty_shadow))
360                    .collect(),
361                expected: self.mangle_expr(ex.expected, &empty_shadow),
362            })
363            .collect();
364        FnDecl {
365            name: self.qualify(&fd.name),
366            type_params: fd.type_params,
367            params: fd
368                .params
369                .into_iter()
370                .map(|p| Param {
371                    name: p.name,
372                    ty: self.mangle_type_expr(p.ty),
373                })
374                .collect(),
375            effects: fd.effects,
376            return_type: self.mangle_type_expr(fd.return_type),
377            body: self.mangle_block(fd.body, &shadow),
378            examples,
379        }
380    }
381
382    fn mangle_type_expr(&self, te: TypeExpr) -> TypeExpr {
383        match te {
384            TypeExpr::Named { name, args } => TypeExpr::Named {
385                name: self.rewrite_type_name(&name),
386                args: args.into_iter().map(|a| self.mangle_type_expr(a)).collect(),
387            },
388            TypeExpr::Record(fields) => TypeExpr::Record(
389                fields
390                    .into_iter()
391                    .map(|f| TypeField {
392                        name: f.name,
393                        ty: self.mangle_type_expr(f.ty),
394                    })
395                    .collect(),
396            ),
397            TypeExpr::RecordWithSpreads { spreads, fields } => TypeExpr::RecordWithSpreads {
398                spreads: spreads.into_iter().map(|s| self.rewrite_type_name(&s)).collect(),
399                fields: fields
400                    .into_iter()
401                    .map(|f| TypeField {
402                        name: f.name,
403                        ty: self.mangle_type_expr(f.ty),
404                    })
405                    .collect(),
406            },
407            TypeExpr::Tuple(items) => {
408                TypeExpr::Tuple(items.into_iter().map(|t| self.mangle_type_expr(t)).collect())
409            }
410            TypeExpr::Function {
411                params,
412                effects,
413                ret,
414            } => TypeExpr::Function {
415                params: params
416                    .into_iter()
417                    .map(|t| self.mangle_type_expr(t))
418                    .collect(),
419                effects,
420                ret: Box::new(self.mangle_type_expr(*ret)),
421            },
422            TypeExpr::Union(variants) => TypeExpr::Union(
423                variants
424                    .into_iter()
425                    .map(|v| UnionVariant {
426                        name: v.name,
427                        payload: v.payload.map(|t| self.mangle_type_expr(t)),
428                    })
429                    .collect(),
430            ),
431            TypeExpr::Refined { base, binding, predicate } => TypeExpr::Refined {
432                base: Box::new(self.mangle_type_expr(*base)),
433                binding,
434                // The predicate is an expression; its names are
435                // resolved during type-check, not loader-time, so
436                // it passes through unchanged here. Slice 2 wires
437                // up discharge through the spec-checker.
438                predicate,
439            },
440        }
441    }
442
443    /// Rewrite a possibly-qualified type name to its mangled form.
444    fn rewrite_type_name(&self, name: &str) -> String {
445        if let Some((alias, rest)) = name.split_once('.') {
446            if let Some(child) = self.path_imports.get(alias) {
447                return format!("{child}.{rest}");
448            }
449            return name.to_string();
450        }
451        if self.local_names.contains(name) {
452            return self.qualify(name);
453        }
454        name.to_string()
455    }
456
457    fn mangle_block(&self, b: Block, shadow: &HashSet<String>) -> Block {
458        let mut shadow = shadow.clone();
459        let statements = b
460            .statements
461            .into_iter()
462            .map(|s| match s {
463                Statement::Let { name, ty, value } => {
464                    let value = self.mangle_expr(value, &shadow);
465                    let ty = ty.map(|t| self.mangle_type_expr(t));
466                    shadow.insert(name.clone());
467                    Statement::Let { name, ty, value }
468                }
469                Statement::Expr(e) => Statement::Expr(self.mangle_expr(e, &shadow)),
470            })
471            .collect();
472        let result = Box::new(self.mangle_expr(*b.result, &shadow));
473        Block { statements, result }
474    }
475
476    fn mangle_expr(&self, e: Expr, shadow: &HashSet<String>) -> Expr {
477        match e {
478            Expr::Lit(_) => e,
479            Expr::Var(name) => {
480                if !shadow.contains(&name) && self.local_names.contains(&name) {
481                    Expr::Var(self.qualify(&name))
482                } else {
483                    Expr::Var(name)
484                }
485            }
486            Expr::Block(b) => Expr::Block(self.mangle_block(b, shadow)),
487            Expr::Call { callee, args } => {
488                let mangled_args: Vec<Expr> = args
489                    .into_iter()
490                    .map(|a| self.mangle_expr(a, shadow))
491                    .collect();
492                if let Expr::Field { value, field } = (*callee).clone() {
493                    if let Expr::Var(alias) = *value {
494                        if !shadow.contains(&alias) {
495                            if let Some(child) = self.path_imports.get(&alias) {
496                                return Expr::Call {
497                                    callee: Box::new(Expr::Var(format!("{child}.{field}"))),
498                                    args: mangled_args,
499                                };
500                            }
501                        }
502                    }
503                }
504                Expr::Call {
505                    callee: Box::new(self.mangle_expr(*callee, shadow)),
506                    args: mangled_args,
507                }
508            }
509            Expr::Pipe { left, right } => Expr::Pipe {
510                left: Box::new(self.mangle_expr(*left, shadow)),
511                right: Box::new(self.mangle_expr(*right, shadow)),
512            },
513            Expr::Try(inner) => Expr::Try(Box::new(self.mangle_expr(*inner, shadow))),
514            Expr::Field { value, field } => {
515                if let Expr::Var(alias) = (*value).clone() {
516                    if !shadow.contains(&alias) {
517                        if let Some(child) = self.path_imports.get(&alias) {
518                            return Expr::Var(format!("{child}.{field}"));
519                        }
520                    }
521                }
522                Expr::Field {
523                    value: Box::new(self.mangle_expr(*value, shadow)),
524                    field,
525                }
526            }
527            Expr::BinOp { op, lhs, rhs } => Expr::BinOp {
528                op,
529                lhs: Box::new(self.mangle_expr(*lhs, shadow)),
530                rhs: Box::new(self.mangle_expr(*rhs, shadow)),
531            },
532            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
533                op,
534                expr: Box::new(self.mangle_expr(*expr, shadow)),
535            },
536            Expr::If {
537                cond,
538                then_block,
539                else_block,
540            } => Expr::If {
541                cond: Box::new(self.mangle_expr(*cond, shadow)),
542                then_block: self.mangle_block(then_block, shadow),
543                else_block: self.mangle_block(else_block, shadow),
544            },
545            Expr::Match { scrutinee, arms } => Expr::Match {
546                scrutinee: Box::new(self.mangle_expr(*scrutinee, shadow)),
547                arms: arms
548                    .into_iter()
549                    .map(|a| {
550                        let mut arm_shadow = shadow.clone();
551                        collect_pattern_binders(&a.pattern, &mut arm_shadow);
552                        Arm {
553                            pattern: self.mangle_pattern(a.pattern),
554                            body: self.mangle_expr(a.body, &arm_shadow),
555                        }
556                    })
557                    .collect(),
558            },
559            Expr::RecordLit(fields) => Expr::RecordLit(
560                fields
561                    .into_iter()
562                    .map(|f| RecordLitField {
563                        name: f.name,
564                        value: self.mangle_expr(f.value, shadow),
565                    })
566                    .collect(),
567            ),
568            Expr::TupleLit(items) => Expr::TupleLit(
569                items
570                    .into_iter()
571                    .map(|i| self.mangle_expr(i, shadow))
572                    .collect(),
573            ),
574            Expr::ListLit(items) => Expr::ListLit(
575                items
576                    .into_iter()
577                    .map(|i| self.mangle_expr(i, shadow))
578                    .collect(),
579            ),
580            Expr::Constructor { name, args } => Expr::Constructor {
581                name,
582                args: args
583                    .into_iter()
584                    .map(|a| self.mangle_expr(a, shadow))
585                    .collect(),
586            },
587            Expr::Ascription { value, ty } => Expr::Ascription {
588                value: Box::new(self.mangle_expr(*value, shadow)),
589                ty: self.mangle_type_expr(ty),
590            },
591            Expr::Lambda(lambda) => {
592                let mut lam_shadow = shadow.clone();
593                for p in &lambda.params {
594                    lam_shadow.insert(p.name.clone());
595                }
596                Expr::Lambda(Box::new(Lambda {
597                    params: lambda
598                        .params
599                        .into_iter()
600                        .map(|p| Param {
601                            name: p.name,
602                            ty: self.mangle_type_expr(p.ty),
603                        })
604                        .collect(),
605                    return_type: self.mangle_type_expr(lambda.return_type),
606                    effects: lambda.effects,
607                    body: self.mangle_block(lambda.body, &lam_shadow),
608                }))
609            }
610        }
611    }
612
613    fn mangle_pattern(&self, p: Pattern) -> Pattern {
614        match p {
615            Pattern::Constructor { name, args } => Pattern::Constructor {
616                name,
617                args: args.into_iter().map(|a| self.mangle_pattern(a)).collect(),
618            },
619            Pattern::Record { fields, rest } => Pattern::Record {
620                fields: fields
621                    .into_iter()
622                    .map(|f| RecordPatField {
623                        name: f.name,
624                        pattern: f.pattern.map(|p| self.mangle_pattern(p)),
625                    })
626                    .collect(),
627                rest,
628            },
629            Pattern::Tuple(items) => {
630                Pattern::Tuple(items.into_iter().map(|p| self.mangle_pattern(p)).collect())
631            }
632            Pattern::Lit(_) | Pattern::Var(_) | Pattern::Wild => p,
633        }
634    }
635}
636
637fn collect_pattern_binders(p: &Pattern, out: &mut HashSet<String>) {
638    match p {
639        Pattern::Var(name) => {
640            out.insert(name.clone());
641        }
642        Pattern::Constructor { args, .. } => {
643            for a in args {
644                collect_pattern_binders(a, out);
645            }
646        }
647        Pattern::Record { fields, .. } => {
648            for f in fields {
649                match &f.pattern {
650                    Some(p) => collect_pattern_binders(p, out),
651                    // `{ name }` shorthand binds `name`.
652                    None => {
653                        out.insert(f.name.clone());
654                    }
655                }
656            }
657        }
658        Pattern::Tuple(items) => {
659            for p in items {
660                collect_pattern_binders(p, out);
661            }
662        }
663        Pattern::Lit(_) | Pattern::Wild => {}
664    }
665}