Skip to main content

lex_syntax/
loader.rs

1//! Multi-file loader: resolves `import "./..."`, `import "../..."`, and
2//! `import "/abs/..."` statements relative to the importer, recursively
3//! parses, and produces a single [`Program`] with all stages merged.
4//!
5//! Names that are local to an imported file are mangled with a
6//! **per-file-path** prefix, so the same module imported via multiple
7//! aliases (or from multiple parents in a diamond shape) collapses to
8//! one set of mangled names — same SigId, same nominal identity.
9//! Stdlib imports (`import "std.foo" as bar`) pass through unchanged.
10//!
11//! ## Mangling
12//!
13//! Each loaded file gets a prefix derived from its canonical filesystem
14//! path. The entry file's prefix is empty (so `lex run main.lex
15//! process` works unchanged). Imported files use `<stem>_<hash>`
16//! where `hash` is the first 8 hex chars of SHA-256 of the canonical
17//! path string. The hash disambiguates same-stem files in different
18//! directories without forcing a project manifest.
19//!
20//! Within a file at prefix `P`:
21//!
22//! - `fn foo` declared in this file becomes `<P>.foo` (just `foo` at root).
23//! - `type T` declared in this file becomes `<P>.T`.
24//! - References to a locally-declared name get mangled, **unless** the
25//!   name is shadowed by a binder (let, fn param, lambda param, or
26//!   pattern binder) in scope.
27//! - `m.foo` where `m` is a path-import alias is rewritten to the
28//!   imported file's prefix-qualified name. Two parents importing the
29//!   same file see the same prefix → calls and types unify.
30//! - `m.foo` where `m` is a stdlib alias is unchanged.
31//!
32//! Variant constructors are **not** mangled — they live in a global
33//! namespace, and a collision between two imported types' constructors
34//! surfaces later as a type-check error. Same for record field names.
35//!
36//! ## Diamond imports
37//!
38//! `main.lex` imports `./left` and `./right`, both of which import
//! `./shared`. `shared.lex` is read and parsed only the first time it
//! is loaded; later loads of the same canonical path return an empty
//! Program before reading the file, so its mangled items are merged
//! into the output exactly once. This
42//! is what makes `s.build_report(...)` and `v.read_score(...)` agree
43//! on `Report`'s nominal identity.
44//!
45//! ## Limitations (tracked separately)
46//!
47//! The mangling key is the canonical filesystem path. Moving a file
48//! changes its SigId; renaming changes the file-stem half of the
49//! prefix. The eventual fix — content-addressed identity decoupled
50//! from filesystem layout — lives with store-native imports
51//! (`import "stage:..."`); see the corresponding follow-up tracker.
52
53use std::collections::{HashMap, HashSet};
54use std::path::{Path, PathBuf};
55use thiserror::Error;
56
57use sha2::{Digest, Sha256};
58
59use crate::syntax::*;
60use crate::workspace::{resolve_package_import, PackageError};
61use crate::{parse_source, SyntaxError};
62
/// Everything that can go wrong while loading a (possibly multi-file)
/// Lex program.
#[derive(Debug, Error)]
pub enum LoadError {
    /// A filesystem operation (canonicalizing or reading a file) failed.
    /// `path` is the display form of the path involved, or the raw import
    /// reference when canonicalizing a resolved package import fails.
    #[error("read {path}: {source}")]
    Io {
        path: String,
        #[source]
        source: std::io::Error,
    },
    /// `parse_source` rejected a file. `path` is the file's display path,
    /// or `"<input>"` when the program came from a string.
    #[error("parse {path}: {source}")]
    Syntax {
        path: String,
        #[source]
        source: SyntaxError,
    },
    /// A file was reached again while it was still being loaded. `chain`
    /// lists the files on the load stack followed by the re-entered file,
    /// joined with `" -> "`.
    #[error("import cycle: {chain}")]
    Cycle { chain: String },
    /// A local import resolved to a path that does not exist.
    #[error("import \"{reference}\" from {importer}: file not found")]
    NotFound { importer: String, reference: String },
    /// A string-sourced program contained an import that needs a base
    /// path. `load_program_from_str` returns this for local path imports
    /// and for package imports alike.
    #[error("local imports (`./`, `../`, `/`) require a base path; cannot resolve from a string source")]
    LocalImportInStringSource,
    /// Resolving a package import failed; wraps the underlying
    /// [`PackageError`].
    #[error("package import error: {0}")]
    Package(#[from] PackageError),
}
86
87/// Load a multi-file Lex program, expanding local imports relative to
88/// the entry path. Stdlib imports (`std.*`) pass through unchanged.
89pub fn load_program(entry: &Path) -> Result<Program, LoadError> {
90    let entry_canonical = entry.canonicalize().map_err(|source| LoadError::Io {
91        path: entry.display().to_string(),
92        source,
93    })?;
94    let mut state = LoaderState {
95        in_progress: Vec::new(),
96        loaded: HashSet::new(),
97        prefixes: HashMap::new(),
98    };
99    // Entry file's prefix is empty so `lex run main.lex process` works
100    // without users typing the hashed prefix.
101    state.prefixes.insert(entry_canonical.clone(), String::new());
102    state.load(&entry_canonical)
103}
104
105/// Load a Lex program from a string source. Local-path imports are
106/// rejected up-front since there's no base path to resolve from.
107pub fn load_program_from_str(src: &str) -> Result<Program, LoadError> {
108    let prog = parse_source(src).map_err(|source| LoadError::Syntax {
109        path: "<input>".into(),
110        source,
111    })?;
112    for item in &prog.items {
113        if let Item::Import(imp) = item {
114            if is_path_import(&imp.reference)
115                || split_package_import(&imp.reference).is_some()
116            {
117                return Err(LoadError::LocalImportInStringSource);
118            }
119        }
120    }
121    Ok(prog)
122}
123
/// Mutable bookkeeping shared across one recursive load of an import
/// graph.
struct LoaderState {
    /// Canonical paths of the files currently being loaded, outermost
    /// first. Reaching a path that is already on this stack is reported
    /// as an import cycle, and the stack is rendered into the cycle
    /// chain of the error.
    in_progress: Vec<PathBuf>,
    /// Canonical paths that have already been merged into the output.
    /// A second `import "./shared"` from a different parent skips
    /// re-merging — the file's mangled items are already there.
    loaded: HashSet<PathBuf>,
    /// Stable mangling prefix per canonical path. Computed lazily;
    /// the entry file is seeded with an empty prefix.
    prefixes: HashMap<PathBuf, String>,
}
134
135impl LoaderState {
136    fn prefix_for(&mut self, canonical: &Path) -> String {
137        if let Some(p) = self.prefixes.get(canonical) {
138            return p.clone();
139        }
140        let stem = canonical
141            .file_stem()
142            .and_then(|s| s.to_str())
143            .unwrap_or("module");
144        let mut hasher = Sha256::new();
145        hasher.update(canonical.to_string_lossy().as_bytes());
146        let digest = hasher.finalize();
147        let prefix = format!("{stem}_{:08x}", u32::from_be_bytes([
148            digest[0], digest[1], digest[2], digest[3],
149        ]));
150        self.prefixes.insert(canonical.to_path_buf(), prefix.clone());
151        prefix
152    }
153
154    fn load(&mut self, canonical: &Path) -> Result<Program, LoadError> {
155        if self.in_progress.contains(&canonical.to_path_buf()) {
156            let mut chain: Vec<String> = self
157                .in_progress
158                .iter()
159                .map(|p| p.display().to_string())
160                .collect();
161            chain.push(canonical.display().to_string());
162            return Err(LoadError::Cycle {
163                chain: chain.join(" -> "),
164            });
165        }
166        // Diamond dedupe: if this file was already merged on another
167        // path through the import graph, its items are already in the
168        // output Vec — return an empty Program so the caller's
169        // `merged_children.extend(...)` is a no-op for items, but the
170        // call still resolves so the parent's `path_imports` map gets
171        // populated below.
172        if self.loaded.contains(canonical) {
173            return Ok(Program { items: Vec::new() });
174        }
175        self.in_progress.push(canonical.to_path_buf());
176
177        let src = std::fs::read_to_string(canonical).map_err(|source| LoadError::Io {
178            path: canonical.display().to_string(),
179            source,
180        })?;
181        let prog = parse_source(&src).map_err(|source| LoadError::Syntax {
182            path: canonical.display().to_string(),
183            source,
184        })?;
185
186        let local_names: HashSet<String> = prog
187            .items
188            .iter()
189            .filter_map(|item| match item {
190                Item::FnDecl(fd) => Some(fd.name.clone()),
191                Item::TypeDecl(td) => Some(td.name.clone()),
192                _ => None,
193            })
194            .collect();
195
196        // alias used by this file → mangling prefix of the imported file
197        let mut path_imports: HashMap<String, String> = HashMap::new();
198        let mut merged_children: Vec<Item> = Vec::new();
199        let mut std_imports: Vec<Item> = Vec::new();
200        let mut my_items: Vec<Item> = Vec::new();
201
202        for item in prog.items {
203            match item {
204                Item::Import(ref imp) if is_path_import(&imp.reference) => {
205                    let resolved = resolve_import(canonical, &imp.reference)?;
206                    let child_prefix = self.prefix_for(&resolved);
207                    path_imports.insert(imp.alias.clone(), child_prefix);
208                    let child_prog = self.load(&resolved)?;
209                    merged_children.extend(child_prog.items);
210                }
211                Item::Import(ref imp)
212                    if split_package_import(&imp.reference).is_some() =>
213                {
214                    let (pkg, module) =
215                        split_package_import(&imp.reference).unwrap();
216                    let resolved =
217                        resolve_package_import(canonical, pkg, module)
218                            .map_err(LoadError::Package)?
219                            .canonicalize()
220                            .map_err(|source| LoadError::Io {
221                                path: imp.reference.clone(),
222                                source,
223                            })?;
224                    let child_prefix = self.prefix_for(&resolved);
225                    path_imports.insert(imp.alias.clone(), child_prefix);
226                    let child_prog = self.load(&resolved)?;
227                    merged_children.extend(child_prog.items);
228                }
229                Item::Import(_) => std_imports.push(item),
230                _ => my_items.push(item),
231            }
232        }
233
234        let my_prefix = self.prefix_for(canonical);
235        let mangler = Mangler {
236            prefix: my_prefix,
237            local_names: &local_names,
238            path_imports: &path_imports,
239        };
240        let mangled: Vec<Item> = my_items
241            .into_iter()
242            .map(|i| mangler.mangle_item(i))
243            .collect();
244
245        self.in_progress.pop();
246        self.loaded.insert(canonical.to_path_buf());
247
248        // Output order: std imports first (deduped against children's),
249        // then merged children's items, then this file's items.
250        let mut out: Vec<Item> = Vec::new();
251        for s in std_imports {
252            if !merged_children.iter().any(|m| m == &s) {
253                out.push(s);
254            }
255        }
256        out.extend(merged_children);
257        out.extend(mangled);
258        Ok(Program { items: out })
259    }
260}
261
/// Reports whether `reference` names a file on disk: relative to the
/// importer (`./`, `../`) or absolute (leading `/`).
fn is_path_import(reference: &str) -> bool {
    ["./", "../", "/"]
        .iter()
        .any(|lead| reference.starts_with(*lead))
}
265
/// Splits a package import such as `"lex-schema/validate"` into
/// `(pkg_name, module_path)` at the first `/`.
///
/// Returns `None` for anything that is not a package import: local
/// paths (`./`, `../`, leading `/`), stdlib references (`std.*`), and
/// references without a `/`.
fn split_package_import(reference: &str) -> Option<(&str, &str)> {
    let excluded = ["./", "../", "/", "std."]
        .iter()
        .any(|lead| reference.starts_with(*lead));
    if excluded {
        None
    } else {
        reference.split_once('/')
    }
}
279
280fn resolve_import(importer: &Path, reference: &str) -> Result<PathBuf, LoadError> {
281    let importer_dir = importer.parent().unwrap_or_else(|| Path::new("."));
282    let mut resolved: PathBuf = if reference.starts_with('/') {
283        PathBuf::from(reference)
284    } else {
285        importer_dir.join(reference)
286    };
287    if resolved.extension().is_none() {
288        resolved.set_extension("lex");
289    }
290    if !resolved.exists() {
291        return Err(LoadError::NotFound {
292            importer: importer.display().to_string(),
293            reference: reference.to_string(),
294        });
295    }
296    // Canonicalize so that `../../shared/foo` and `../other/../shared/foo`
297    // resolve to the same HashMap key, preventing duplicate loads and
298    // mismatched mangling prefixes in diamond-import graphs (#358).
299    resolved.canonicalize().map_err(|source| LoadError::Io {
300        path: resolved.display().to_string(),
301        source,
302    })
303}
304
/// Rewrites the names in one file's items: locally declared names gain
/// the file's prefix and `alias.name` references through a path-import
/// alias are redirected to the imported file's prefix.
struct Mangler<'a> {
    /// Mangling prefix for items declared in this file. Empty for the
    /// entry file, `<stem>_<hash8>` for imported files.
    prefix: String,
    /// Names that are candidates for prefixing when referenced
    /// unqualified. The loader fills this with the names of the file's
    /// top-level `fn` and `type` declarations.
    local_names: &'a HashSet<String>,
    /// Map from local alias to the imported file's mangling prefix.
    /// `m.foo` rewrites to `<imported_prefix>.foo` regardless of which
    /// alias `m` was, so two parents importing the same module agree.
    path_imports: &'a HashMap<String, String>,
}
315
316impl<'a> Mangler<'a> {
317    fn qualify(&self, name: &str) -> String {
318        if self.prefix.is_empty() {
319            name.to_string()
320        } else {
321            format!("{}.{}", self.prefix, name)
322        }
323    }
324
325    fn mangle_item(&self, item: Item) -> Item {
326        match item {
327            Item::Import(imp) => Item::Import(imp),
328            Item::TypeDecl(td) => Item::TypeDecl(self.mangle_type_decl(td)),
329            Item::FnDecl(fd) => Item::FnDecl(self.mangle_fn_decl(fd)),
330        }
331    }
332
333    fn mangle_type_decl(&self, td: TypeDecl) -> TypeDecl {
334        TypeDecl {
335            name: self.qualify(&td.name),
336            params: td.params,
337            definition: self.mangle_type_expr(td.definition),
338        }
339    }
340
341    fn mangle_fn_decl(&self, fd: FnDecl) -> FnDecl {
342        let mut shadow = HashSet::new();
343        for p in &fd.params {
344            shadow.insert(p.name.clone());
345        }
346        FnDecl {
347            name: self.qualify(&fd.name),
348            type_params: fd.type_params,
349            params: fd
350                .params
351                .into_iter()
352                .map(|p| Param {
353                    name: p.name,
354                    ty: self.mangle_type_expr(p.ty),
355                })
356                .collect(),
357            effects: fd.effects,
358            return_type: self.mangle_type_expr(fd.return_type),
359            body: self.mangle_block(fd.body, &shadow),
360            // Examples (#369) ride through the loader unchanged. They
361            // are self-contained calls with pure args; cross-module
362            // identifier rewriting inside example expressions is a
363            // follow-up if it becomes a real need.
364            examples: fd.examples,
365        }
366    }
367
368    fn mangle_type_expr(&self, te: TypeExpr) -> TypeExpr {
369        match te {
370            TypeExpr::Named { name, args } => TypeExpr::Named {
371                name: self.rewrite_type_name(&name),
372                args: args.into_iter().map(|a| self.mangle_type_expr(a)).collect(),
373            },
374            TypeExpr::Record(fields) => TypeExpr::Record(
375                fields
376                    .into_iter()
377                    .map(|f| TypeField {
378                        name: f.name,
379                        ty: self.mangle_type_expr(f.ty),
380                    })
381                    .collect(),
382            ),
383            TypeExpr::RecordWithSpreads { spreads, fields } => TypeExpr::RecordWithSpreads {
384                spreads: spreads.into_iter().map(|s| self.rewrite_type_name(&s)).collect(),
385                fields: fields
386                    .into_iter()
387                    .map(|f| TypeField {
388                        name: f.name,
389                        ty: self.mangle_type_expr(f.ty),
390                    })
391                    .collect(),
392            },
393            TypeExpr::Tuple(items) => {
394                TypeExpr::Tuple(items.into_iter().map(|t| self.mangle_type_expr(t)).collect())
395            }
396            TypeExpr::Function {
397                params,
398                effects,
399                ret,
400            } => TypeExpr::Function {
401                params: params
402                    .into_iter()
403                    .map(|t| self.mangle_type_expr(t))
404                    .collect(),
405                effects,
406                ret: Box::new(self.mangle_type_expr(*ret)),
407            },
408            TypeExpr::Union(variants) => TypeExpr::Union(
409                variants
410                    .into_iter()
411                    .map(|v| UnionVariant {
412                        name: v.name,
413                        payload: v.payload.map(|t| self.mangle_type_expr(t)),
414                    })
415                    .collect(),
416            ),
417            TypeExpr::Refined { base, binding, predicate } => TypeExpr::Refined {
418                base: Box::new(self.mangle_type_expr(*base)),
419                binding,
420                // The predicate is an expression; its names are
421                // resolved during type-check, not loader-time, so
422                // it passes through unchanged here. Slice 2 wires
423                // up discharge through the spec-checker.
424                predicate,
425            },
426        }
427    }
428
429    /// Rewrite a possibly-qualified type name to its mangled form.
430    fn rewrite_type_name(&self, name: &str) -> String {
431        if let Some((alias, rest)) = name.split_once('.') {
432            if let Some(child) = self.path_imports.get(alias) {
433                return format!("{child}.{rest}");
434            }
435            return name.to_string();
436        }
437        if self.local_names.contains(name) {
438            return self.qualify(name);
439        }
440        name.to_string()
441    }
442
443    fn mangle_block(&self, b: Block, shadow: &HashSet<String>) -> Block {
444        let mut shadow = shadow.clone();
445        let statements = b
446            .statements
447            .into_iter()
448            .map(|s| match s {
449                Statement::Let { name, ty, value } => {
450                    let value = self.mangle_expr(value, &shadow);
451                    let ty = ty.map(|t| self.mangle_type_expr(t));
452                    shadow.insert(name.clone());
453                    Statement::Let { name, ty, value }
454                }
455                Statement::Expr(e) => Statement::Expr(self.mangle_expr(e, &shadow)),
456            })
457            .collect();
458        let result = Box::new(self.mangle_expr(*b.result, &shadow));
459        Block { statements, result }
460    }
461
462    fn mangle_expr(&self, e: Expr, shadow: &HashSet<String>) -> Expr {
463        match e {
464            Expr::Lit(_) => e,
465            Expr::Var(name) => {
466                if !shadow.contains(&name) && self.local_names.contains(&name) {
467                    Expr::Var(self.qualify(&name))
468                } else {
469                    Expr::Var(name)
470                }
471            }
472            Expr::Block(b) => Expr::Block(self.mangle_block(b, shadow)),
473            Expr::Call { callee, args } => {
474                let mangled_args: Vec<Expr> = args
475                    .into_iter()
476                    .map(|a| self.mangle_expr(a, shadow))
477                    .collect();
478                if let Expr::Field { value, field } = (*callee).clone() {
479                    if let Expr::Var(alias) = *value {
480                        if !shadow.contains(&alias) {
481                            if let Some(child) = self.path_imports.get(&alias) {
482                                return Expr::Call {
483                                    callee: Box::new(Expr::Var(format!("{child}.{field}"))),
484                                    args: mangled_args,
485                                };
486                            }
487                        }
488                    }
489                }
490                Expr::Call {
491                    callee: Box::new(self.mangle_expr(*callee, shadow)),
492                    args: mangled_args,
493                }
494            }
495            Expr::Pipe { left, right } => Expr::Pipe {
496                left: Box::new(self.mangle_expr(*left, shadow)),
497                right: Box::new(self.mangle_expr(*right, shadow)),
498            },
499            Expr::Try(inner) => Expr::Try(Box::new(self.mangle_expr(*inner, shadow))),
500            Expr::Field { value, field } => {
501                if let Expr::Var(alias) = (*value).clone() {
502                    if !shadow.contains(&alias) {
503                        if let Some(child) = self.path_imports.get(&alias) {
504                            return Expr::Var(format!("{child}.{field}"));
505                        }
506                    }
507                }
508                Expr::Field {
509                    value: Box::new(self.mangle_expr(*value, shadow)),
510                    field,
511                }
512            }
513            Expr::BinOp { op, lhs, rhs } => Expr::BinOp {
514                op,
515                lhs: Box::new(self.mangle_expr(*lhs, shadow)),
516                rhs: Box::new(self.mangle_expr(*rhs, shadow)),
517            },
518            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
519                op,
520                expr: Box::new(self.mangle_expr(*expr, shadow)),
521            },
522            Expr::If {
523                cond,
524                then_block,
525                else_block,
526            } => Expr::If {
527                cond: Box::new(self.mangle_expr(*cond, shadow)),
528                then_block: self.mangle_block(then_block, shadow),
529                else_block: self.mangle_block(else_block, shadow),
530            },
531            Expr::Match { scrutinee, arms } => Expr::Match {
532                scrutinee: Box::new(self.mangle_expr(*scrutinee, shadow)),
533                arms: arms
534                    .into_iter()
535                    .map(|a| {
536                        let mut arm_shadow = shadow.clone();
537                        collect_pattern_binders(&a.pattern, &mut arm_shadow);
538                        Arm {
539                            pattern: self.mangle_pattern(a.pattern),
540                            body: self.mangle_expr(a.body, &arm_shadow),
541                        }
542                    })
543                    .collect(),
544            },
545            Expr::RecordLit(fields) => Expr::RecordLit(
546                fields
547                    .into_iter()
548                    .map(|f| RecordLitField {
549                        name: f.name,
550                        value: self.mangle_expr(f.value, shadow),
551                    })
552                    .collect(),
553            ),
554            Expr::TupleLit(items) => Expr::TupleLit(
555                items
556                    .into_iter()
557                    .map(|i| self.mangle_expr(i, shadow))
558                    .collect(),
559            ),
560            Expr::ListLit(items) => Expr::ListLit(
561                items
562                    .into_iter()
563                    .map(|i| self.mangle_expr(i, shadow))
564                    .collect(),
565            ),
566            Expr::Constructor { name, args } => Expr::Constructor {
567                name,
568                args: args
569                    .into_iter()
570                    .map(|a| self.mangle_expr(a, shadow))
571                    .collect(),
572            },
573            Expr::Ascription { value, ty } => Expr::Ascription {
574                value: Box::new(self.mangle_expr(*value, shadow)),
575                ty: self.mangle_type_expr(ty),
576            },
577            Expr::Lambda(lambda) => {
578                let mut lam_shadow = shadow.clone();
579                for p in &lambda.params {
580                    lam_shadow.insert(p.name.clone());
581                }
582                Expr::Lambda(Box::new(Lambda {
583                    params: lambda
584                        .params
585                        .into_iter()
586                        .map(|p| Param {
587                            name: p.name,
588                            ty: self.mangle_type_expr(p.ty),
589                        })
590                        .collect(),
591                    return_type: self.mangle_type_expr(lambda.return_type),
592                    effects: lambda.effects,
593                    body: self.mangle_block(lambda.body, &lam_shadow),
594                }))
595            }
596        }
597    }
598
599    fn mangle_pattern(&self, p: Pattern) -> Pattern {
600        match p {
601            Pattern::Constructor { name, args } => Pattern::Constructor {
602                name,
603                args: args.into_iter().map(|a| self.mangle_pattern(a)).collect(),
604            },
605            Pattern::Record { fields, rest } => Pattern::Record {
606                fields: fields
607                    .into_iter()
608                    .map(|f| RecordPatField {
609                        name: f.name,
610                        pattern: f.pattern.map(|p| self.mangle_pattern(p)),
611                    })
612                    .collect(),
613                rest,
614            },
615            Pattern::Tuple(items) => {
616                Pattern::Tuple(items.into_iter().map(|p| self.mangle_pattern(p)).collect())
617            }
618            Pattern::Lit(_) | Pattern::Var(_) | Pattern::Wild => p,
619        }
620    }
621}
622
623fn collect_pattern_binders(p: &Pattern, out: &mut HashSet<String>) {
624    match p {
625        Pattern::Var(name) => {
626            out.insert(name.clone());
627        }
628        Pattern::Constructor { args, .. } => {
629            for a in args {
630                collect_pattern_binders(a, out);
631            }
632        }
633        Pattern::Record { fields, .. } => {
634            for f in fields {
635                match &f.pattern {
636                    Some(p) => collect_pattern_binders(p, out),
637                    // `{ name }` shorthand binds `name`.
638                    None => {
639                        out.insert(f.name.clone());
640                    }
641                }
642            }
643        }
644        Pattern::Tuple(items) => {
645            for p in items {
646                collect_pattern_binders(p, out);
647            }
648        }
649        Pattern::Lit(_) | Pattern::Wild => {}
650    }
651}