Skip to main content

elenchus_compiler/
resolver.rs

1//! Source-agnostic import resolution: the [`Resolver`] trait, in-memory and file
2//! backings, path normalization, and the iterative import-graph traversal.
3use crate::domain::DomainCtx;
4use crate::error::CompileError;
5use crate::hash_hex;
6use crate::ir::UnusedImport;
7use crate::subst::collect_prefixes;
8use alloc::collections::{BTreeMap, BTreeSet};
9use alloc::string::{String, ToString};
10use alloc::vec;
11use alloc::vec::Vec;
12use elenchus_parser::Statement;
13
14// --- import resolution (source-agnostic) -----------------------------------
15
16/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
17/// consumes strings, so a file is merely one backing store. Mirrors
18/// vsm-grammar's `SourceResolver`.
19pub trait Resolver {
20    /// Load the raw source text for a resolved path.
21    fn load(&self, path: &str) -> Result<String, CompileError>;
22
23    /// Normalize `relative` against the importing source `base`.
24    /// Default: paths are absolute names, returned unchanged.
25    fn resolve(&self, _base: &str, relative: &str) -> String {
26        relative.to_string()
27    }
28}
29
30/// An in-memory resolver: serves sources from a name → content map. Pure
31/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
32#[derive(Default)]
33pub struct MemoryResolver {
34    sources: BTreeMap<String, String>,
35}
36
37impl MemoryResolver {
38    /// An empty resolver with no sources.
39    pub fn new() -> Self {
40        Self::default()
41    }
42
43    /// Register `content` under `path`; returns `&mut self` for chaining.
44    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
45        self.sources.insert(path.to_string(), content.to_string());
46        self
47    }
48}
49
50/// Normalize an `IMPORT` path **identically on every OS and every surface**: treat
51/// both `/` and `\` as separators, resolve `relative` against `base`'s directory,
52/// collapse `.` / `..`, and re-join with `/`. Pure and `no_std`, so it does not
53/// depend on the host (or the compile target's) path semantics — a Windows-style
54/// and a Unix-style import resolve to the same virtual path whether the resolver is
55/// filesystem-, JS-callback-, or in-memory-backed. The single source of truth all
56/// three [`Resolver`]s share.
57pub fn normalize_import_path(base: &str, relative: &str) -> String {
58    fn is_sep(c: char) -> bool {
59        c == '/' || c == '\\'
60    }
61    fn push<'a>(parts: &mut Vec<&'a str>, seg: &'a str) {
62        match seg {
63            "" | "." => {}
64            ".." => {
65                parts.pop();
66            }
67            _ => parts.push(seg),
68        }
69    }
70    let mut absolute = base.starts_with(is_sep);
71    let mut parts: Vec<&str> = Vec::new();
72    // `base`'s directory = every segment but the last (the importing file's name).
73    let base_segs: Vec<&str> = base.split(is_sep).collect();
74    for seg in base_segs.iter().take(base_segs.len().saturating_sub(1)) {
75        push(&mut parts, seg);
76    }
77    // An absolute `relative` replaces the base directory entirely.
78    if relative.starts_with(is_sep) {
79        parts.clear();
80        absolute = true;
81    }
82    for seg in relative.split(is_sep) {
83        push(&mut parts, seg);
84    }
85    let joined = parts.join("/");
86    if absolute {
87        alloc::format!("/{joined}")
88    } else {
89        joined
90    }
91}
92
93impl Resolver for MemoryResolver {
94    fn load(&self, path: &str) -> Result<String, CompileError> {
95        self.sources
96            .get(path)
97            .cloned()
98            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
99    }
100
101    fn resolve(&self, base: &str, relative: &str) -> String {
102        normalize_import_path(base, relative)
103    }
104}
105
106/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
107/// relative imports resolve against the importing file's directory, with manual
108/// `..` normalization (no canonicalization, to keep a virtual layout).
109#[cfg(feature = "std")]
110pub struct FileResolver;
111
112#[cfg(feature = "std")]
113impl Resolver for FileResolver {
114    fn load(&self, path: &str) -> Result<String, CompileError> {
115        std::fs::read_to_string(path)
116            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
117    }
118
119    fn resolve(&self, base: &str, relative: &str) -> String {
120        // Share the one OS-independent normalizer (forward slashes, `..` collapsed)
121        // so a resolved path — and the provenance recorded in the IR — is identical
122        // on Windows and Unix, and identical to the in-memory and JS resolvers.
123        // Windows `std::fs` accepts `/` just fine.
124        normalize_import_path(base, relative)
125    }
126}
127
128/// One resolved source ready to compile: its provenance path, raw text, and the
129/// domain context (own domain + import-alias bindings) its atoms resolve against.
130pub(crate) struct ResolvedFile {
131    pub(crate) path: String,
132    pub(crate) content: String,
133    pub(crate) ctx: DomainCtx,
134}
135
136/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
137/// `IMPORT` line (for the unused-import advisory).
138pub(crate) struct ImportEdge {
139    pub(crate) alias: Option<String>,
140    pub(crate) child_path: String,
141    pub(crate) line: u32,
142}
143
144/// A discovered source during graph resolution: its first-seen path, raw text,
145/// declared domain, import edges, and the set of domain prefixes its atoms use
146/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
147/// that the file never references.
148pub(crate) struct DiscoveredFile {
149    pub(crate) path: String,
150    pub(crate) content: String,
151    pub(crate) domain: String,
152    pub(crate) edges: Vec<ImportEdge>,
153    pub(crate) used_prefixes: BTreeSet<Option<String>>,
154}
155
156/// Resolve the whole import graph reachable from `root` into a flat list of
157/// [`ResolvedFile`]s, each distinct source appearing once.
158///
159/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
160/// frames) — no native recursion, so depth is unbounded without risking a stack
161/// overflow. Memoized by content hash (a diamond/repeat is visited once); a hash
162/// re-encountered while still on the active path is a [`CompileError::CircularImport`].
163pub(crate) fn resolve_graph<R: Resolver>(
164    root: &str,
165    resolver: &R,
166) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
167    /// One unit of pending work on the traversal stack.
168    enum Step {
169        /// Visit a file at this resolved path (load, parse, enqueue its imports).
170        Enter(String),
171        /// Mark this content hash finished (pop it off the active path).
172        Exit(String),
173    }
174
175    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
176    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
177    let mut order: Vec<String> = Vec::new(); // finish order, by hash
178    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
179    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];
180
181    while let Some(step) = work.pop() {
182        match step {
183            Step::Exit(hash) => {
184                active.remove(&hash);
185                order.push(hash);
186            }
187            Step::Enter(path) => {
188                let content = resolver.load(&path)?;
189                let hash = hash_hex(content.as_bytes());
190                path_hash.insert(path.clone(), hash.clone());
191                if active.contains(&hash) {
192                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
193                }
194                if discovered.contains_key(&hash) {
195                    continue; // already fully resolved by another path — dedup
196                }
197                let program = parse_tagged(&path, &content)?;
198                let domain = extract_domain(&program, &path)?;
199                let mut edges = Vec::new();
200                let mut used_prefixes = BTreeSet::new();
201                for stmt in &program.statements {
202                    if let Statement::Import { path: p, alias } = stmt {
203                        edges.push(ImportEdge {
204                            alias: alias.as_ref().map(|a| a.data.to_string()),
205                            child_path: resolver.resolve(&path, p.data),
206                            line: p.span.location_line(),
207                        });
208                    } else {
209                        collect_prefixes(stmt, &mut used_prefixes);
210                    }
211                }
212                drop(program); // release the borrow on `content` before moving it
213                active.insert(hash.clone());
214                work.push(Step::Exit(hash.clone()));
215                for e in edges.iter().rev() {
216                    work.push(Step::Enter(e.child_path.clone()));
217                }
218                discovered.insert(
219                    hash,
220                    DiscoveredFile {
221                        path,
222                        content,
223                        domain,
224                        edges,
225                        used_prefixes,
226                    },
227                );
228            }
229        }
230    }
231
232    // Build each file's domain context now that every domain is known.
233    // Look up every file's domain (small strings) so we can then *move* each
234    // file's (potentially large) content out of `discovered` instead of cloning.
235    let domain_of: BTreeMap<&str, &str> = discovered
236        .iter()
237        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
238        .collect();
239
240    let mut out = Vec::with_capacity(order.len());
241    let mut unused: Vec<UnusedImport> = Vec::new();
242    for hash in &order {
243        let file = &discovered[hash];
244        let mut aliases = BTreeMap::new();
245        aliases.insert(file.domain.clone(), file.domain.clone());
246        for edge in &file.edges {
247            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
248            let bind = edge
249                .alias
250                .clone()
251                .unwrap_or_else(|| child_domain.to_string());
252            match aliases.get(&bind) {
253                Some(existing) if existing != child_domain => {
254                    return Err(CompileError::DomainAliasClash { alias: bind });
255                }
256                _ => {
257                    aliases.insert(bind, child_domain.to_string());
258                }
259            }
260        }
261
262        // The domains this file actually references (each used prefix resolved
263        // against its own domain / imports). An imported domain absent from this
264        // set is an unused import.
265        let referenced: BTreeSet<&str> = file
266            .used_prefixes
267            .iter()
268            .filter_map(|p| match p {
269                None => Some(file.domain.as_str()),
270                Some(name) => aliases.get(name).map(|d| d.as_str()),
271            })
272            .collect();
273        for edge in &file.edges {
274            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
275            if !referenced.contains(child_domain) {
276                unused.push(UnusedImport {
277                    file: file.path.clone(),
278                    domain: child_domain.to_string(),
279                    alias: edge.alias.clone(),
280                    line: edge.line,
281                });
282            }
283        }
284
285        let ctx = DomainCtx {
286            current: file.domain.clone(),
287            aliases,
288        };
289        out.push((hash.clone(), ctx));
290    }
291    unused.sort();
292
293    // Now move content/path out of `discovered` (no large clones) and pair with
294    // the contexts built above.
295    let files = out
296        .into_iter()
297        .map(|(hash, ctx)| {
298            let file = discovered.remove(&hash).expect("hash was discovered");
299            ResolvedFile {
300                path: file.path,
301                content: file.content,
302                ctx,
303            }
304        })
305        .collect();
306    Ok((files, unused))
307}
308
309/// Parse one source, tagging any syntax [`Diagnostics`] with its file label so a
310/// `CompileError::Parse` names the right file. The single spelling of "parse, and
311/// on failure attach the file" — shared by the inline, resolved, and import paths.
312pub(crate) fn parse_tagged<'a>(
313    file: &str,
314    content: &'a str,
315) -> Result<elenchus_parser::Program<'a>, CompileError> {
316    elenchus_parser::parse(content).map_err(|mut diag| {
317        diag.set_file(file);
318        CompileError::Parse(diag)
319    })
320}
321
322/// The single `DOMAIN` a source declares, or an error if it has none or several.
323pub(crate) fn extract_domain(
324    program: &elenchus_parser::Program,
325    source: &str,
326) -> Result<String, CompileError> {
327    let mut found: Option<String> = None;
328    for stmt in &program.statements {
329        if let Statement::Domain(name) = stmt {
330            if found.is_some() {
331                return Err(CompileError::DuplicateDomain {
332                    file: source.to_string(),
333                });
334            }
335            found = Some(name.data.to_string());
336        }
337    }
338    found.ok_or_else(|| CompileError::MissingDomain {
339        file: source.to_string(),
340    })
341}