Skip to main content

lex_extension_host/resolve/
mod.rs

//! Namespace URI resolver.
//!
//! A namespace declaration in `lex.toml` (or a `--ext-schema` flag)
//! gives the host a URI; the resolver turns that URI into a
//! filesystem directory the schema loader can scan. The model is
//! specified in `comms/specs/proposals/extending-lex-stores.lex` and
//! decomposes into:
//!
//! - **Three real transports**:
//!   - `path:` — built-in local filesystem read. Special-cased
//!     upstream of registry dispatch — no [`Fetcher`] impl, no cache.
//!   - `https:` — HTTPS GET of a tarball/zip. Implemented by the
//!     [`fetcher::HttpsFetcher`] in the registry.
//!   - `git:` / `git+ssh:` — git clone of a repository. Implemented
//!     by the [`fetcher::GitFetcher`] in the registry (claims both
//!     schemes).
//! - **N URL templates** that expand into one of the transports
//!   above before dispatch:
//!   - `github:owner/repo[#rev]` — github tarball (https) or clone (git).
//!   - `gitlab:owner/repo[#rev]` — gitlab archive (https) or clone (git).
//!
//! ## Architecture
//!
//! The resolver has four layers:
//!
//! - **URI parsing** ([`uri::ParsedUri`]) — splits the input string
//!   into `scheme`, `body`, `rev`, `subdir` components. Purely
//!   syntactic, no IO.
//! - **URL-template expansion** ([`template::expand`]) — pure
//!   functions that rewrite forge-shorthand URIs (`github:`,
//!   `gitlab:`) into transport URIs (`https:`, `git:`). No-op for
//!   URIs already in a transport scheme.
//! - **Fetchers** ([`Fetcher`] trait + per-transport impls) — each
//!   transport has an implementation that fetches the (expanded) URI's
//!   contents into a caller-provided directory. `path:` is built-in
//!   and special-cased (no network, no cache); the remote transports
//!   are pluggable via the [`FetcherRegistry`].
//! - **Cache** ([`ResolverCache`]) — content-keyed at
//!   `~/.cache/lex/labels/<hash>/`. Caches fetched directories
//!   indefinitely for immutable refs (tags, SHAs) and for a 24-hour
//!   TTL for mutable refs (branches, `None`). The fetcher tells the
//!   cache which a given `rev` is.
//!
//! ## Status
//!
//! All three transports ship today. `path:` is built-in and
//! special-cased upstream of registry dispatch; `https:` uses ureq +
//! tar/zip extraction (see [`fetcher::HttpsFetcher`]); `git:` /
//! `git+ssh:` shell out to `git clone --depth=1` (see
//! [`fetcher::GitFetcher`]).
//! Custom registries can compose alternative or in-process fetchers
//! via [`FetcherRegistry::register`] — the rest of the pipeline picks
//! them up without changes.
53
54pub mod cache;
55#[cfg(feature = "https-fetcher")]
56mod extract;
57pub mod fetcher;
58mod path;
59pub mod registry;
60mod template;
61pub mod uri;
62
63use std::path::{Path, PathBuf};
64
65pub use cache::ResolverCache;
66pub use fetcher::{FetchError, Fetcher};
67pub use registry::{default_fetcher_registry, FetcherRegistry};
68pub use uri::{ParsedUri, UriParseError};
69
/// The outcome of a successful namespace resolution: the on-disk
/// location of the schema files paired with the URI that was
/// resolved. Produced by [`resolve_namespace`] and
/// [`resolve_namespace_with`].
#[derive(Clone, Debug)]
pub struct ResolvedNamespace {
    /// Directory that the [`crate::SchemaLoader`] should scan for
    /// `.yaml` files.
    pub schema_dir: PathBuf,
    /// The URI exactly as it was handed to the resolver. Kept so
    /// diagnostics can point the user back at the declaration that
    /// produced this namespace.
    pub source_uri: String,
}
83
84/// Errors raised by [`resolve_namespace`] and [`resolve_namespace_with`].
85#[derive(Debug)]
86#[non_exhaustive]
87pub enum ResolveError {
88    /// URI didn't match any registered scheme. `scheme` is the actual
89    /// missing scheme — for plain transport URIs that matches the
90    /// scheme of `uri`, but for forge-template URIs (`github:`,
91    /// `gitlab:`) it's the *expanded* transport scheme (typically
92    /// `https`). That's what the diagnostic needs to name so the user
93    /// understands which transport fetcher is missing from the
94    /// registry, not just that the original URI failed.
95    UnknownScheme { uri: String, scheme: String },
96    /// URI failed to parse syntactically (bad fragment, missing
97    /// scheme, …). Distinct from `UnknownScheme`: the URI is
98    /// malformed at the lex layer, not just pointed at a scheme we
99    /// don't know.
100    UriParseError { uri: String, source: UriParseError },
101    /// A `path:` URI pointed at a file that doesn't exist or isn't
102    /// a directory.
103    PathNotADirectory { path: PathBuf },
104    /// `path:` URI resolved to a path that escapes the workspace
105    /// root (relative paths like `../../etc/passwd`). Same
106    /// invariant as the include-resolver — keeps a malicious
107    /// `lex.toml` from pointing at arbitrary system locations.
108    RootEscape { path: PathBuf },
109    /// `path:` resolution failed at the filesystem layer (permission
110    /// denied, broken symlink, …).
111    Io {
112        path: PathBuf,
113        source: std::io::Error,
114    },
115    /// A `path:` URI carried a `#` fragment or `?` query — those
116    /// are remote-only knobs (the resolver uses them on
117    /// `github:`/`gitlab:`/etc. for `rev` and `subdir`). Rejecting
118    /// instead of silently stripping surfaces typos like
119    /// `path:dir#main` (where the user almost certainly meant a
120    /// remote URI).
121    PathUriHasFragmentOrQuery { uri: String },
122    /// A registered fetcher returned an error during the network
123    /// fetch. Wraps the per-fetcher error type for context.
124    Fetch { uri: String, source: FetchError },
125    /// The cache directory couldn't be created or written to.
126    /// Distinct from a fetch IO error: this happens before we even
127    /// call the fetcher.
128    CacheIo {
129        path: PathBuf,
130        source: std::io::Error,
131    },
132}
133
134impl std::fmt::Display for ResolveError {
135    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136        match self {
137            ResolveError::UnknownScheme { uri, scheme } => {
138                // When the URI's original scheme equals the missing
139                // scheme, no template expansion happened — give the
140                // plain "unknown scheme" phrasing. Otherwise the user
141                // wrote a forge template (`github:`/`gitlab:`) that
142                // expanded into a transport scheme they haven't
143                // registered; say that explicitly so the diagnostic
144                // points at what's actually missing.
145                let user_scheme = uri.split_once(':').map(|(s, _)| s).unwrap_or(uri);
146                if user_scheme == scheme {
147                    write!(
148                        f,
149                        "namespace URI `{uri}` uses transport scheme `{scheme}:` which has no registered fetcher (known: path:, https:, git:, git+ssh:, plus the github:/gitlab: URL templates)"
150                    )
151                } else {
152                    write!(
153                        f,
154                        "namespace URI `{uri}` (a `{user_scheme}:` URL template) expands to transport scheme `{scheme}:` which has no registered fetcher (known: path:, https:, git:, git+ssh:)"
155                    )
156                }
157            }
158            ResolveError::UriParseError { uri, source } => {
159                write!(f, "namespace URI `{uri}` is malformed: {source}")
160            }
161            ResolveError::PathNotADirectory { path } => write!(
162                f,
163                "namespace URI `path:{}` does not point at an existing directory",
164                path.display()
165            ),
166            ResolveError::RootEscape { path } => write!(
167                f,
168                "namespace URI `path:{}` escapes the workspace root",
169                path.display()
170            ),
171            ResolveError::Io { path, source } => {
172                write!(f, "{}: namespace resolve io error: {source}", path.display())
173            }
174            ResolveError::PathUriHasFragmentOrQuery { uri } => write!(
175                f,
176                "namespace URI `{uri}` is a `path:` scheme but carries `#` or `?` — those are remote-only knobs. Drop the fragment/query, or switch to a remote scheme that supports them."
177            ),
178            ResolveError::Fetch { uri, source } => {
179                write!(f, "namespace URI `{uri}` fetch failed: {source}")
180            }
181            ResolveError::CacheIo { path, source } => write!(
182                f,
183                "cache directory `{}` io error: {source}",
184                path.display()
185            ),
186        }
187    }
188}
189
190impl std::error::Error for ResolveError {
191    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
192        match self {
193            ResolveError::Io { source, .. } => Some(source),
194            ResolveError::UriParseError { source, .. } => Some(source),
195            ResolveError::Fetch { source, .. } => Some(source),
196            ResolveError::CacheIo { source, .. } => Some(source),
197            _ => None,
198        }
199    }
200}
201
202/// Resolve a namespace URI using the default fetcher registry and
203/// cache. Convenience wrapper around [`resolve_namespace_with`] for
204/// callers that don't need to override either.
205///
206/// The default registry ships real fetchers for the `https:` and
207/// `git:` transports (the latter also claims `git+ssh:`). `github:`
208/// and `gitlab:` are URL templates that expand into one of those
209/// transports before dispatch.
210///
211/// The default cache lives at `$XDG_CACHE_HOME/lex/labels/` (falling
212/// back to `~/.cache/lex/labels/` per XDG conventions). Cache
213/// initialisation failures surface as [`ResolveError::CacheIo`].
214pub fn resolve_namespace(
215    uri: &str,
216    workspace_root: &Path,
217) -> Result<ResolvedNamespace, ResolveError> {
218    let registry = default_fetcher_registry();
219    let cache = ResolverCache::user_default().map_err(|source| ResolveError::CacheIo {
220        path: ResolverCache::default_root(),
221        source,
222    })?;
223    resolve_namespace_with(uri, workspace_root, &registry, &cache)
224}
225
226/// Resolve a namespace URI with an explicit fetcher registry and
227/// cache. Used by [`crate::lex-fmt::boot_registry`] (one cache +
228/// one registry constructed at boot, shared across all namespaces)
229/// and by tests that want a tempdir cache or a custom fetcher.
230///
231/// Dispatch:
232///
233/// 1. Parse the URI ([`ParsedUri::parse`]). `path:` is special-cased
234///    here — it bypasses templates, registry, and cache, resolving
235///    directly against `workspace_root` like a local path.
236/// 2. Run URL-template expansion ([`template::expand`]) on the parsed
237///    URI. Forge shorthands (`github:`, `gitlab:`) become transport
238///    URIs; transport URIs pass through unchanged.
239/// 3. Look up the fetcher for the (expanded) URI's scheme in
240///    `registry`. Return [`ResolveError::UnknownScheme`] if no fetcher
241///    is registered.
242/// 4. Consult `cache` for the URI+rev. If hit (and still valid by
243///    TTL / immutability), return the cached path.
244/// 5. Otherwise call `fetcher.fetch(uri, dest)` with a fresh cache
245///    directory. Record the fetch timestamp in the cache. Return the
246///    path on success.
247pub fn resolve_namespace_with(
248    uri: &str,
249    workspace_root: &Path,
250    registry: &FetcherRegistry,
251    cache: &ResolverCache,
252) -> Result<ResolvedNamespace, ResolveError> {
253    let parsed = ParsedUri::parse(uri).map_err(|source| ResolveError::UriParseError {
254        uri: uri.to_string(),
255        source,
256    })?;
257
258    if parsed.scheme == "path" {
259        return path::resolve(&parsed, uri, workspace_root);
260    }
261
262    let expanded = template::expand(parsed).map_err(|source| ResolveError::UriParseError {
263        uri: uri.to_string(),
264        source,
265    })?;
266
267    let fetcher = registry
268        .get(&expanded.scheme)
269        .ok_or_else(|| ResolveError::UnknownScheme {
270            uri: uri.to_string(),
271            scheme: expanded.scheme.clone(),
272        })?;
273
274    let schema_dir = cache.fetch_or_reuse(&expanded, fetcher.as_ref())?;
275
276    Ok(ResolvedNamespace {
277        schema_dir,
278        source_uri: uri.to_string(),
279    })
280}
281
#[cfg(test)]
mod tests {
    //! Tests for the dispatch layer only. Scheme-specific behaviour
    //! lives in the submodule tests (uri, path, cache, registry);
    //! here we drive the public [`resolve_namespace`] /
    //! [`resolve_namespace_with`] entry points and check that each
    //! failure surfaces as the right [`ResolveError`] variant.

    use super::*;

    /// Build a cache rooted in a brand-new temp directory. The
    /// `TempDir` guard is returned alongside it so the directory
    /// stays alive for as long as the test holds the guard.
    fn fresh_cache() -> (tempfile::TempDir, ResolverCache) {
        let root = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(root.path()).unwrap();
        (root, cache)
    }

    #[test]
    fn unknown_scheme_yields_typed_error() {
        let (_tmp, cache) = fresh_cache();
        let registry = default_fetcher_registry();
        let workspace = tempfile::tempdir().unwrap();

        let outcome =
            resolve_namespace_with("ftp:server/path", workspace.path(), &registry, &cache);
        let err = outcome.unwrap_err();
        // Render the diagnostic before the error is taken apart.
        let msg = err.to_string();

        match err {
            ResolveError::UnknownScheme { uri, scheme } => {
                assert_eq!(uri, "ftp:server/path");
                assert_eq!(scheme, "ftp");
                // No template expansion took place, so the message
                // must not use the "expands to" wording reserved
                // for the template branch. (Both branches mention
                // URL templates or schemes in their "known" footer,
                // so "expands to" is the discriminating phrase.)
                assert!(
                    !msg.contains("expands to"),
                    "plain transport URI shouldn't use template-expansion phrasing: {msg}"
                );
            }
            other => panic!("expected UnknownScheme, got: {other}"),
        }
    }

    #[test]
    fn unknown_scheme_after_template_expansion_names_transport() {
        // With `https:` missing from the registry, `github:` still
        // expands to an https URI. The error has to say it
        // "expands to transport scheme `https:`" instead of
        // misleadingly blaming `github:`.
        let (_tmp, cache) = fresh_cache();
        let registry = FetcherRegistry::new(); // empty — no https registered
        let workspace = tempfile::tempdir().unwrap();

        let outcome =
            resolve_namespace_with("github:acme/repo", workspace.path(), &registry, &cache);
        let err = outcome.unwrap_err();
        let msg = err.to_string();

        match err {
            ResolveError::UnknownScheme { uri, scheme } => {
                assert_eq!(uri, "github:acme/repo");
                assert_eq!(scheme, "https", "should report the expanded transport");
                assert!(
                    msg.contains("expands to") && msg.contains("`https:`"),
                    "template-expansion diagnostic should name the expanded transport: {msg}"
                );
            }
            other => panic!("expected UnknownScheme, got: {other}"),
        }
    }

    #[test]
    fn malformed_uri_yields_parse_error() {
        let (_tmp, cache) = fresh_cache();
        let registry = default_fetcher_registry();
        let workspace = tempfile::tempdir().unwrap();

        let outcome = resolve_namespace_with("not-a-uri", workspace.path(), &registry, &cache);
        let err = outcome.unwrap_err();
        assert!(matches!(err, ResolveError::UriParseError { .. }));
    }

    #[test]
    fn path_uri_dispatches_to_path_module() {
        let workspace = tempfile::tempdir().unwrap();
        let dir = workspace.path().join("acme");
        std::fs::create_dir(&dir).unwrap();

        let (_tmp, cache) = fresh_cache();
        let registry = default_fetcher_registry();
        let outcome = resolve_namespace_with("path:acme", workspace.path(), &registry, &cache);
        let resolved = outcome.unwrap();
        assert_eq!(resolved.schema_dir, dir);
    }

    #[test]
    fn convenience_resolve_namespace_works_for_path() {
        // The convenience entry point wires up the default registry
        // and the user-default cache itself. `path:` URIs never
        // consult the cache, so this is expected to succeed even
        // without a pre-existing cache dir (the cache constructor
        // creates ~/.cache/lex/labels if it is missing).
        let workspace = tempfile::tempdir().unwrap();
        let dir = workspace.path().join("acme");
        std::fs::create_dir(&dir).unwrap();

        let resolved = resolve_namespace("path:acme", workspace.path()).unwrap();
        assert_eq!(resolved.schema_dir, dir);
    }
}
396}