lex_extension_host/resolve/mod.rs
1//! Namespace URI resolver.
2//!
3//! A namespace declaration in `lex.toml` (or a `--ext-schema` flag)
4//! gives the host a URI; the resolver turns that URI into a
5//! filesystem directory the schema loader can scan. The model is
6//! specified in `comms/specs/proposals/extending-lex-stores.lex` and
7//! decomposes into:
8//!
9//! - **Three real transports**:
10//! - `path:` — built-in local filesystem read. Special-cased
11//! upstream of registry dispatch — no [`Fetcher`] impl, no cache.
12//! - `https:` — HTTPS GET of a tarball/zip. Implemented by the
13//! [`fetcher::HttpsFetcher`] in the registry.
14//! - `git:` / `git+ssh:` — git clone of a repository. Implemented
15//! by the [`fetcher::GitFetcher`] in the registry (claims both
16//! schemes).
17//! - **N URL templates** that expand into one of the transports
18//! above before dispatch:
19//! - `github:owner/repo[#rev]` — github tarball (https) or clone (git).
20//! - `gitlab:owner/repo[#rev]` — gitlab archive (https) or clone (git).
21//!
22//! ## Architecture
23//!
24//! The resolver has four layers:
25//!
26//! - **URI parsing** ([`uri::ParsedUri`]) — splits the input string
27//! into `scheme`, `body`, `rev`, `subdir` components. Pure
28//! syntactic, no IO.
29//! - **URL-template expansion** ([`template::expand`]) — pure
30//! functions that rewrite forge-shorthand URIs (`github:`,
31//! `gitlab:`) into transport URIs (`https:`, `git:`). No-op for
32//! URIs already in a transport scheme.
33//! - **Fetchers** ([`Fetcher`] trait + per-transport impls) — each
34//! transport has an implementation that fetches the (expanded) URI's
35//! contents into a caller-provided directory. `path:` is built-in
36//! and special-cased (no network, no cache); the remote transports
37//! are pluggable via the [`FetcherRegistry`].
38//! - **Cache** ([`ResolverCache`]) — content-keyed at
39//! `~/.cache/lex/labels/<hash>/`. Caches fetched directories
40//! indefinitely for immutable refs (tags, SHAs) and for a 24-hour
41//! TTL for mutable refs (branches, `None`). The fetcher tells the
42//! cache which a given `rev` is.
43//!
44//! ## Status
45//!
//! All three transports ship today. `path:` is built-in and
//! special-cased upstream of registry dispatch; `https:` uses ureq + tar/zip
48//! extraction (see [`fetcher::HttpsFetcher`]); `git:` / `git+ssh:`
49//! shell out to `git clone --depth=1` (see [`fetcher::GitFetcher`]).
50//! Custom registries can compose alternative or in-process fetchers
51//! via [`FetcherRegistry::register`] — the rest of the pipeline picks
52//! them up without changes.
53
54pub mod cache;
55#[cfg(feature = "https-fetcher")]
56mod extract;
57pub mod fetcher;
58mod path;
59pub mod registry;
60mod template;
61pub mod uri;
62
63use std::path::{Path, PathBuf};
64
65pub use cache::ResolverCache;
66pub use fetcher::{FetchError, Fetcher};
67pub use registry::{default_fetcher_registry, FetcherRegistry};
68pub use uri::{ParsedUri, UriParseError};
69
/// Outcome of a successful namespace resolution: the on-disk schema
/// directory paired with the URI that produced it. Both
/// [`resolve_namespace`] and [`resolve_namespace_with`] return this.
#[derive(Clone, Debug)]
pub struct ResolvedNamespace {
    /// Directory that the [`crate::SchemaLoader`] is expected to scan
    /// for `.yaml` files.
    pub schema_dir: PathBuf,
    /// The URI the resolver was asked to resolve. Kept so diagnostics
    /// can point the user back at the declaration in question.
    pub source_uri: String,
}
83
84/// Errors raised by [`resolve_namespace`] and [`resolve_namespace_with`].
85#[derive(Debug)]
86#[non_exhaustive]
87pub enum ResolveError {
88 /// URI didn't match any registered scheme. `scheme` is the actual
89 /// missing scheme — for plain transport URIs that matches the
90 /// scheme of `uri`, but for forge-template URIs (`github:`,
91 /// `gitlab:`) it's the *expanded* transport scheme (typically
92 /// `https`). That's what the diagnostic needs to name so the user
93 /// understands which transport fetcher is missing from the
94 /// registry, not just that the original URI failed.
95 UnknownScheme { uri: String, scheme: String },
96 /// URI failed to parse syntactically (bad fragment, missing
97 /// scheme, …). Distinct from `UnknownScheme`: the URI is
98 /// malformed at the lex layer, not just pointed at a scheme we
99 /// don't know.
100 UriParseError { uri: String, source: UriParseError },
101 /// A `path:` URI pointed at a file that doesn't exist or isn't
102 /// a directory.
103 PathNotADirectory { path: PathBuf },
104 /// `path:` URI resolved to a path that escapes the workspace
105 /// root (relative paths like `../../etc/passwd`). Same
106 /// invariant as the include-resolver — keeps a malicious
107 /// `lex.toml` from pointing at arbitrary system locations.
108 RootEscape { path: PathBuf },
109 /// `path:` resolution failed at the filesystem layer (permission
110 /// denied, broken symlink, …).
111 Io {
112 path: PathBuf,
113 source: std::io::Error,
114 },
115 /// A `path:` URI carried a `#` fragment or `?` query — those
116 /// are remote-only knobs (the resolver uses them on
117 /// `github:`/`gitlab:`/etc. for `rev` and `subdir`). Rejecting
118 /// instead of silently stripping surfaces typos like
119 /// `path:dir#main` (where the user almost certainly meant a
120 /// remote URI).
121 PathUriHasFragmentOrQuery { uri: String },
122 /// A registered fetcher returned an error during the network
123 /// fetch. Wraps the per-fetcher error type for context.
124 Fetch { uri: String, source: FetchError },
125 /// The cache directory couldn't be created or written to.
126 /// Distinct from a fetch IO error: this happens before we even
127 /// call the fetcher.
128 CacheIo {
129 path: PathBuf,
130 source: std::io::Error,
131 },
132}
133
134impl std::fmt::Display for ResolveError {
135 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136 match self {
137 ResolveError::UnknownScheme { uri, scheme } => {
138 // When the URI's original scheme equals the missing
139 // scheme, no template expansion happened — give the
140 // plain "unknown scheme" phrasing. Otherwise the user
141 // wrote a forge template (`github:`/`gitlab:`) that
142 // expanded into a transport scheme they haven't
143 // registered; say that explicitly so the diagnostic
144 // points at what's actually missing.
145 let user_scheme = uri.split_once(':').map(|(s, _)| s).unwrap_or(uri);
146 if user_scheme == scheme {
147 write!(
148 f,
149 "namespace URI `{uri}` uses transport scheme `{scheme}:` which has no registered fetcher (known: path:, https:, git:, git+ssh:, plus the github:/gitlab: URL templates)"
150 )
151 } else {
152 write!(
153 f,
154 "namespace URI `{uri}` (a `{user_scheme}:` URL template) expands to transport scheme `{scheme}:` which has no registered fetcher (known: path:, https:, git:, git+ssh:)"
155 )
156 }
157 }
158 ResolveError::UriParseError { uri, source } => {
159 write!(f, "namespace URI `{uri}` is malformed: {source}")
160 }
161 ResolveError::PathNotADirectory { path } => write!(
162 f,
163 "namespace URI `path:{}` does not point at an existing directory",
164 path.display()
165 ),
166 ResolveError::RootEscape { path } => write!(
167 f,
168 "namespace URI `path:{}` escapes the workspace root",
169 path.display()
170 ),
171 ResolveError::Io { path, source } => {
172 write!(f, "{}: namespace resolve io error: {source}", path.display())
173 }
174 ResolveError::PathUriHasFragmentOrQuery { uri } => write!(
175 f,
176 "namespace URI `{uri}` is a `path:` scheme but carries `#` or `?` — those are remote-only knobs. Drop the fragment/query, or switch to a remote scheme that supports them."
177 ),
178 ResolveError::Fetch { uri, source } => {
179 write!(f, "namespace URI `{uri}` fetch failed: {source}")
180 }
181 ResolveError::CacheIo { path, source } => write!(
182 f,
183 "cache directory `{}` io error: {source}",
184 path.display()
185 ),
186 }
187 }
188}
189
190impl std::error::Error for ResolveError {
191 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
192 match self {
193 ResolveError::Io { source, .. } => Some(source),
194 ResolveError::UriParseError { source, .. } => Some(source),
195 ResolveError::Fetch { source, .. } => Some(source),
196 ResolveError::CacheIo { source, .. } => Some(source),
197 _ => None,
198 }
199 }
200}
201
202/// Resolve a namespace URI using the default fetcher registry and
203/// cache. Convenience wrapper around [`resolve_namespace_with`] for
204/// callers that don't need to override either.
205///
206/// The default registry ships real fetchers for the `https:` and
207/// `git:` transports (the latter also claims `git+ssh:`). `github:`
208/// and `gitlab:` are URL templates that expand into one of those
209/// transports before dispatch.
210///
211/// The default cache lives at `$XDG_CACHE_HOME/lex/labels/` (falling
212/// back to `~/.cache/lex/labels/` per XDG conventions). Cache
213/// initialisation failures surface as [`ResolveError::CacheIo`].
214pub fn resolve_namespace(
215 uri: &str,
216 workspace_root: &Path,
217) -> Result<ResolvedNamespace, ResolveError> {
218 let registry = default_fetcher_registry();
219 let cache = ResolverCache::user_default().map_err(|source| ResolveError::CacheIo {
220 path: ResolverCache::default_root(),
221 source,
222 })?;
223 resolve_namespace_with(uri, workspace_root, ®istry, &cache)
224}
225
226/// Resolve a namespace URI with an explicit fetcher registry and
227/// cache. Used by [`crate::lex-fmt::boot_registry`] (one cache +
228/// one registry constructed at boot, shared across all namespaces)
229/// and by tests that want a tempdir cache or a custom fetcher.
230///
231/// Dispatch:
232///
233/// 1. Parse the URI ([`ParsedUri::parse`]). `path:` is special-cased
234/// here — it bypasses templates, registry, and cache, resolving
235/// directly against `workspace_root` like a local path.
236/// 2. Run URL-template expansion ([`template::expand`]) on the parsed
237/// URI. Forge shorthands (`github:`, `gitlab:`) become transport
238/// URIs; transport URIs pass through unchanged.
239/// 3. Look up the fetcher for the (expanded) URI's scheme in
240/// `registry`. Return [`ResolveError::UnknownScheme`] if no fetcher
241/// is registered.
242/// 4. Consult `cache` for the URI+rev. If hit (and still valid by
243/// TTL / immutability), return the cached path.
244/// 5. Otherwise call `fetcher.fetch(uri, dest)` with a fresh cache
245/// directory. Record the fetch timestamp in the cache. Return the
246/// path on success.
247pub fn resolve_namespace_with(
248 uri: &str,
249 workspace_root: &Path,
250 registry: &FetcherRegistry,
251 cache: &ResolverCache,
252) -> Result<ResolvedNamespace, ResolveError> {
253 let parsed = ParsedUri::parse(uri).map_err(|source| ResolveError::UriParseError {
254 uri: uri.to_string(),
255 source,
256 })?;
257
258 if parsed.scheme == "path" {
259 return path::resolve(&parsed, uri, workspace_root);
260 }
261
262 let expanded = template::expand(parsed).map_err(|source| ResolveError::UriParseError {
263 uri: uri.to_string(),
264 source,
265 })?;
266
267 let fetcher = registry
268 .get(&expanded.scheme)
269 .ok_or_else(|| ResolveError::UnknownScheme {
270 uri: uri.to_string(),
271 scheme: expanded.scheme.clone(),
272 })?;
273
274 let schema_dir = cache.fetch_or_reuse(&expanded, fetcher.as_ref())?;
275
276 Ok(ResolvedNamespace {
277 schema_dir,
278 source_uri: uri.to_string(),
279 })
280}
281
#[cfg(test)]
mod tests {
    //! Dispatch-level tests. Per-scheme behaviour is covered in the
    //! submodule tests (uri, path, cache, registry); these exercise
    //! the public [`resolve_namespace`] / [`resolve_namespace_with`]
    //! entry points and confirm errors thread through correctly.

    use super::*;

    /// Build a [`ResolverCache`] rooted in a fresh temporary directory.
    /// The `TempDir` guard is returned alongside the cache so the
    /// caller can keep the directory alive for the whole test.
    fn fresh_cache() -> (tempfile::TempDir, ResolverCache) {
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        (tmp, cache)
    }

    #[test]
    fn unknown_scheme_yields_typed_error() {
        let workspace = tempfile::tempdir().unwrap();
        let registry = default_fetcher_registry();
        let (_tmp, cache) = fresh_cache();
        let err = resolve_namespace_with("ftp:server/path", workspace.path(), &registry, &cache)
            .unwrap_err();
        // Render the diagnostic before the match below moves the
        // error's fields out.
        let msg = err.to_string();
        match err {
            ResolveError::UnknownScheme { uri, scheme } => {
                assert_eq!(uri, "ftp:server/path");
                assert_eq!(scheme, "ftp");
                // Plain transport URI (no template expansion) — the
                // diagnostic should NOT use the "expands to" phrasing
                // that's reserved for the template-expansion branch.
                // (The "known schemes" footer mentions URL templates
                // either way, so we discriminate on "expands to"
                // instead.)
                assert!(
                    !msg.contains("expands to"),
                    "plain transport URI shouldn't use template-expansion phrasing: {msg}"
                );
            }
            other => panic!("expected UnknownScheme, got: {other}"),
        }
    }

    #[test]
    fn unknown_scheme_after_template_expansion_names_transport() {
        // If a custom registry omits `https:`, a `github:` template
        // expansion still produces an https URI, and the error needs
        // to say "expands to transport scheme `https:`" rather than
        // misleadingly claiming `github:` is unknown.
        let workspace = tempfile::tempdir().unwrap();
        let registry = FetcherRegistry::new(); // empty — no https registered
        let (_tmp, cache) = fresh_cache();
        let err = resolve_namespace_with("github:acme/repo", workspace.path(), &registry, &cache)
            .unwrap_err();
        // Render the diagnostic before the match below moves the
        // error's fields out.
        let msg = err.to_string();
        match err {
            ResolveError::UnknownScheme { uri, scheme } => {
                assert_eq!(uri, "github:acme/repo");
                assert_eq!(scheme, "https", "should report the expanded transport");
                assert!(
                    msg.contains("expands to") && msg.contains("`https:`"),
                    "template-expansion diagnostic should name the expanded transport: {msg}"
                );
            }
            other => panic!("expected UnknownScheme, got: {other}"),
        }
    }

    #[test]
    fn malformed_uri_yields_parse_error() {
        let workspace = tempfile::tempdir().unwrap();
        let registry = default_fetcher_registry();
        let (_tmp, cache) = fresh_cache();
        let err =
            resolve_namespace_with("not-a-uri", workspace.path(), &registry, &cache).unwrap_err();
        assert!(matches!(err, ResolveError::UriParseError { .. }));
    }

    #[test]
    fn path_uri_dispatches_to_path_module() {
        let workspace = tempfile::tempdir().unwrap();
        let dir = workspace.path().join("acme");
        std::fs::create_dir(&dir).unwrap();
        let registry = default_fetcher_registry();
        let (_tmp, cache) = fresh_cache();
        let resolved =
            resolve_namespace_with("path:acme", workspace.path(), &registry, &cache).unwrap();
        assert_eq!(resolved.schema_dir, dir);
    }

    #[test]
    fn convenience_resolve_namespace_works_for_path() {
        // The convenience entry point uses the default registry +
        // user-default cache. For path: URIs that don't touch the
        // cache, this should work even without a real cache dir
        // (the cache constructor creates ~/.cache/lex/labels if
        // missing, but the cache isn't consulted for path:).
        let workspace = tempfile::tempdir().unwrap();
        let dir = workspace.path().join("acme");
        std::fs::create_dir(&dir).unwrap();
        let resolved = resolve_namespace("path:acme", workspace.path()).unwrap();
        assert_eq!(resolved.schema_dir, dir);
    }
}