Skip to main content

lex_extension_host/resolve/
cache.rs

//! Content-keyed cache for resolved namespaces.
//!
//! Each `(canonical_uri, rev)` pair maps to a stable directory under
//! the cache root. The first resolve fetches into the directory; later
//! resolves with the same key either reuse it (immutable rev, or a
//! mutable ref still within its TTL) or re-fetch (mutable ref past
//! its TTL).
//!
//! Layout:
//!
//! ```text
//! <root>/                ← $XDG_CACHE_HOME/lex/labels
//!   <hash>/              ← sha256 of "<scheme>:<body>[#<rev>][?subdir=<subdir>]" (lowercased hex)
//!     .lex-fetched-at    ← unix ts of last successful fetch
//!     <schema files>     ← whatever the fetcher wrote
//! ```
//!
//! Key derivation uses SHA-256 of the canonical URI + rev (plus the
//! optional subdir) so the same lex.toml resolves to the same cache
//! directory across machines — the proposal's reproducibility
//! property (§4.4) holds as long as upstream hasn't moved a tag.
21
22use std::path::{Path, PathBuf};
23use std::time::{Duration, SystemTime};
24
25use sha2::{Digest, Sha256};
26
27use super::fetcher::Fetcher;
28use super::uri::ParsedUri;
29use super::ResolveError;
30
/// How long a cached entry for a mutable ref (a branch, or a URI with
/// no `rev`) stays fresh before the resolver fetches it again.
/// Proposal §4.4 fixes this at 24 hours.
pub const DEFAULT_MUTABLE_TTL: Duration = Duration::from_secs(60 * 60 * 24);

/// Name of the marker file written into an entry directory once a
/// fetch has succeeded. Its content is the fetch time as decimal
/// unix-seconds in plain text, so `cat` is enough to inspect it.
const TIMESTAMP_FILENAME: &str = ".lex-fetched-at";
39
/// On-disk cache of resolved namespaces.
///
/// The only state held in memory is the root path and the TTL; each
/// lookup goes back to the filesystem, so processes that share a root
/// observe one another's writes right away. Concurrent fetches into
/// the same entry directory are not coordinated — not a concern at v1
/// since fetches run serially through `boot_registry`.
#[derive(Clone, Debug)]
pub struct ResolverCache {
    /// Directory that holds one sub-directory per cache entry.
    root: PathBuf,
    /// Maximum age at which an entry for a mutable ref is still reused.
    mutable_ttl: Duration,
}
50
51impl ResolverCache {
52    /// Create a cache rooted at `root` with the default 24-hour TTL.
53    /// Creates the directory if it doesn't exist (a missing cache
54    /// directory is normal on first run; not an error).
55    pub fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
56        let root = root.into();
57        std::fs::create_dir_all(&root)?;
58        Ok(Self {
59            root,
60            mutable_ttl: DEFAULT_MUTABLE_TTL,
61        })
62    }
63
64    /// Create the per-user cache at `$XDG_CACHE_HOME/lex/labels`,
65    /// falling back to `$HOME/.cache/lex/labels` per XDG conventions.
66    pub fn user_default() -> std::io::Result<Self> {
67        Self::new(Self::default_root())
68    }
69
70    /// Compute the default cache root without touching the
71    /// filesystem. Exposed so [`super::resolve_namespace`] can
72    /// surface the path in its [`ResolveError::CacheIo`] when
73    /// [`Self::user_default`] fails.
74    pub fn default_root() -> PathBuf {
75        if let Ok(xdg) = std::env::var("XDG_CACHE_HOME") {
76            if !xdg.is_empty() {
77                return PathBuf::from(xdg).join("lex").join("labels");
78            }
79        }
80        if let Ok(home) = std::env::var("HOME") {
81            if !home.is_empty() {
82                return PathBuf::from(home)
83                    .join(".cache")
84                    .join("lex")
85                    .join("labels");
86            }
87        }
88        // Last-resort fallback: a per-process temp dir. Better than
89        // panicking; surfaces as "we'll re-fetch every time" which
90        // is degraded but not broken.
91        std::env::temp_dir().join(format!("lex-labels-{}", std::process::id()))
92    }
93
94    /// Override the mutable-rev TTL. Tests use this to force quick
95    /// expiry without sleeping for 24 hours.
96    pub fn with_mutable_ttl(mut self, ttl: Duration) -> Self {
97        self.mutable_ttl = ttl;
98        self
99    }
100
101    /// The cache root.
102    pub fn root(&self) -> &Path {
103        &self.root
104    }
105
106    /// Cache directory for a URI. Deterministic; doesn't touch the
107    /// filesystem.
108    pub fn entry_path(&self, uri: &ParsedUri) -> PathBuf {
109        self.root.join(hash_key(uri))
110    }
111
112    /// Resolve `uri` against the cache, fetching via `fetcher` on a
113    /// cache miss or expired mutable entry. The caller-facing entry
114    /// point used by [`super::resolve_namespace_with`].
115    ///
116    /// Returns the cache directory containing the resolved schema
117    /// (or the `subdir` thereof, if the URI requested one — the
118    /// fetcher is responsible for honouring `subdir`).
119    pub fn fetch_or_reuse(
120        &self,
121        uri: &ParsedUri,
122        fetcher: &dyn Fetcher,
123    ) -> Result<PathBuf, ResolveError> {
124        let entry = self.entry_path(uri);
125
126        // Cache hit path. Two requirements:
127        //
128        //   1. The completion marker (`TIMESTAMP_FILENAME`) exists
129        //      and contains a parseable timestamp. The marker is
130        //      written *only* after a fetcher returns Ok — its
131        //      presence proves the fetch ran to completion, so a
132        //      directory whose fetcher crashed mid-write isn't
133        //      mistaken for a complete entry.
134        //   2. For immutable refs: any complete entry is reusable
135        //      forever. For mutable refs: the marker's timestamp
136        //      must also be within `mutable_ttl`.
137        //
138        // Requiring the marker even for immutable revs is the
139        // partial-fetch defence — without it, a crash mid-fetch
140        // could leave a half-populated directory that the next
141        // resolve would happily reuse as-is for the immutable rev.
142        if entry.is_dir() {
143            if let Some(fetched_at) = read_completion_marker(&entry) {
144                let immutable = fetcher.is_immutable_rev(uri.rev.as_deref());
145                if immutable || self.is_within_ttl(fetched_at) {
146                    return Ok(entry);
147                }
148            }
149        }
150
151        // Miss, stale, or incomplete-from-prior-fetch — fetch fresh.
152        // Wipe the entry first so a partial-write from a previous
153        // failed fetch doesn't get reused via a future code change
154        // that loosens the marker check.
155        if entry.exists() {
156            std::fs::remove_dir_all(&entry).map_err(|source| ResolveError::CacheIo {
157                path: entry.clone(),
158                source,
159            })?;
160        }
161        std::fs::create_dir_all(&entry).map_err(|source| ResolveError::CacheIo {
162            path: entry.clone(),
163            source,
164        })?;
165
166        fetcher.fetch(uri, &entry).map_err(|source| {
167            // Partial-fetch cleanup: a fetcher that wrote some
168            // files then errored leaves a directory without a
169            // completion marker. The next resolve would re-fetch
170            // anyway (no marker → not a hit), but leaving a
171            // partial entry on disk is wasteful and confusing
172            // when users inspect the cache. Best-effort wipe;
173            // ignore errors since we're already returning one.
174            let _ = std::fs::remove_dir_all(&entry);
175            ResolveError::Fetch {
176                uri: uri.original.clone(),
177                source,
178            }
179        })?;
180
181        // Fetch succeeded → drop the completion marker. Failure to
182        // write the marker is non-fatal but degrades subsequent
183        // resolves to re-fetch on every call (entry won't be
184        // recognised as complete). Logging this requires plumbing
185        // a logger into the resolver; out of scope for now.
186        let _ = self.write_timestamp(&entry);
187
188        Ok(entry)
189    }
190
191    /// Check whether `fetched_at` is within the mutable-rev TTL.
192    fn is_within_ttl(&self, fetched_at: u64) -> bool {
193        let Ok(now) = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH) else {
194            return false;
195        };
196        now.as_secs().saturating_sub(fetched_at) < self.mutable_ttl.as_secs()
197    }
198
199    fn write_timestamp(&self, entry: &Path) -> std::io::Result<()> {
200        let now = SystemTime::now()
201            .duration_since(SystemTime::UNIX_EPOCH)
202            .map(|d| d.as_secs())
203            .unwrap_or(0);
204        std::fs::write(entry.join(TIMESTAMP_FILENAME), now.to_string())
205    }
206}
207
208/// Read the completion marker as a Unix timestamp. Returns `None` if
209/// the file is missing OR its contents don't parse as a `u64`
210/// (treated as "no marker"; forces a re-fetch). The marker is the
211/// indicator that a previous fetch ran to completion; immutable-rev
212/// reuse and mutable-rev freshness checks both depend on it.
213fn read_completion_marker(entry: &Path) -> Option<u64> {
214    let stamp = entry.join(TIMESTAMP_FILENAME);
215    let content = std::fs::read_to_string(&stamp).ok()?;
216    content.trim().parse::<u64>().ok()
217}
218
219/// SHA-256 of the URI + rev, lowercased hex. Stable across processes
220/// and machines: same `lex.toml` resolves to the same cache directory
221/// everywhere. Inputs include both the body and the rev so a tag
222/// change doesn't collide with the previous tag's cached content.
223fn hash_key(uri: &ParsedUri) -> String {
224    let mut h = Sha256::new();
225    h.update(uri.scheme.as_bytes());
226    h.update(b":");
227    h.update(uri.body.as_bytes());
228    if let Some(rev) = &uri.rev {
229        h.update(b"#");
230        h.update(rev.as_bytes());
231    }
232    if let Some(subdir) = &uri.subdir {
233        h.update(b"?subdir=");
234        h.update(subdir.as_bytes());
235    }
236    hex_encode(&h.finalize())
237}
238
/// Render `bytes` as lowercase hexadecimal, two digits per byte.
///
/// Returns an empty string for empty input; otherwise the result is
/// exactly `2 * bytes.len()` ASCII characters long.
fn hex_encode(bytes: &[u8]) -> String {
    // Nibble → digit lookup. Pushing the two digits directly avoids
    // building a temporary `String` through `format!` for every byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
246
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicUsize, Ordering};

    use super::super::FetchError;
    use super::*;

    /// Parse a URI literal; panics if the literal is malformed.
    fn parse(uri: &str) -> ParsedUri {
        ParsedUri::parse(uri).unwrap()
    }

    #[test]
    fn hash_key_is_deterministic() {
        let a = hash_key(&parse("github:acme/repo#v1"));
        let b = hash_key(&parse("github:acme/repo#v1"));
        assert_eq!(a, b);
        // SHA-256 is 32 bytes → 64 hex digits.
        assert_eq!(a.len(), 64);
    }

    #[test]
    fn hash_key_distinguishes_rev() {
        let a = hash_key(&parse("github:acme/repo#v1"));
        let b = hash_key(&parse("github:acme/repo#v2"));
        assert_ne!(a, b);
    }

    #[test]
    fn hash_key_distinguishes_scheme() {
        let a = hash_key(&parse("github:acme/repo"));
        let b = hash_key(&parse("gitlab:acme/repo"));
        assert_ne!(a, b);
    }

    #[test]
    fn entry_path_is_stable_across_cache_instances() {
        let tmp = tempfile::tempdir().unwrap();
        let cache1 = ResolverCache::new(tmp.path()).unwrap();
        let cache2 = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("github:acme/repo#v1");
        assert_eq!(cache1.entry_path(&uri), cache2.entry_path(&uri));
    }

    #[test]
    fn default_root_uses_xdg_cache_home() {
        let prev_xdg = std::env::var_os("XDG_CACHE_HOME");
        std::env::set_var("XDG_CACHE_HOME", "/tmp/xdg-test");
        let root = ResolverCache::default_root();
        // Put the variable back *before* asserting: a failed
        // assertion unwinds immediately, and the override must not
        // leak into other tests running in this process.
        match prev_xdg {
            Some(v) => std::env::set_var("XDG_CACHE_HOME", v),
            None => std::env::remove_var("XDG_CACHE_HOME"),
        }
        assert_eq!(root, PathBuf::from("/tmp/xdg-test/lex/labels"));
    }

    /// Mock fetcher: writes a known file into dest. Used to drive
    /// the cache without involving real network IO.
    struct MockFetcher;

    impl Fetcher for MockFetcher {
        fn fetch(&self, _uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
            std::fs::write(dest.join("schema.yaml"), b"schema_version: 1\nlabel: x.y\n")?;
            Ok(())
        }
        fn schemes(&self) -> &'static [&'static str] {
            &["mock"]
        }
    }

    /// Mock fetcher that counts how many times `fetch` was called.
    /// Used to verify cache hits don't re-fetch.
    #[derive(Default)]
    struct CountingFetcher {
        calls: AtomicUsize,
    }

    impl Fetcher for CountingFetcher {
        fn fetch(&self, _uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            std::fs::write(dest.join("schema.yaml"), b"x")?;
            Ok(())
        }
        fn schemes(&self) -> &'static [&'static str] {
            &["mock"]
        }
    }

    /// Wraps `CountingFetcher` to report `is_immutable_rev == true`.
    #[derive(Default)]
    struct ImmutableCountingFetcher {
        inner: CountingFetcher,
    }

    impl Fetcher for ImmutableCountingFetcher {
        fn fetch(&self, uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
            self.inner.fetch(uri, dest)
        }
        fn schemes(&self) -> &'static [&'static str] {
            self.inner.schemes()
        }
        fn is_immutable_rev(&self, _rev: Option<&str>) -> bool {
            true
        }
    }

    #[test]
    fn fetch_or_reuse_writes_to_cache_on_miss() {
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("mock:something#v1");
        let dir = cache.fetch_or_reuse(&uri, &MockFetcher).unwrap();
        assert!(dir.starts_with(tmp.path()));
        assert!(dir.join("schema.yaml").is_file());
        assert!(dir.join(TIMESTAMP_FILENAME).is_file());
    }

    #[test]
    fn fetch_or_reuse_reuses_immutable_entry() {
        let tmp = tempfile::tempdir().unwrap();
        // Zero TTL: the freshness check can never produce a hit, so
        // the only way the second call can reuse the entry is the
        // `is_immutable_rev` branch. With the default 24-hour TTL
        // this test would pass even if that branch were removed.
        let cache = ResolverCache::new(tmp.path())
            .unwrap()
            .with_mutable_ttl(Duration::from_secs(0));
        let uri = parse("mock:something#v1");
        let fetcher = ImmutableCountingFetcher::default();
        // First call misses and fetches; second call must be a hit.
        cache.fetch_or_reuse(&uri, &fetcher).unwrap();
        cache.fetch_or_reuse(&uri, &fetcher).unwrap();
        assert_eq!(
            fetcher.inner.calls.load(Ordering::SeqCst),
            1,
            "second call should be a cache hit (immutable rev)"
        );
    }

    #[test]
    fn fetch_or_reuse_reuses_mutable_entry_within_ttl() {
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("mock:something#main");
        let counter = CountingFetcher::default();
        cache.fetch_or_reuse(&uri, &counter).unwrap();
        cache.fetch_or_reuse(&uri, &counter).unwrap();
        assert_eq!(
            counter.calls.load(Ordering::SeqCst),
            1,
            "second call within TTL should reuse the cached entry"
        );
    }

    #[test]
    fn fetch_or_reuse_refetches_mutable_entry_past_ttl() {
        let tmp = tempfile::tempdir().unwrap();
        // Zero-duration TTL — every call past the first is stale.
        let cache = ResolverCache::new(tmp.path())
            .unwrap()
            .with_mutable_ttl(Duration::from_secs(0));
        let uri = parse("mock:something#main");
        let counter = CountingFetcher::default();
        cache.fetch_or_reuse(&uri, &counter).unwrap();
        // No sleep needed: an age of 0 seconds is still not `< 0`,
        // so the entry is stale on the very next call.
        cache.fetch_or_reuse(&uri, &counter).unwrap();
        assert_eq!(
            counter.calls.load(Ordering::SeqCst),
            2,
            "second call past TTL should re-fetch"
        );
    }

    #[test]
    fn fetch_or_reuse_propagates_fetch_errors() {
        struct FailingFetcher;
        impl Fetcher for FailingFetcher {
            fn fetch(&self, _uri: &ParsedUri, _dest: &Path) -> Result<(), FetchError> {
                Err(FetchError::Network {
                    message: "simulated".into(),
                })
            }
            fn schemes(&self) -> &'static [&'static str] {
                &["mock"]
            }
        }
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("mock:fail");
        let err = cache.fetch_or_reuse(&uri, &FailingFetcher).unwrap_err();
        match err {
            ResolveError::Fetch {
                source: FetchError::Network { .. },
                ..
            } => {}
            other => panic!("expected Fetch::Network error, got: {other}"),
        }
    }

    /// Regression: a directory left behind by a previous failed
    /// fetch (no completion marker) must not be mistaken for a
    /// complete cache entry, even when the rev is immutable.
    /// Without this, an immutable-rev fetcher that crashed
    /// mid-write would leave a partial directory that the next
    /// resolve happily reused forever, since the immutable path
    /// previously bypassed all completeness checks.
    #[test]
    fn fetch_or_reuse_does_not_reuse_partial_entry_for_immutable_rev() {
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("mock:something#v1");

        // Hand-craft a partial directory: exists, has some content,
        // but lacks the completion marker (simulating a crashed
        // mid-write fetch).
        let entry = cache.entry_path(&uri);
        std::fs::create_dir_all(&entry).unwrap();
        std::fs::write(entry.join("partial-thing.yaml"), b"only half written").unwrap();
        assert!(!entry.join(TIMESTAMP_FILENAME).exists());

        // Now resolve with an immutable-reporting fetcher. The
        // partial entry must be wiped and re-fetched, not reused.
        struct ImmutableMockFetcher {
            called: AtomicUsize,
        }
        impl Fetcher for ImmutableMockFetcher {
            fn fetch(&self, _uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
                self.called.fetch_add(1, Ordering::SeqCst);
                std::fs::write(dest.join("schema.yaml"), b"complete").unwrap();
                Ok(())
            }
            fn schemes(&self) -> &'static [&'static str] {
                &["mock"]
            }
            fn is_immutable_rev(&self, _rev: Option<&str>) -> bool {
                true
            }
        }
        let fetcher = ImmutableMockFetcher {
            called: AtomicUsize::new(0),
        };
        let dir = cache.fetch_or_reuse(&uri, &fetcher).unwrap();
        assert_eq!(
            fetcher.called.load(Ordering::SeqCst),
            1,
            "partial entry must be wiped and re-fetched, not reused"
        );
        // Partial file is gone; only the completed fetch's output remains.
        assert!(!dir.join("partial-thing.yaml").exists());
        assert_eq!(std::fs::read(dir.join("schema.yaml")).unwrap(), b"complete");
        assert!(dir.join(TIMESTAMP_FILENAME).is_file());
    }

    /// Regression for the same defence-in-depth: a fetcher that
    /// writes some content and then errors out must leave a clean
    /// cache (no partial directory). Subsequent resolves see a
    /// cache miss (no entry) rather than a partial entry.
    #[test]
    fn fetch_or_reuse_wipes_partial_writes_when_fetcher_errors() {
        struct PartialThenFailFetcher;
        impl Fetcher for PartialThenFailFetcher {
            fn fetch(&self, _uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
                // Write some content, then error.
                std::fs::write(dest.join("half.yaml"), b"x").unwrap();
                Err(FetchError::Network {
                    message: "interrupted".into(),
                })
            }
            fn schemes(&self) -> &'static [&'static str] {
                &["mock"]
            }
        }
        let tmp = tempfile::tempdir().unwrap();
        let cache = ResolverCache::new(tmp.path()).unwrap();
        let uri = parse("mock:fail#partial");
        let entry = cache.entry_path(&uri);
        let _err = cache
            .fetch_or_reuse(&uri, &PartialThenFailFetcher)
            .unwrap_err();
        // The partial write must have been cleaned up.
        assert!(
            !entry.exists(),
            "partial entry should have been removed; still at {}",
            entry.display()
        );
    }
}