Skip to main content

entropyx_cli/
cache.rs

1//! Disk-backed caches for content-immutable lookups.
2//!
3//! Two caches live here:
4//!
5//!   - `DiskItemsCache` — parsed public-API items keyed by
6//!     `(blob_sha, Language)`. Blob SHAs are content hashes, so the
7//!     cached parse result is correct forever for the same key.
8//!   - `DiskPrCache` — pull-request metadata keyed by
9//!     `(owner, repo, sha)`. Once a commit is associated with a merged
10//!     PR, that fact is immutable; safe to cache forever.
11//!
12//! Default location: `$XDG_CACHE_HOME/entropyx/`, falling back to
13//! `~/.cache/entropyx/`. Override via `$ENTROPYX_CACHE_DIR`. Cache
14//! files are JSON for inspectability; cache directory is created on
15//! save if missing.
16
17use entropyx_ast::Language;
18use entropyx_tq::PullRequestRef;
19use std::collections::HashMap;
20use std::fs;
21use std::io;
22use std::path::{Path, PathBuf};
23
// File name of the parsed-items cache; joined onto the cache directory by
// `DiskItemsCache::load_default`.
const ITEMS_FILE: &str = "items.json";
// File name of the pull-request cache; joined onto the cache directory by
// `DiskPrCache::load_default`.
const PRS_FILE: &str = "prs.json";
26
/// Resolve the default cache directory.
///
/// Lookup order:
///
///   1. `$ENTROPYX_CACHE_DIR` — used verbatim.
///   2. `$XDG_CACHE_HOME/entropyx`.
///   3. `$HOME/.cache/entropyx`.
///
/// A variable that is set but empty is treated as unset (this is what
/// the XDG Base Directory spec requires for `$XDG_CACHE_HOME`, and it
/// keeps an accidental `ENTROPYX_CACHE_DIR=` from resolving to the
/// current directory). Values are read as OS strings, so paths that are
/// not valid Unicode are still honoured.
///
/// Returns `None` only when none of the three variables holds a
/// non-empty value.
pub fn default_cache_dir() -> Option<PathBuf> {
    // Empty is as good as missing: fall through to the next candidate.
    let non_empty = |name: &str| std::env::var_os(name).filter(|value| !value.is_empty());

    if let Some(dir) = non_empty("ENTROPYX_CACHE_DIR") {
        return Some(PathBuf::from(dir));
    }
    if let Some(xdg) = non_empty("XDG_CACHE_HOME") {
        return Some(PathBuf::from(xdg).join("entropyx"));
    }
    let home = non_empty("HOME")?;
    Some(PathBuf::from(home).join(".cache").join("entropyx"))
}
39
40fn lang_key(lang: Language) -> &'static str {
41    match lang {
42        Language::Rust => "rust",
43        Language::Go => "go",
44        Language::Python => "python",
45        Language::TypeScript => "typescript",
46        Language::Java => "java",
47        Language::JavaScript => "javascript",
48        Language::Ruby => "ruby",
49        Language::Cpp => "cpp",
50    }
51}
52
53/// SHA-keyed cache of parsed public-API items, persisted as JSON.
54#[derive(Debug, Default)]
55pub struct DiskItemsCache {
56    path: PathBuf,
57    map: HashMap<String, Vec<String>>,
58}
59
60impl DiskItemsCache {
61    /// Load (or initialize empty) at the given file path. Missing or
62    /// corrupt cache files are silently treated as empty so a stale
63    /// cache never blocks a scan.
64    pub fn load_at(path: PathBuf) -> Self {
65        let map = fs::read_to_string(&path)
66            .ok()
67            .and_then(|s| serde_json::from_str(&s).ok())
68            .unwrap_or_default();
69        Self { path, map }
70    }
71
72    /// Convenience: load from the default cache directory.
73    pub fn load_default() -> Self {
74        let path = default_cache_dir()
75            .map(|d| d.join(ITEMS_FILE))
76            .unwrap_or_else(|| PathBuf::from(ITEMS_FILE));
77        Self::load_at(path)
78    }
79
80    pub fn get(&self, sha: &str, lang: Language) -> Option<Vec<String>> {
81        self.map.get(&Self::key(sha, lang)).cloned()
82    }
83
84    pub fn insert(&mut self, sha: String, lang: Language, items: Vec<String>) {
85        self.map.insert(Self::key(&sha, lang), items);
86    }
87
88    pub fn len(&self) -> usize {
89        self.map.len()
90    }
91
92    pub fn is_empty(&self) -> bool {
93        self.map.is_empty()
94    }
95
96    /// Persist to disk. Creates the parent directory if necessary.
97    /// Errors are returned to the caller — callers typically log and
98    /// proceed (a save failure shouldn't crash a successful scan).
99    pub fn save(&self) -> io::Result<()> {
100        if let Some(parent) = self.path.parent() {
101            fs::create_dir_all(parent)?;
102        }
103        let json = serde_json::to_string(&self.map).map_err(io::Error::other)?;
104        fs::write(&self.path, json)
105    }
106
107    pub fn path(&self) -> &Path {
108        &self.path
109    }
110
111    fn key(sha: &str, lang: Language) -> String {
112        format!("{}/{sha}", lang_key(lang))
113    }
114}
115
116/// `(owner, repo, sha)`-keyed cache of pull-request lookups, persisted
117/// as JSON. `None` values are stored explicitly to record "queried,
118/// no PR found" — distinct from "never queried" (cache miss).
119#[derive(Debug, Default)]
120pub struct DiskPrCache {
121    path: PathBuf,
122    map: HashMap<String, Option<PullRequestRef>>,
123}
124
125impl DiskPrCache {
126    pub fn load_at(path: PathBuf) -> Self {
127        let map = fs::read_to_string(&path)
128            .ok()
129            .and_then(|s| serde_json::from_str(&s).ok())
130            .unwrap_or_default();
131        Self { path, map }
132    }
133
134    pub fn load_default() -> Self {
135        let path = default_cache_dir()
136            .map(|d| d.join(PRS_FILE))
137            .unwrap_or_else(|| PathBuf::from(PRS_FILE));
138        Self::load_at(path)
139    }
140
141    /// Three-state lookup:
142    ///   - `Some(Some(pr))` — cached, PR found
143    ///   - `Some(None)` — cached, no PR for this commit (don't re-query)
144    ///   - `None` — not cached, caller should query the network
145    pub fn get(&self, owner: &str, repo: &str, sha: &str) -> Option<Option<PullRequestRef>> {
146        self.map.get(&Self::key(owner, repo, sha)).cloned()
147    }
148
149    pub fn insert(&mut self, owner: &str, repo: &str, sha: &str, pr: Option<PullRequestRef>) {
150        self.map.insert(Self::key(owner, repo, sha), pr);
151    }
152
153    pub fn len(&self) -> usize {
154        self.map.len()
155    }
156
157    pub fn is_empty(&self) -> bool {
158        self.map.is_empty()
159    }
160
161    pub fn save(&self) -> io::Result<()> {
162        if let Some(parent) = self.path.parent() {
163            fs::create_dir_all(parent)?;
164        }
165        let json = serde_json::to_string(&self.map).map_err(io::Error::other)?;
166        fs::write(&self.path, json)
167    }
168
169    pub fn path(&self) -> &Path {
170        &self.path
171    }
172
173    fn key(owner: &str, repo: &str, sha: &str) -> String {
174        format!("{owner}/{repo}/{sha}")
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Entries written by one cache instance are visible to a fresh
    /// instance loaded from the same file; language is part of the key.
    #[test]
    fn items_cache_round_trips() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("items.json");

        {
            let mut c = DiskItemsCache::load_at(path.clone());
            c.insert(
                "abc123".to_string(),
                Language::Rust,
                vec!["fn:foo/2".to_string(), "struct:Bar".to_string()],
            );
            c.insert(
                "def456".to_string(),
                Language::Go,
                vec!["fn:Hello".to_string()],
            );
            c.save().expect("save");
        }

        let c = DiskItemsCache::load_at(path);
        assert_eq!(
            c.get("abc123", Language::Rust),
            Some(vec!["fn:foo/2".to_string(), "struct:Bar".to_string()]),
        );
        assert_eq!(
            c.get("def456", Language::Go),
            Some(vec!["fn:Hello".to_string()])
        );
        // Wrong language for an existing SHA is a miss.
        assert_eq!(c.get("abc123", Language::Go), None);
        // Unknown SHA is a miss.
        assert_eq!(c.get("missing", Language::Rust), None);
    }

    #[test]
    fn items_cache_handles_corrupt_file_as_empty() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("items.json");
        fs::write(&path, "{ not valid json").unwrap();

        let c = DiskItemsCache::load_at(path);
        assert!(c.is_empty(), "corrupt file → empty cache, no panic");
    }

    #[test]
    fn items_cache_handles_missing_file_as_empty() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("does_not_exist.json");
        let c = DiskItemsCache::load_at(path);
        assert!(c.is_empty());
    }

    #[test]
    fn items_cache_save_creates_parent_dir() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("nested").join("subdir").join("items.json");
        let mut c = DiskItemsCache::load_at(path.clone());
        c.insert(
            "abc".to_string(),
            Language::Rust,
            vec!["fn:x/0".to_string()],
        );
        c.save().expect("save creates parents");
        assert!(path.exists(), "file written");
    }

    #[test]
    fn pr_cache_distinguishes_no_pr_from_unknown() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("prs.json");

        let pr = PullRequestRef {
            number: 42,
            title: "fix: thing".to_string(),
            state: "closed".to_string(),
            merged: true,
            merged_at: Some("2026-04-01T12:00:00Z".to_string()),
            author: Some("alice".to_string()),
        };

        {
            let mut c = DiskPrCache::load_at(path.clone());
            c.insert("acme", "widgets", "with_pr_sha", Some(pr.clone()));
            c.insert("acme", "widgets", "direct_push_sha", None);
            c.save().expect("save");
        }

        let c = DiskPrCache::load_at(path);
        assert_eq!(c.get("acme", "widgets", "with_pr_sha"), Some(Some(pr)));
        // Cached "no PR" → don't re-query the network.
        assert_eq!(c.get("acme", "widgets", "direct_push_sha"), Some(None));
        // Truly unknown → caller must hit network.
        assert_eq!(c.get("acme", "widgets", "unknown_sha"), None);
    }

    /// Mirrors the items-cache corrupt-file test for the PR cache.
    #[test]
    fn pr_cache_handles_corrupt_file_as_empty() {
        let td = tempdir().expect("tempdir");
        let path = td.path().join("prs.json");
        fs::write(&path, "{ not valid json").unwrap();

        let c = DiskPrCache::load_at(path);
        assert!(c.is_empty(), "corrupt file → empty cache, no panic");
    }

    #[test]
    fn default_cache_dir_uses_explicit_override() {
        const VAR: &str = "ENTROPYX_CACHE_DIR";

        /// Puts the variable back to its prior state on drop, so the
        /// override is undone even when the assertion below panics.
        struct Restore(Option<std::ffi::OsString>);

        impl Drop for Restore {
            fn drop(&mut self) {
                // SAFETY: same accepted risk as the `set_var` below.
                unsafe {
                    match self.0.take() {
                        Some(prior) => std::env::set_var(VAR, prior),
                        None => std::env::remove_var(VAR),
                    }
                }
            }
        }

        // `var_os` so a non-Unicode prior value is restored intact.
        let _restore = Restore(std::env::var_os(VAR));

        // SAFETY: see note in entropyx-github tests; env vars are
        // process-global and the parallel-test risk is accepted for v0.1.
        unsafe {
            std::env::set_var(VAR, "/tmp/entropyx-test-cache");
        }
        assert_eq!(
            default_cache_dir(),
            Some(PathBuf::from("/tmp/entropyx-test-cache")),
        );
    }
}