Skip to main content

open_loops/
inventory.rs

1//! SHA-validated ahead/behind memo store.
2//!
3//! One JSON file per git common-dir lives at
4//! `~/.open-loops/inventory/<fnv64hex>.json`. The heavy git phase
//! (`rev-list`) is memoised per `(branch, head_sha, ab_base_sha)` triple.
6//! Reads are tolerant; writes are atomic (tmp → rename).
7use anyhow::{Context, Result};
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11
/// File extension (without the leading dot) carried by every inventory file.
const INVENTORY_EXT: &str = "json";
13
14/// One memoised ahead/behind entry for a branch.
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
16pub struct LoopMemo {
17    pub branch: String,
18    /// HEAD SHA of the branch at the time of the computation.
19    pub head_sha: String,
20    /// HEAD SHA of the default branch at the time of the computation.
21    pub ab_base_sha: String,
22    pub ahead: u32,
23    pub behind: u32,
24}
25
26/// Per-repo inventory file serialised to JSON.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct InventoryFile {
29    /// Absolute path of the repo root used for identity confirmation.
30    pub repo_path: PathBuf,
31    /// Timestamp of the last write (used for TTL validation).
32    pub indexed_at: DateTime<Utc>,
33    /// Memoised entries, one per unmerged branch.
34    pub loops: Vec<LoopMemo>,
35}
36
/// Handle on the directory that holds the per-repo inventory files.
#[derive(Clone, Debug)]
pub struct InventoryStore {
    /// Directory in which the `<hash>.json` files live.
    pub dir: PathBuf,
}
43
44impl InventoryStore {
45    /// Creates a store whose directory is `<base>/inventory`.
46    pub fn new(base: &Path) -> Self {
47        Self {
48            dir: base.join("inventory"),
49        }
50    }
51
52    /// Loads the inventory file for `hash`, or `None` when absent or corrupt.
53    pub fn load(&self, hash: &str) -> Option<InventoryFile> {
54        let path = path_for_hash(&self.dir, hash);
55        let raw = std::fs::read_to_string(&path).ok()?;
56        match serde_json::from_str::<InventoryFile>(&raw) {
57            Ok(f) => Some(f),
58            Err(e) => {
59                eprintln!(
60                    "warning: corrupt inventory file {}: {e:#}; ignoring",
61                    path.display()
62                );
63                None
64            }
65        }
66    }
67
68    /// Atomically writes `file` to `<dir>/<hash>.json` via a tmp file + rename.
69    pub fn save(&self, hash: &str, file: &InventoryFile) -> Result<()> {
70        std::fs::create_dir_all(&self.dir)
71            .with_context(|| format!("creating inventory dir {}", self.dir.display()))?;
72        let final_path = path_for_hash(&self.dir, hash);
73        let tmp_path = tmp_path_for_hash(&self.dir, hash);
74        let json = serde_json::to_string_pretty(file).context("serialising inventory file")?;
75        std::fs::write(&tmp_path, &json)
76            .with_context(|| format!("writing tmp inventory {}", tmp_path.display()))?;
77        std::fs::rename(&tmp_path, &final_path)
78            .with_context(|| format!("renaming inventory tmp to {}", final_path.display()))?;
79        Ok(())
80    }
81
82    /// Reclaims inventory files whose `repo_path` no longer exists on disk.
83    ///
84    /// This is a global garbage-collect, intentionally NOT scoped to the current
85    /// refresh query: a repo gone from disk is an orphan regardless of which
86    /// query triggered the refresh, so its stale memo is always removed. Repos
87    /// that are merely outside the query but still present on disk are kept —
88    /// they are not orphans. Removal is self-healing: a returning repo is simply
89    /// recomputed on the next scan.
90    ///
91    /// Called lazily from `loops refresh` only (ADR 0004 pattern).
92    pub fn prune_orphans(&self) -> Result<()> {
93        if !self.dir.exists() {
94            return Ok(());
95        }
96        for entry in std::fs::read_dir(&self.dir)
97            .with_context(|| format!("reading inventory dir {}", self.dir.display()))?
98            .flatten()
99        {
100            let path = entry.path();
101            if path.extension().is_none_or(|e| e != INVENTORY_EXT) {
102                continue;
103            }
104            let stem = path
105                .file_stem()
106                .map(|s| s.to_string_lossy().into_owned())
107                .unwrap_or_default();
108            // Skip tmp files that lack a proper stem.
109            if stem.starts_with('.') {
110                continue;
111            }
112            // Parse directly rather than via `load` (which warns), so an
113            // unreadable file yields only this one line — not a contradictory
114            // `corrupt …; ignoring` followed by `removed …`. A loadable file
115            // whose repo is gone is an orphan; an unreadable file can't prove its
116            // repo exists, so reclaim it too, labelled honestly.
117            let parsed = std::fs::read_to_string(&path)
118                .ok()
119                .and_then(|raw| serde_json::from_str::<InventoryFile>(&raw).ok());
120            let reason = match parsed {
121                Some(f) if f.repo_path.exists() => continue,
122                Some(_) => "orphan",
123                None => "unreadable",
124            };
125            match std::fs::remove_file(&path) {
126                Ok(()) => eprintln!("warning: removed {reason} inventory {}", path.display()),
127                // A concurrent prune already removed it — not worth a warning.
128                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
129                Err(e) => eprintln!(
130                    "warning: failed to remove {reason} inventory {}: {e:#}",
131                    path.display()
132                ),
133            }
134        }
135        Ok(())
136    }
137}
138
/// Hashes the common-dir path with 64-bit FNV-1a and renders it as 16
/// lowercase, zero-padded hex characters.
///
/// FNV-1a is small enough to hand-roll, so no extra crate is needed. The input
/// is the lossy UTF-8 rendering of the path, so the result is stable across
/// processes as long as that string representation is identical.
pub fn common_dir_hash(common_dir: &Path) -> String {
    // Standard 64-bit FNV parameters, written in hex.
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    let text = common_dir.to_string_lossy();
    let digest = text
        .as_bytes()
        .iter()
        .fold(OFFSET_BASIS, |acc, &b| (acc ^ u64::from(b)).wrapping_mul(PRIME));
    format!("{digest:016x}")
}
153
154/// Returns the path for a given hash in `dir`.
155pub(crate) fn path_for_hash(dir: &Path, hash: &str) -> PathBuf {
156    dir.join(format!("{hash}.{INVENTORY_EXT}"))
157}
158
/// Scratch path that [`InventoryStore::save`] writes to before renaming.
///
/// The name is `.<hash>.<pid>.json.tmp`. Embedding the process id gives each
/// `loops` process its own tmp file for a given repo: with a *shared* name, one
/// writer's rename could fail with ENOENT after another writer had already
/// renamed the file away. (The rename itself is what keeps readers from seeing
/// a partial file.) The final extension is `tmp`, not `json`, so prune and
/// listing skip it.
fn tmp_path_for_hash(dir: &Path, hash: &str) -> PathBuf {
    let pid = std::process::id();
    let file_name = format!(".{hash}.{pid}.json.tmp");
    dir.join(file_name)
}
169
170/// Looks up the cached ahead/behind for a branch, validating SHA keys and TTL.
171///
172/// Returns `None` when:
173/// - No matching entry exists.
174/// - `ttl_secs > 0` and the file is older than the TTL.
175pub fn lookup_ahead_behind(
176    file: &InventoryFile,
177    branch: &str,
178    head_sha: &str,
179    ab_base_sha: &str,
180    ttl_secs: u64,
181    now: DateTime<Utc>,
182) -> Option<(u32, u32)> {
183    if ttl_secs > 0 {
184        let age_secs = now.signed_duration_since(file.indexed_at).num_seconds();
185        if age_secs < 0 || age_secs as u64 > ttl_secs {
186            return None;
187        }
188    }
189    file.loops
190        .iter()
191        .find(|m| m.branch == branch && m.head_sha == head_sha && m.ab_base_sha == ab_base_sha)
192        .map(|m| (m.ahead, m.behind))
193}
194
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Duration;
    use std::path::PathBuf;

    /// Builds a memo from borrowed strings.
    fn memo(branch: &str, head: &str, base: &str, ahead: u32, behind: u32) -> LoopMemo {
        LoopMemo {
            branch: branch.to_owned(),
            head_sha: head.to_owned(),
            ab_base_sha: base.to_owned(),
            ahead,
            behind,
        }
    }

    /// Inventory for `repo_path` stamped with an explicit `indexed_at`.
    fn inventory_at(
        repo_path: &str,
        indexed_at: DateTime<Utc>,
        loops: Vec<LoopMemo>,
    ) -> InventoryFile {
        InventoryFile {
            repo_path: PathBuf::from(repo_path),
            indexed_at,
            loops,
        }
    }

    /// Inventory for `repo_path` stamped with the current time.
    fn inventory(repo_path: &str, loops: Vec<LoopMemo>) -> InventoryFile {
        inventory_at(repo_path, Utc::now(), loops)
    }

    /// Store under a fresh temp dir; the inventory dir is NOT created yet.
    /// The `TempDir` guard is returned so the directory outlives the test body.
    fn temp_store() -> (tempfile::TempDir, InventoryStore) {
        let root = tempfile::tempdir().unwrap();
        let store = InventoryStore::new(root.path());
        (root, store)
    }

    /// Like [`temp_store`], but with the inventory dir already on disk.
    fn temp_store_with_dir() -> (tempfile::TempDir, InventoryStore) {
        let (root, store) = temp_store();
        std::fs::create_dir_all(&store.dir).unwrap();
        (root, store)
    }

    /// Writes raw bytes where the inventory file for `hash` would live.
    fn write_raw(store: &InventoryStore, hash: &str, bytes: &[u8]) {
        std::fs::write(path_for_hash(&store.dir, hash), bytes).unwrap();
    }

    // ---- common_dir_hash / path helpers -------------------------------------

    #[test]
    fn common_dir_hash_is_16_hex_chars() {
        let digest = common_dir_hash(Path::new("/home/user/proj/.git"));
        assert_eq!(digest.len(), 16);
        assert!(digest.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn common_dir_hash_is_deterministic() {
        let path = Path::new("/home/user/proj/.git");
        assert_eq!(common_dir_hash(path), common_dir_hash(path));
    }

    #[test]
    fn common_dir_hash_differs_for_different_paths() {
        let first = common_dir_hash(Path::new("/a/.git"));
        let second = common_dir_hash(Path::new("/b/.git"));
        assert_ne!(first, second);
    }

    #[test]
    fn path_for_hash_joins_correctly() {
        let joined = path_for_hash(Path::new("/inv"), "abc123");
        assert_eq!(joined, PathBuf::from("/inv/abc123.json"));
    }

    #[test]
    fn save_tmp_name_is_unique_per_process() {
        let tmp_path = tmp_path_for_hash(Path::new("/inv"), "abc123");
        let name = tmp_path.file_name().unwrap().to_string_lossy();
        // pid keeps two concurrent writers off the same tmp file (BUG-2 fix).
        assert!(name.contains(&std::process::id().to_string()));
        assert!(name.starts_with(".abc123."));
        assert!(name.ends_with(".json.tmp"));
        // Extension is `tmp`, so prune/listing (which key on `json`) skip it.
        assert_eq!(tmp_path.extension().unwrap(), "tmp");
    }

    // ---- lookup_ahead_behind: SHA validation --------------------------------

    #[test]
    fn lookup_returns_none_for_unknown_branch() {
        let file = inventory("/repo", vec![memo("main", "aaa", "bbb", 1, 0)]);
        assert!(lookup_ahead_behind(&file, "feat/x", "aaa", "bbb", 0, Utc::now()).is_none());
    }

    #[test]
    fn lookup_returns_values_when_shas_match() {
        let file = inventory("/repo", vec![memo("feat/x", "head1", "base1", 3, 1)]);
        let hit = lookup_ahead_behind(&file, "feat/x", "head1", "base1", 0, Utc::now());
        assert_eq!(hit, Some((3, 1)));
    }

    #[test]
    fn lookup_returns_none_when_head_sha_changed() {
        let file = inventory("/repo", vec![memo("feat/x", "head1", "base1", 3, 1)]);
        assert!(lookup_ahead_behind(&file, "feat/x", "head2", "base1", 0, Utc::now()).is_none());
    }

    #[test]
    fn lookup_returns_none_when_base_sha_changed() {
        let file = inventory("/repo", vec![memo("feat/x", "head1", "base1", 3, 1)]);
        assert!(lookup_ahead_behind(&file, "feat/x", "head1", "base2", 0, Utc::now()).is_none());
    }

    // ---- lookup_ahead_behind: TTL validation --------------------------------

    #[test]
    fn lookup_respects_ttl_when_file_is_stale() {
        let stamped = Utc::now() - Duration::seconds(200);
        let file = inventory_at("/repo", stamped, vec![memo("feat/x", "h", "b", 1, 0)]);
        // TTL 100s but file is 200s old → None
        assert!(lookup_ahead_behind(&file, "feat/x", "h", "b", 100, Utc::now()).is_none());
    }

    #[test]
    fn lookup_returns_value_when_within_ttl() {
        let stamped = Utc::now() - Duration::seconds(50);
        let file = inventory_at("/repo", stamped, vec![memo("feat/x", "h", "b", 1, 0)]);
        // TTL 100s, file is 50s old → hit
        let hit = lookup_ahead_behind(&file, "feat/x", "h", "b", 100, Utc::now());
        assert_eq!(hit, Some((1, 0)));
    }

    #[test]
    fn lookup_ignores_ttl_when_zero() {
        let stamped = Utc::now() - Duration::days(365);
        let file = inventory_at("/repo", stamped, vec![memo("feat/x", "h", "b", 2, 3)]);
        // TTL 0 → SHA-only validation, always hit if SHAs match
        let hit = lookup_ahead_behind(&file, "feat/x", "h", "b", 0, Utc::now());
        assert_eq!(hit, Some((2, 3)));
    }

    #[test]
    fn lookup_returns_none_for_future_indexed_at() {
        // Clock skew: indexed_at is in the future, so age is negative.
        let stamped = Utc::now() + Duration::seconds(100);
        let file = inventory_at("/repo", stamped, vec![memo("feat/x", "h", "b", 1, 0)]);
        let outcome = lookup_ahead_behind(&file, "feat/x", "h", "b", 50, Utc::now());
        assert!(outcome.is_none(), "negative age must be treated as a miss");
    }

    // ---- InventoryStore::save / load ----------------------------------------

    #[test]
    fn store_save_and_load_roundtrip() {
        let (_root, store) = temp_store();
        let file = inventory("/repo", vec![memo("feat/x", "h1", "b1", 4, 2)]);
        let hash = "test0123456789ab";
        store.save(hash, &file).unwrap();

        let loaded = store.load(hash).unwrap();
        assert_eq!(loaded.repo_path, file.repo_path);
        assert_eq!(loaded.loops.len(), 1);
        assert_eq!(loaded.loops[0].ahead, 4);
        assert_eq!(loaded.loops[0].behind, 2);
    }

    #[test]
    fn store_load_returns_none_for_missing_file() {
        let (_root, store) = temp_store();
        assert!(store.load("nonexistent0000000").is_none());
    }

    #[test]
    fn store_load_returns_none_for_corrupt_json() {
        let (_root, store) = temp_store_with_dir();
        let hash = "corrupt000000000";
        write_raw(&store, hash, b"{not json}");
        assert!(store.load(hash).is_none());
    }

    #[test]
    fn store_load_returns_none_for_zero_byte_file() {
        let (_root, store) = temp_store_with_dir();
        let hash = "zerobyte00000000";
        write_raw(&store, hash, b"");
        assert!(store.load(hash).is_none());
    }

    #[test]
    fn store_load_tolerates_unknown_extra_fields() {
        let (_root, store) = temp_store_with_dir();
        let hash = "extrafields00000";
        // Forward-compat: unknown top-level and per-memo fields are ignored.
        let raw = r#"{
            "repo_path": "/repo",
            "indexed_at": "2020-01-01T00:00:00Z",
            "future_field": 42,
            "loops": [
                {"branch":"feat/x","head_sha":"h","ab_base_sha":"b",
                 "ahead":1,"behind":2,"bogus":true}
            ]
        }"#;
        write_raw(&store, hash, raw.as_bytes());
        let loaded = store.load(hash).expect("unknown fields must not fail load");
        assert_eq!(loaded.loops[0].ahead, 1);
        assert_eq!(loaded.loops[0].behind, 2);
    }

    #[test]
    fn store_load_returns_none_when_path_is_a_directory() {
        let (_root, store) = temp_store_with_dir();
        let hash = "isadirectory0000";
        // A directory sitting where the JSON file would be must not panic.
        std::fs::create_dir(path_for_hash(&store.dir, hash)).unwrap();
        assert!(store.load(hash).is_none());
    }

    #[test]
    fn store_save_is_atomic_via_tmp_rename() {
        let (_root, store) = temp_store();
        let hash = "atomic0123456789";
        store.save(hash, &inventory("/repo", vec![])).unwrap();

        // No tmp file (any pid suffix) should remain after a successful save.
        let stray_tmp = std::fs::read_dir(&store.dir)
            .unwrap()
            .flatten()
            .any(|entry| entry.path().extension().is_some_and(|ext| ext == "tmp"));
        assert!(!stray_tmp, "tmp file should be renamed away");
        assert!(path_for_hash(&store.dir, hash).exists());
    }

    // ---- InventoryStore::prune_orphans --------------------------------------

    #[test]
    fn prune_orphans_skips_non_json_and_tmp_files() {
        let (_root, store) = temp_store_with_dir();

        // An orphan JSON (repo_path missing) — must be removed.
        store
            .save("orphan0000000000", &inventory("/nonexistent/repo", vec![]))
            .unwrap();
        // Non-JSON and tmp files — must survive (not inventory files).
        let notes = store.dir.join("notes.txt");
        std::fs::write(&notes, b"keep me").unwrap();
        let leftover_tmp = store.dir.join(".something.json.tmp");
        std::fs::write(&leftover_tmp, b"in-flight").unwrap();

        store.prune_orphans().unwrap();

        assert!(!path_for_hash(&store.dir, "orphan0000000000").exists());
        assert!(notes.exists(), "non-json files must be left alone");
        assert!(leftover_tmp.exists(), "tmp files must be left alone");
    }

    #[test]
    fn prune_orphans_reclaims_unreadable_file() {
        let (_root, store) = temp_store_with_dir();
        // A corrupt file can't prove its repo exists, so prune reclaims it (it is
        // labelled "unreadable" rather than misreported as an "orphan").
        let hash = "corruptlive00000";
        write_raw(&store, hash, b"{ broken json");

        store.prune_orphans().unwrap();

        assert!(!path_for_hash(&store.dir, hash).exists());
    }

    #[test]
    fn prune_orphans_distinguishes_live_orphan_and_unreadable() {
        let (root, store) = temp_store_with_dir();
        // live + valid → kept (repo_path is the tempdir, which exists)
        let live = inventory(root.path().to_str().unwrap(), vec![]);
        store.save("live000000000000", &live).unwrap();
        // valid but repo gone → orphan → removed
        let gone = inventory("/no/such/repo/here", vec![]);
        store.save("orphan0000000000", &gone).unwrap();
        // corrupt → unreadable → removed
        write_raw(&store, "unreadable000000", b"{ nope");

        store.prune_orphans().unwrap();

        assert!(path_for_hash(&store.dir, "live000000000000").exists());
        assert!(!path_for_hash(&store.dir, "orphan0000000000").exists());
        assert!(!path_for_hash(&store.dir, "unreadable000000").exists());
    }

    #[test]
    fn prune_orphans_removes_file_when_repo_path_missing() {
        let (_root, store) = temp_store();
        let hash = "orphan0123456789";
        store
            .save(hash, &inventory("/nonexistent/repo/path", vec![]))
            .unwrap();

        store.prune_orphans().unwrap();

        assert!(!path_for_hash(&store.dir, hash).exists());
    }

    #[test]
    fn prune_orphans_keeps_file_when_repo_path_exists() {
        let (root, store) = temp_store();
        let hash = "active0123456789";
        let file = inventory(root.path().to_str().unwrap(), vec![]);
        store.save(hash, &file).unwrap();

        store.prune_orphans().unwrap();

        assert!(path_for_hash(&store.dir, hash).exists());
    }
}
520}