Skip to main content

contributor_graphs/
cache.rs

//! On-disk cache that makes re-runs fast. Everything lives under one
//! tool-specific directory in the XDG cache location
//! (`$XDG_CACHE_HOME/contributor-graphs`, else `~/.cache/contributor-graphs`):
//!
//! - `clones/<key>/`     bare partial clones (see [`crate::repo`]).
//! - `commits/<key>.json` parsed `git log` output, validated against the
//!   branch tip SHA and the commit filters so it is reused only while the
//!   history and the query are unchanged.
//! - `github/authors.json`  commit SHA -> resolved GitHub login + avatar URL
//!   (immutable, so never expires).
//! - `github/profiles.json` login -> display name + company (time-limited).
//! - `github/avatars.json`  avatar URL -> embedded data URI (time-limited).
//! - `github/orgs.json`     org/user -> repository list (short time limit).
//!
//! The git-history and clone caches alone turn a multi-minute whole-org run
//! into a few seconds when nothing has changed; the GitHub caches remove the
//! thousands of API calls and avatar downloads on top of that.
16
17use crate::model::CommitFilter;
18use serde::{Deserialize, Serialize};
19use std::collections::HashMap;
20use std::path::{Path, PathBuf};
21
/// Schema version stamped into every commits cache file; increase it whenever
/// the stored layout changes incompatibly so old files are ignored.
const COMMITS_VERSION: u32 = 2;

/// Seconds in one hour / one day, used to spell out the TTLs below.
const SECS_PER_HOUR: i64 = 60 * 60;
const SECS_PER_DAY: i64 = 24 * SECS_PER_HOUR;

/// Lifetime of a cached profile: 30 days. Profiles can change (a company
/// move), so they must expire eventually.
const PROFILE_TTL: i64 = 30 * SECS_PER_DAY;
/// Lifetime of a cached avatar: 90 days. Avatars change rarely, so they are
/// kept longer than profiles.
const AVATAR_TTL: i64 = 90 * SECS_PER_DAY;
/// Lifetime of a cached org repo list: 6 hours. Repos get added or archived,
/// so this is short enough for new repos to show up the same day while still
/// making back-to-back runs fast.
const ORG_TTL: i64 = 6 * SECS_PER_HOUR;
31
/// The tool's cache directory, or `None` if no usable location can be found.
///
/// Resolution order:
/// 1. `$XDG_CACHE_HOME/contributor-graphs`, when the variable is set to a
///    non-blank **absolute** path. The XDG Base Directory specification says
///    relative values are invalid and must be ignored, so a relative value
///    falls through to the next step instead of creating a cache relative to
///    whatever the current working directory happens to be.
/// 2. `$HOME/.cache/contributor-graphs`, when `HOME` is set and non-empty.
///
/// Nothing is created on disk here; the path is only computed.
pub fn root() -> Option<PathBuf> {
    const APP_DIR: &str = "contributor-graphs";

    // Surrounding whitespace is trimmed before validation; a blank value
    // becomes the empty path, which is not absolute and is rejected too.
    let xdg = std::env::var("XDG_CACHE_HOME")
        .ok()
        .map(|raw| PathBuf::from(raw.trim()))
        .filter(|dir| dir.is_absolute());
    if let Some(dir) = xdg {
        return Some(dir.join(APP_DIR));
    }

    let home = std::env::var("HOME").ok().filter(|h| !h.is_empty())?;
    Some(PathBuf::from(home).join(".cache").join(APP_DIR))
}
47
48fn read_json<T: for<'de> Deserialize<'de>>(path: &Path) -> Option<T> {
49    serde_json::from_str(&std::fs::read_to_string(path).ok()?).ok()
50}
51
52fn write_json<T: Serialize>(path: &Path, value: &T) {
53    if let Some(parent) = path.parent() {
54        let _ = std::fs::create_dir_all(parent);
55    }
56    if let Ok(s) = serde_json::to_string(value) {
57        let _ = std::fs::write(path, s);
58    }
59}
60
61/// A commit as stored in the git-history cache (no source index; that is
62/// re-assigned per run when the pool is rebuilt).
63#[derive(Clone, Serialize, Deserialize)]
64pub struct CachedCommit {
65    pub sha: String,
66    pub ts: i64,
67    pub name: String,
68    pub email: String,
69    #[serde(default, skip_serializing_if = "Vec::is_empty")]
70    pub coauthors: Vec<(String, String)>,
71}
72
73/// One repository's cached `git log`, valid only while `tip` and the filters
74/// still match the live repository.
75#[derive(Serialize, Deserialize)]
76pub struct CommitsCache {
77    pub version: u32,
78    pub tip: String,
79    pub since: Option<String>,
80    pub until: Option<String>,
81    pub no_merges: bool,
82    pub commits: Vec<CachedCommit>,
83}
84
/// Location of the commits cache file for `key`:
/// `<root>/commits/<key>.json`.
fn commits_path(root: &Path, key: &str) -> PathBuf {
    let mut path = root.join("commits");
    path.push(format!("{key}.json"));
    path
}
88
89#[derive(Clone, Serialize, Deserialize)]
90pub struct Author {
91    pub login: String,
92    pub avatar_url: String,
93}
94
95#[derive(Clone, Serialize, Deserialize)]
96struct ProfileRec {
97    name: Option<String>,
98    company: Option<String>,
99    ts: i64,
100}
101
102#[derive(Clone, Serialize, Deserialize)]
103struct AvatarRec {
104    data: String,
105    ts: i64,
106}
107
108#[derive(Clone, Serialize, Deserialize)]
109struct OrgRec {
110    repos: Vec<String>,
111    ts: i64,
112}
113
114/// In-memory view of the GitHub caches for one run. Loaded once, queried before
115/// any network call, and written back at the end. Reads are skipped (treated as
116/// misses) when `refresh` is set, but existing entries are still preserved on
117/// save so unrelated repositories keep their cache.
118pub struct Caches {
119    root: Option<PathBuf>,
120    refresh: bool,
121    now: i64,
122    authors: HashMap<String, Author>,
123    profiles: HashMap<String, ProfileRec>,
124    avatars: HashMap<String, AvatarRec>,
125    orgs: HashMap<String, OrgRec>,
126    authors_dirty: bool,
127    profiles_dirty: bool,
128    avatars_dirty: bool,
129    orgs_dirty: bool,
130}
131
132impl Caches {
133    pub fn load(refresh: bool, now: i64) -> Self {
134        let root = root();
135        let dir = root.as_ref().map(|r| r.join("github"));
136        fn load_map<V: for<'de> Deserialize<'de>>(
137            dir: &Option<PathBuf>,
138            name: &str,
139        ) -> HashMap<String, V> {
140            dir.as_ref()
141                .and_then(|d| read_json(&d.join(name)))
142                .unwrap_or_default()
143        }
144        Caches {
145            authors: load_map(&dir, "authors.json"),
146            profiles: load_map(&dir, "profiles.json"),
147            avatars: load_map(&dir, "avatars.json"),
148            orgs: load_map(&dir, "orgs.json"),
149            root,
150            refresh,
151            now,
152            authors_dirty: false,
153            profiles_dirty: false,
154            avatars_dirty: false,
155            orgs_dirty: false,
156        }
157    }
158
159    /// Cached commits for a repo, returned only if the freshness token (`tip`)
160    /// and the filters still match this run.
161    pub fn commits(
162        &self,
163        key: &str,
164        tip: &str,
165        filter: &CommitFilter,
166    ) -> Option<Vec<CachedCommit>> {
167        if self.refresh {
168            return None;
169        }
170        let entry: CommitsCache = read_json(&commits_path(self.root.as_ref()?, key))?;
171        (entry.version == COMMITS_VERSION
172            && entry.tip == tip
173            && entry.since == filter.since
174            && entry.until == filter.until
175            && entry.no_merges == filter.no_merges)
176            .then_some(entry.commits)
177    }
178
179    /// Store a repo's parsed commits against the current tip SHA.
180    pub fn put_commits(
181        &self,
182        key: &str,
183        tip: &str,
184        filter: &CommitFilter,
185        commits: Vec<CachedCommit>,
186    ) {
187        let Some(root) = &self.root else { return };
188        let entry = CommitsCache {
189            version: COMMITS_VERSION,
190            tip: tip.to_string(),
191            since: filter.since.clone(),
192            until: filter.until.clone(),
193            no_merges: filter.no_merges,
194            commits,
195        };
196        write_json(&commits_path(root, key), &entry);
197    }
198
199    pub fn author(&self, sha: &str) -> Option<Author> {
200        if self.refresh {
201            return None;
202        }
203        self.authors.get(sha).cloned()
204    }
205
206    pub fn put_author(&mut self, sha: String, login: String, avatar_url: String) {
207        self.authors.insert(sha, Author { login, avatar_url });
208        self.authors_dirty = true;
209    }
210
211    pub fn profile(&self, login: &str) -> Option<(Option<String>, Option<String>)> {
212        if self.refresh {
213            return None;
214        }
215        let rec = self.profiles.get(login)?;
216        (self.now - rec.ts < PROFILE_TTL).then(|| (rec.name.clone(), rec.company.clone()))
217    }
218
219    pub fn put_profile(&mut self, login: String, name: Option<String>, company: Option<String>) {
220        self.profiles.insert(
221            login,
222            ProfileRec {
223                name,
224                company,
225                ts: self.now,
226            },
227        );
228        self.profiles_dirty = true;
229    }
230
231    pub fn avatar(&self, key: &str) -> Option<String> {
232        if self.refresh {
233            return None;
234        }
235        let rec = self.avatars.get(key)?;
236        (self.now - rec.ts < AVATAR_TTL).then(|| rec.data.clone())
237    }
238
239    pub fn put_avatar(&mut self, key: String, data: String) {
240        self.avatars.insert(key, AvatarRec { data, ts: self.now });
241        self.avatars_dirty = true;
242    }
243
244    /// The cached repo list for an org/user, if still within its short TTL.
245    pub fn org_repos(&self, owner: &str) -> Option<Vec<String>> {
246        if self.refresh {
247            return None;
248        }
249        let rec = self.orgs.get(owner)?;
250        (self.now - rec.ts < ORG_TTL).then(|| rec.repos.clone())
251    }
252
253    pub fn put_org_repos(&mut self, owner: String, repos: Vec<String>) {
254        self.orgs.insert(
255            owner,
256            OrgRec {
257                repos,
258                ts: self.now,
259            },
260        );
261        self.orgs_dirty = true;
262    }
263
264    /// Persist any maps that changed this run.
265    pub fn save(&self) {
266        let Some(root) = &self.root else { return };
267        let dir = root.join("github");
268        if self.authors_dirty {
269            write_json(&dir.join("authors.json"), &self.authors);
270        }
271        if self.profiles_dirty {
272            write_json(&dir.join("profiles.json"), &self.profiles);
273        }
274        if self.avatars_dirty {
275            write_json(&dir.join("avatars.json"), &self.avatars);
276        }
277        if self.orgs_dirty {
278            write_json(&dir.join("orgs.json"), &self.orgs);
279        }
280    }
281}