use std::fs;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use serde::{Serialize, de::DeserializeOwned};
use super::Repository;
use super::integration::MergeProbeResult;
use crate::git::LineDiff;
/// Maximum number of cache files kept per kind; `sweep_lru` evicts the
/// oldest entries (by file mtime) once an insert pushes a kind past this.
const MAX_ENTRIES_PER_KIND: usize = 5000;
// One subdirectory per cached computation, under `<wt_dir>/cache/<kind>`.
const KIND_MERGE_TREE_CONFLICTS: &str = "merge-tree-conflicts";
const KIND_MERGE_ADD_PROBE: &str = "merge-add-probe";
const KIND_IS_ANCESTOR: &str = "is-ancestor";
const KIND_HAS_ADDED_CHANGES: &str = "has-added-changes";
const KIND_DIFF_STATS: &str = "diff-stats";
/// Every cache kind, so `clear_all` can wipe the whole cache in one pass.
const ALL_KINDS: &[&str] = &[
KIND_MERGE_TREE_CONFLICTS,
KIND_MERGE_ADD_PROBE,
KIND_IS_ANCESTOR,
KIND_HAS_ADDED_CHANGES,
KIND_DIFF_STATS,
];
/// Directory that holds cache entries of the given `kind` for this repository.
fn cache_dir(repo: &Repository, kind: &str) -> PathBuf {
    let base = repo.wt_dir().join("cache");
    base.join(kind)
}
/// Order-insensitive cache filename: the lexicographically smaller SHA always
/// comes first, so `(a, b)` and `(b, a)` map to the same entry.
fn symmetric_key(sha1: &str, sha2: &str) -> String {
    let (lo, hi) = if sha1 <= sha2 { (sha1, sha2) } else { (sha2, sha1) };
    format!("{lo}-{hi}.json")
}
/// Order-sensitive cache filename: `(a, b)` and `(b, a)` are distinct entries.
fn asymmetric_key(first: &str, second: &str) -> String {
    let mut key = String::with_capacity(first.len() + second.len() + 6);
    key.push_str(first);
    key.push('-');
    key.push_str(second);
    key.push_str(".json");
    key
}
/// Loads and deserializes one cache entry. Returns `None` when the file is
/// missing or unreadable, or when it holds corrupt JSON — corruption is
/// logged at debug level and treated as a plain cache miss.
fn read<T: DeserializeOwned>(path: &Path) -> Option<T> {
    let json = fs::read_to_string(path).ok()?;
    serde_json::from_str::<T>(&json)
        .map_err(|e| log::debug!("sha_cache: corrupt entry at {}: {}", path.display(), e))
        .ok()
}
/// Serializes `value` to JSON and writes it at `path`, creating the parent
/// directory first. Best-effort: every failure is logged at debug level and
/// otherwise ignored, since a missing cache entry is always recoverable.
fn write<T: Serialize>(path: &Path, value: &T) {
    if let Some(parent) = path.parent() {
        if let Err(e) = fs::create_dir_all(parent) {
            log::debug!(
                "sha_cache: failed to create dir {}: {}",
                parent.display(),
                e
            );
            return;
        }
    }
    let json = match serde_json::to_string(value) {
        Ok(json) => json,
        Err(_) => {
            log::debug!(
                "sha_cache: failed to serialize entry for {}",
                path.display()
            );
            return;
        }
    };
    if let Err(e) = fs::write(path, &json) {
        log::debug!("sha_cache: failed to write {}: {}", path.display(), e);
    }
}
/// Keeps at most `max` `.json` entries in `dir`, deleting the oldest ones
/// (by file modification time) first.
///
/// Fix: the debug log previously reported `excess` — the number of intended
/// deletions — even when `fs::remove_file` failed, and entries whose metadata
/// could not be read were silently dropped from the candidate list. The log
/// now reports the count of entries actually removed.
fn sweep_lru(dir: &Path, max: usize) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    let json_entries: Vec<_> = entries
        .filter_map(|e| e.ok())
        .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
        .collect();
    if json_entries.len() <= max {
        return;
    }
    // Entries whose metadata/mtime cannot be read are skipped (never deleted).
    let mut with_mtime: Vec<(PathBuf, SystemTime)> = json_entries
        .into_iter()
        .filter_map(|e| {
            let mtime = e.metadata().ok()?.modified().ok()?;
            Some((e.path(), mtime))
        })
        .collect();
    // Oldest first; stability is irrelevant, so use the non-allocating sort.
    with_mtime.sort_unstable_by_key(|(_, mtime)| *mtime);
    let excess = with_mtime.len().saturating_sub(max);
    let removed = with_mtime
        .iter()
        .take(excess)
        .filter(|(path, _)| fs::remove_file(path).is_ok())
        .count();
    log::debug!("sha_cache: swept {} entries from {}", removed, dir.display());
}
/// Cached merge-conflict status for a pair of commit SHAs. Conflict status is
/// symmetric, so the key ignores argument order.
pub(super) fn merge_conflicts(repo: &Repository, sha1: &str, sha2: &str) -> Option<bool> {
    let key = symmetric_key(sha1, sha2);
    read(&cache_dir(repo, KIND_MERGE_TREE_CONFLICTS).join(key))
}
/// Stores the merge-conflict status for a pair of SHAs, then bounds the
/// cache-kind directory.
pub(super) fn put_merge_conflicts(repo: &Repository, sha1: &str, sha2: &str, value: bool) {
    let dir = cache_dir(repo, KIND_MERGE_TREE_CONFLICTS);
    write(&dir.join(symmetric_key(sha1, sha2)), &value);
    sweep_lru(&dir, MAX_ENTRIES_PER_KIND);
}
/// Cached merge-add probe result for `branch_sha` relative to `target_sha`.
/// Direction matters, so the key is order-sensitive.
pub(super) fn merge_add_probe(
    repo: &Repository,
    branch_sha: &str,
    target_sha: &str,
) -> Option<MergeProbeResult> {
    let key = asymmetric_key(branch_sha, target_sha);
    read(&cache_dir(repo, KIND_MERGE_ADD_PROBE).join(key))
}
/// Stores a merge-add probe result, then bounds the cache-kind directory.
pub(super) fn put_merge_add_probe(
    repo: &Repository,
    branch_sha: &str,
    target_sha: &str,
    value: MergeProbeResult,
) {
    let dir = cache_dir(repo, KIND_MERGE_ADD_PROBE);
    write(&dir.join(asymmetric_key(branch_sha, target_sha)), &value);
    sweep_lru(&dir, MAX_ENTRIES_PER_KIND);
}
/// Cached answer to "is `base_sha` an ancestor of `head_sha`?". The relation
/// is directional, so the key is order-sensitive.
pub(super) fn is_ancestor(repo: &Repository, base_sha: &str, head_sha: &str) -> Option<bool> {
    let key = asymmetric_key(base_sha, head_sha);
    read(&cache_dir(repo, KIND_IS_ANCESTOR).join(key))
}
/// Stores an ancestry answer, then bounds the cache-kind directory.
pub(super) fn put_is_ancestor(repo: &Repository, base_sha: &str, head_sha: &str, value: bool) {
    let dir = cache_dir(repo, KIND_IS_ANCESTOR);
    write(&dir.join(asymmetric_key(base_sha, head_sha)), &value);
    sweep_lru(&dir, MAX_ENTRIES_PER_KIND);
}
/// Cached answer to whether `branch_sha` adds changes over `target_sha`.
/// Directional, so the key is order-sensitive.
pub(super) fn has_added_changes(
    repo: &Repository,
    branch_sha: &str,
    target_sha: &str,
) -> Option<bool> {
    let key = asymmetric_key(branch_sha, target_sha);
    read(&cache_dir(repo, KIND_HAS_ADDED_CHANGES).join(key))
}
/// Stores an added-changes answer, then bounds the cache-kind directory.
pub(super) fn put_has_added_changes(
    repo: &Repository,
    branch_sha: &str,
    target_sha: &str,
    value: bool,
) {
    let dir = cache_dir(repo, KIND_HAS_ADDED_CHANGES);
    write(&dir.join(asymmetric_key(branch_sha, target_sha)), &value);
    sweep_lru(&dir, MAX_ENTRIES_PER_KIND);
}
/// Cached diff line counts between `base_sha` and `head_sha`. Directional,
/// so the key is order-sensitive.
pub(super) fn diff_stats(repo: &Repository, base_sha: &str, head_sha: &str) -> Option<LineDiff> {
    let key = asymmetric_key(base_sha, head_sha);
    read(&cache_dir(repo, KIND_DIFF_STATS).join(key))
}
/// Stores diff line counts, then bounds the cache-kind directory.
pub(super) fn put_diff_stats(repo: &Repository, base_sha: &str, head_sha: &str, value: LineDiff) {
    let dir = cache_dir(repo, KIND_DIFF_STATS);
    write(&dir.join(asymmetric_key(base_sha, head_sha)), &value);
    sweep_lru(&dir, MAX_ENTRIES_PER_KIND);
}
/// Deletes every `.json` cache entry across all cache kinds and returns the
/// number of entries removed. Missing kind directories are skipped; the
/// directories themselves are left in place.
pub(crate) fn clear_all(repo: &Repository) -> usize {
    ALL_KINDS
        .iter()
        .map(|kind| {
            let dir = cache_dir(repo, kind);
            let Ok(entries) = fs::read_dir(&dir) else {
                return 0;
            };
            entries
                .flatten()
                .map(|entry| entry.path())
                .filter(|path| path.extension().is_some_and(|ext| ext == "json"))
                .filter(|path| fs::remove_file(path).is_ok())
                .count()
        })
        .sum()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::TestRepo;

    // Key construction: symmetric keys sort the pair, asymmetric keys don't.
    #[test]
    fn test_symmetric_key_sorts_pair() {
        assert_eq!(symmetric_key("aaaa", "bbbb"), "aaaa-bbbb.json");
        assert_eq!(symmetric_key("bbbb", "aaaa"), "aaaa-bbbb.json");
        assert_eq!(
            symmetric_key("deadbeef", "deadbeef"),
            "deadbeef-deadbeef.json"
        );
    }

    #[test]
    fn test_asymmetric_key_preserves_order() {
        assert_eq!(asymmetric_key("aaaa", "bbbb"), "aaaa-bbbb.json");
        assert_eq!(asymmetric_key("bbbb", "aaaa"), "bbbb-aaaa.json");
    }

    // Put/get round-trip; symmetric lookup works in either argument order.
    #[test]
    fn test_merge_conflicts_roundtrip() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        assert_eq!(merge_conflicts(&repo, "aaaa", "bbbb"), None);
        put_merge_conflicts(&repo, "aaaa", "bbbb", true);
        assert_eq!(merge_conflicts(&repo, "aaaa", "bbbb"), Some(true));
        assert_eq!(merge_conflicts(&repo, "bbbb", "aaaa"), Some(true));
        put_merge_conflicts(&repo, "aaaa", "bbbb", false);
        assert_eq!(merge_conflicts(&repo, "aaaa", "bbbb"), Some(false));
    }

    // Asymmetric round-trip: the reversed pair must miss.
    #[test]
    fn test_merge_add_probe_roundtrip() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        assert_eq!(merge_add_probe(&repo, "aaaa", "bbbb"), None);
        let value = MergeProbeResult {
            would_merge_add: true,
            is_patch_id_match: false,
        };
        put_merge_add_probe(&repo, "aaaa", "bbbb", value);
        assert_eq!(merge_add_probe(&repo, "aaaa", "bbbb"), Some(value));
        assert_eq!(merge_add_probe(&repo, "bbbb", "aaaa"), None);
    }

    // An entry stored under one kind must not be visible to another kind.
    #[test]
    fn test_kinds_are_isolated() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        put_merge_conflicts(&repo, "aaaa", "bbbb", true);
        assert_eq!(merge_add_probe(&repo, "aaaa", "bbbb"), None);
    }

    // Corrupt JSON on disk must behave as a cache miss, not an error.
    #[test]
    fn test_corrupt_entry_returns_none() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        put_merge_conflicts(&repo, "aaaa", "bbbb", true);
        let path = cache_dir(&repo, KIND_MERGE_TREE_CONFLICTS).join(symmetric_key("aaaa", "bbbb"));
        fs::write(&path, "not valid json {{{").unwrap();
        assert_eq!(merge_conflicts(&repo, "aaaa", "bbbb"), None);
    }

    // Oldest entries (by mtime) are evicted first once over the bound.
    #[test]
    fn test_sweep_lru_trims_oldest_entries() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        let dir = cache_dir(&repo, KIND_MERGE_TREE_CONFLICTS);
        fs::create_dir_all(&dir).unwrap();
        for i in 0..5 {
            let path = dir.join(format!("entry{i}.json"));
            fs::write(&path, "true").unwrap();
            // Sleep so each entry gets a strictly later mtime.
            std::thread::sleep(std::time::Duration::from_millis(10));
        }
        sweep_lru(&dir, 3);
        let mut remaining: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .map(|e| e.file_name().to_string_lossy().into_owned())
            .collect();
        remaining.sort();
        assert_eq!(remaining, ["entry2.json", "entry3.json", "entry4.json"]);
    }

    #[test]
    fn test_sweep_lru_no_op_under_bound() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        let dir = cache_dir(&repo, KIND_MERGE_TREE_CONFLICTS);
        fs::create_dir_all(&dir).unwrap();
        for i in 0..3 {
            fs::write(dir.join(format!("entry{i}.json")), "true").unwrap();
        }
        sweep_lru(&dir, 5);
        let count = fs::read_dir(&dir).unwrap().count();
        assert_eq!(count, 3, "should not delete anything when under bound");
    }

    // Tampering with the cached file changes the repository-level answer,
    // proving the high-level API actually consults this cache.
    #[test]
    fn test_has_merge_conflicts_reads_cache() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("new.txt"), "content\n").unwrap();
        test.run_git(&["add", "new.txt"]);
        test.run_git(&["commit", "-m", "Add file"]);
        test.run_git(&["checkout", "main"]);
        let repo = Repository::at(test.root_path()).unwrap();
        assert!(!repo.has_merge_conflicts("main", "feature").unwrap());
        let dir = cache_dir(&repo, KIND_MERGE_TREE_CONFLICTS);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1, "exactly one cache entry expected");
        fs::write(entries[0].path(), "true").unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        assert!(repo2.has_merge_conflicts("main", "feature").unwrap());
    }

    // The by-tree variant must key on the tree SHA (not just branch HEAD)
    // so uncommitted index changes participate in the cache key.
    #[test]
    fn test_has_merge_conflicts_by_tree_uses_composite_cache_key() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("dirty.txt"), "uncommitted\n").unwrap();
        test.run_git(&["add", "dirty.txt"]);
        let branch_head = test.git_output(&["rev-parse", "HEAD"]);
        let tree_sha = test.git_output(&["write-tree"]);
        let repo = Repository::at(test.root_path()).unwrap();
        let result = repo
            .has_merge_conflicts_by_tree("main", &branch_head, &tree_sha)
            .unwrap();
        let dir = cache_dir(&repo, KIND_MERGE_TREE_CONFLICTS);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1, "exactly one cache entry expected");
        let filename = entries[0].file_name().to_string_lossy().into_owned();
        // Fixed: the failure message previously printed the literal
        // "(unknown)" instead of the actual filename it had just computed.
        assert!(
            filename.contains(&tree_sha),
            "cache filename should contain tree SHA ({tree_sha}), got: {filename}"
        );
        let tampered = !result;
        fs::write(entries[0].path(), serde_json::to_string(&tampered).unwrap()).unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        let cached = repo2
            .has_merge_conflicts_by_tree("main", &branch_head, &tree_sha)
            .unwrap();
        assert_eq!(cached, tampered, "should read tampered value from cache");
    }

    // Rebasing changes branch HEAD, which must produce a fresh cache key and
    // therefore a freshly computed (non-stale) answer.
    #[test]
    fn test_has_merge_conflicts_by_tree_invalidates_on_branch_head_change() {
        let test = TestRepo::with_initial_commit();
        fs::write(test.root_path().join("shared.txt"), "initial\n").unwrap();
        test.run_git(&["add", "shared.txt"]);
        test.run_git(&["commit", "-m", "base: add shared.txt"]);
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("shared.txt"), "feature content\n").unwrap();
        test.run_git(&["add", "shared.txt"]);
        test.run_git(&["commit", "-m", "feature: modify shared.txt"]);
        test.run_git(&["checkout", "main"]);
        fs::write(test.root_path().join("shared.txt"), "main content\n").unwrap();
        test.run_git(&["add", "shared.txt"]);
        test.run_git(&["commit", "-m", "main: modify shared.txt"]);
        test.run_git(&["checkout", "feature"]);
        fs::write(test.root_path().join("extra.txt"), "extra\n").unwrap();
        test.run_git(&["add", "extra.txt"]);
        let head_before = test.git_output(&["rev-parse", "HEAD"]);
        let tree1 = test.git_output(&["write-tree"]);
        let repo = Repository::at(test.root_path()).unwrap();
        let result_before = repo
            .has_merge_conflicts_by_tree("main", &head_before, &tree1)
            .unwrap();
        assert!(result_before, "should conflict before rebase");
        // Clean the index so the rebase can run, then restore the staged file.
        test.run_git(&["reset", "HEAD", "extra.txt"]);
        fs::remove_file(test.root_path().join("extra.txt")).unwrap();
        test.run_git(&["rebase", "main", "--strategy-option=theirs"]);
        fs::write(test.root_path().join("extra.txt"), "extra\n").unwrap();
        test.run_git(&["add", "extra.txt"]);
        let head_after = test.git_output(&["rev-parse", "HEAD"]);
        let tree2 = test.git_output(&["write-tree"]);
        assert_ne!(
            head_before, head_after,
            "branch HEAD should change after rebase"
        );
        let repo2 = Repository::at(test.root_path()).unwrap();
        let result_after = repo2
            .has_merge_conflicts_by_tree("main", &head_after, &tree2)
            .unwrap();
        assert!(
            !result_after,
            "should not conflict after rebase (different branch HEAD = different cache key)"
        );
    }

    // Same tamper-the-file technique for the merge integration probe.
    #[test]
    fn test_merge_integration_probe_reads_cache() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("new.txt"), "content\n").unwrap();
        test.run_git(&["add", "new.txt"]);
        test.run_git(&["commit", "-m", "Feature"]);
        test.run_git(&["checkout", "main"]);
        test.run_git(&["merge", "feature"]);
        let repo = Repository::at(test.root_path()).unwrap();
        let real = repo.merge_integration_probe("feature", "main").unwrap();
        assert!(!real.would_merge_add);
        let dir = cache_dir(&repo, KIND_MERGE_ADD_PROBE);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1, "exactly one cache entry expected");
        let tampered = MergeProbeResult {
            would_merge_add: true,
            is_patch_id_match: true,
        };
        fs::write(entries[0].path(), serde_json::to_string(&tampered).unwrap()).unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        let cached = repo2.merge_integration_probe("feature", "main").unwrap();
        assert!(cached.would_merge_add);
        assert!(cached.is_patch_id_match);
    }

    #[test]
    fn test_is_ancestor_roundtrip() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        assert_eq!(is_ancestor(&repo, "aaaa", "bbbb"), None);
        put_is_ancestor(&repo, "aaaa", "bbbb", true);
        assert_eq!(is_ancestor(&repo, "aaaa", "bbbb"), Some(true));
        assert_eq!(is_ancestor(&repo, "bbbb", "aaaa"), None);
    }

    #[test]
    fn test_has_added_changes_roundtrip() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        assert_eq!(has_added_changes(&repo, "aaaa", "bbbb"), None);
        put_has_added_changes(&repo, "aaaa", "bbbb", false);
        assert_eq!(has_added_changes(&repo, "aaaa", "bbbb"), Some(false));
        assert_eq!(has_added_changes(&repo, "bbbb", "aaaa"), None);
    }

    #[test]
    fn test_diff_stats_roundtrip() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        assert_eq!(diff_stats(&repo, "aaaa", "bbbb"), None);
        let value = LineDiff {
            added: 42,
            deleted: 7,
        };
        put_diff_stats(&repo, "aaaa", "bbbb", value);
        assert_eq!(diff_stats(&repo, "aaaa", "bbbb"), Some(value));
        assert_eq!(diff_stats(&repo, "bbbb", "aaaa"), None);
    }

    #[test]
    fn test_is_ancestor_reads_cache() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("new.txt"), "content\n").unwrap();
        test.run_git(&["add", "new.txt"]);
        test.run_git(&["commit", "-m", "Feature"]);
        test.run_git(&["checkout", "main"]);
        let repo = Repository::at(test.root_path()).unwrap();
        assert!(!repo.is_ancestor("feature", "main").unwrap());
        let dir = cache_dir(&repo, KIND_IS_ANCESTOR);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1);
        fs::write(entries[0].path(), "true").unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        assert!(repo2.is_ancestor("feature", "main").unwrap());
    }

    #[test]
    fn test_has_added_changes_reads_cache() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("new.txt"), "content\n").unwrap();
        test.run_git(&["add", "new.txt"]);
        test.run_git(&["commit", "-m", "Feature"]);
        test.run_git(&["checkout", "main"]);
        let repo = Repository::at(test.root_path()).unwrap();
        assert!(repo.has_added_changes("feature", "main").unwrap());
        let dir = cache_dir(&repo, KIND_HAS_ADDED_CHANGES);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1);
        fs::write(entries[0].path(), "false").unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        assert!(!repo2.has_added_changes("feature", "main").unwrap());
    }

    #[test]
    fn test_branch_diff_stats_reads_cache() {
        let test = TestRepo::with_initial_commit();
        test.run_git(&["checkout", "-b", "feature"]);
        fs::write(test.root_path().join("new.txt"), "content\n").unwrap();
        test.run_git(&["add", "new.txt"]);
        test.run_git(&["commit", "-m", "Feature"]);
        test.run_git(&["checkout", "main"]);
        let repo = Repository::at(test.root_path()).unwrap();
        let real = repo.branch_diff_stats("main", "feature").unwrap();
        assert_eq!(real.added, 1);
        assert_eq!(real.deleted, 0);
        let dir = cache_dir(&repo, KIND_DIFF_STATS);
        let entries: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_name().to_str().is_some_and(|s| s.ends_with(".json")))
            .collect();
        assert_eq!(entries.len(), 1);
        let tampered = LineDiff {
            added: 999,
            deleted: 888,
        };
        fs::write(entries[0].path(), serde_json::to_string(&tampered).unwrap()).unwrap();
        let repo2 = Repository::at(test.root_path()).unwrap();
        let cached = repo2.branch_diff_stats("main", "feature").unwrap();
        assert_eq!(cached.added, 999);
        assert_eq!(cached.deleted, 888);
    }

    // One entry per kind, cleared in one call; all lookups miss afterwards.
    #[test]
    fn test_clear_all_covers_all_kinds() {
        let test = TestRepo::with_initial_commit();
        let repo = Repository::at(test.root_path()).unwrap();
        put_merge_conflicts(&repo, "a", "b", true);
        put_merge_add_probe(
            &repo,
            "a",
            "b",
            MergeProbeResult {
                would_merge_add: true,
                is_patch_id_match: false,
            },
        );
        put_is_ancestor(&repo, "a", "b", true);
        put_has_added_changes(&repo, "a", "b", true);
        put_diff_stats(
            &repo,
            "a",
            "b",
            LineDiff {
                added: 1,
                deleted: 0,
            },
        );
        let cleared = clear_all(&repo);
        assert_eq!(cleared, 5, "should clear one entry per kind");
        assert_eq!(merge_conflicts(&repo, "a", "b"), None);
        assert_eq!(merge_add_probe(&repo, "a", "b"), None);
        assert_eq!(is_ancestor(&repo, "a", "b"), None);
        assert_eq!(has_added_changes(&repo, "a", "b"), None);
        assert_eq!(diff_stats(&repo, "a", "b"), None);
    }
}