use std::fs;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use lru::LruCache;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::git::{BlameResult, ChangeKind, CommitInfo, GitError, Repo};
/// Schema version stamped into every on-disk payload. The disk readers treat
/// a payload whose `schema_ver` differs from this value as a cache miss.
pub const GIT_CACHE_SCHEMA: u16 = 1;
/// Name of the directory, joined onto the base directory passed to
/// `GitCache::open`, that holds the on-disk cache.
pub const GIT_CACHE_DIR: &str = "git-cache";
/// Errors returned by the git cache.
#[derive(Debug, Error)]
pub enum CacheError {
/// A filesystem operation on `path` failed.
#[error("io error on {path}: {source}")]
Io {
/// Path the failing operation was applied to.
path: PathBuf,
/// Underlying I/O error.
#[source]
source: std::io::Error,
},
/// An error from the git layer, converted automatically via `#[from]`.
#[error("git error: {0}")]
Git(#[from] GitError),
}
/// Serialized form of a cached per-commit file-change list.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CommitFilesPayload {
// Compared against `GIT_CACHE_SCHEMA` when the payload is read back.
schema_ver: u16,
// The cached `(path, change kind)` pairs for one commit.
files: Vec<(crate::path::RelPath, ChangeKind)>,
}
/// Serialized form of a cached log query result.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LogPayload {
// Compared against `GIT_CACHE_SCHEMA` when the payload is read back.
schema_ver: u16,
// The cached commits for one `LogKey`.
commits: Vec<CommitInfo>,
}
/// Serialized form of a cached blame result.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct BlamePayload {
// Compared against `GIT_CACHE_SCHEMA` when the payload is read back.
schema_ver: u16,
// The cached blame result for one `BlameKey`.
result: BlameResult,
}
/// Cache key for a blame query; used both as the in-memory LRU key and to
/// derive the on-disk file name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BlameKey {
/// Revision the blame is computed at; appears verbatim in the file name.
pub suspect_sha: String,
/// File being blamed; only a hash of it appears in the file name.
pub path: crate::path::RelPath,
/// Optional range forwarded to the blame call; `None` is encoded as `all`
/// in the file name. NOTE(review): presumably a line range — confirm the
/// bounds convention against `Repo::blame_file`.
pub range: Option<(u32, u32)>,
}
/// Cache key for a log query; used both as the in-memory LRU key and to
/// derive the on-disk file name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LogKey {
/// Head revision the log was taken at; appears verbatim in the file name.
pub head_sha: String,
/// When `Some`, the query is a path-scoped log; when `None`, a whole-repo log.
pub path: Option<crate::path::RelPath>,
/// Maximum number of commits requested.
pub limit: u32,
/// Forwarded to the whole-repo log call only; the path-scoped call does not
/// receive it, and the path-scoped file name does not encode it.
pub include_files: bool,
}
/// One file touched by a commit, paired with the kind of change.
type CommitFileChange = (crate::path::RelPath, ChangeKind);
/// Cache for git query results: three mutex-guarded in-memory LRU maps, with
/// an optional on-disk directory behind them.
pub struct GitCache {
// Commit sha -> files changed by that commit.
commit_files: Mutex<LruCache<String , Arc<Vec<CommitFileChange>>>>,
// Log query -> resulting commits.
log: Mutex<LruCache<LogKey, Arc<Vec<CommitInfo>>>>,
// Blame query -> blame result.
blame: Mutex<LruCache<BlameKey, Arc<BlameResult>>>,
// Root of the on-disk cache; `None` when persistence is disabled.
disk: Option<PathBuf>,
}
impl GitCache {
/// Creates a cache whose three in-memory LRUs each hold up to `mem_capacity`
/// entries (a capacity of 0 is raised to 1).
///
/// When `persist` is true, the `git-cache` directory and its `commit_files`,
/// `log` and `blame` subdirectories are created under `basemind_dir`, and the
/// on-disk log cache is trimmed to the configured byte budget.
///
/// # Errors
/// Returns [`CacheError::Io`] if a cache directory cannot be created.
pub fn open(
    basemind_dir: &Path,
    mem_capacity: usize,
    persist: bool,
) -> Result<Self, CacheError> {
    let mut disk = None;
    if persist {
        let root = basemind_dir.join(GIT_CACHE_DIR);
        for sub in ["commit_files", "log", "blame"] {
            ensure_subdir(&root, sub)?;
        }
        evict_log_cache(&root, log_cache_max_bytes_from_env());
        disk = Some(root);
    }

    let cap = NonZeroUsize::new(mem_capacity.max(1)).expect("capacity > 0");
    let commit_files = Mutex::new(LruCache::new(cap));
    let log = Mutex::new(LruCache::new(cap));
    let blame = Mutex::new(LruCache::new(cap));
    Ok(Self {
        commit_files,
        log,
        blame,
        disk,
    })
}
/// Returns the files changed by `commit_sha`.
///
/// Lookup order: in-memory LRU, then the on-disk cache, then
/// `repo.commit_files_uncached`. A freshly computed result is stored in
/// memory and written to disk; disk write failures are ignored.
///
/// # Errors
/// Propagates the git error when the uncached computation fails.
///
/// # Panics
/// Panics if the internal mutex is poisoned.
pub fn commit_files(
    &self,
    repo: &Repo,
    commit_sha: &str,
) -> Result<Arc<Vec<CommitFileChange>>, CacheError> {
    let cached = self.commit_files.lock().unwrap().get(commit_sha).cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    // `fresh` records whether the value still has to be persisted.
    let (files, fresh) = match self.read_commit_files_disk(commit_sha) {
        Some(stored) => (stored, false),
        None => (repo.commit_files_uncached(commit_sha)?, true),
    };
    let shared = Arc::new(files);
    self.commit_files
        .lock()
        .unwrap()
        .put(commit_sha.to_string(), Arc::clone(&shared));
    if fresh {
        self.write_commit_files_disk(commit_sha, &shared);
    }
    Ok(shared)
}
/// Returns the log identified by `(head_sha, path, limit, include_files)`.
///
/// Lookup order: in-memory LRU, then the on-disk cache, then the repository.
/// With a `path` the path-scoped log is used (`include_files` is not passed
/// to it); without one the whole-repo log is used. A freshly computed result
/// is stored in memory and written to disk; disk write failures are ignored.
///
/// # Errors
/// Propagates the git error when the repository query fails.
///
/// # Panics
/// Panics if the internal mutex is poisoned.
pub fn log(
    &self,
    repo: &Repo,
    head_sha: &str,
    path: Option<&crate::path::RelPath>,
    limit: u32,
    include_files: bool,
) -> Result<Arc<Vec<CommitInfo>>, CacheError> {
    let key = LogKey {
        head_sha: head_sha.to_string(),
        path: path.cloned(),
        limit,
        include_files,
    };

    let cached = self.log.lock().unwrap().get(&key).cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    // `fresh` records whether the value still has to be persisted.
    let (commits, fresh) = match self.read_log_disk(&key) {
        Some(stored) => (stored, false),
        None => {
            let walked = if let Some(p) = path {
                repo.log_for_path(p, limit as usize)?
            } else {
                repo.log_paths(limit as usize, include_files)?
            };
            (walked, true)
        }
    };
    let shared = Arc::new(commits);
    self.log.lock().unwrap().put(key.clone(), Arc::clone(&shared));
    if fresh {
        self.write_log_disk(&key, &shared);
    }
    Ok(shared)
}
/// Returns the blame result for `path` at `suspect_sha`, optionally limited
/// to `range`.
///
/// Lookup order: in-memory LRU, then the on-disk cache, then
/// `repo.blame_file`. A freshly computed result is stored in memory and
/// written to disk; disk write failures are ignored.
///
/// # Errors
/// Propagates the git error when the blame computation fails.
///
/// # Panics
/// Panics if the internal mutex is poisoned.
pub fn blame(
    &self,
    repo: &Repo,
    suspect_sha: &str,
    path: &crate::path::RelPath,
    range: Option<(u32, u32)>,
) -> Result<Arc<BlameResult>, CacheError> {
    let key = BlameKey {
        suspect_sha: suspect_sha.to_string(),
        path: path.clone(),
        range,
    };

    let cached = self.blame.lock().unwrap().get(&key).cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    // `fresh` records whether the value still has to be persisted.
    let (result, fresh) = match self.read_blame_disk(&key) {
        Some(stored) => (stored, false),
        None => (repo.blame_file(suspect_sha, path, range)?, true),
    };
    let shared = Arc::new(result);
    self.blame
        .lock()
        .unwrap()
        .put(key.clone(), Arc::clone(&shared));
    if fresh {
        self.write_blame_disk(&key, &shared);
    }
    Ok(shared)
}
pub fn clear(&self) -> Result<usize, CacheError> {
let mut removed = 0usize;
if let Some(root) = &self.disk
&& root.exists()
{
removed += count_files(root);
fs::remove_dir_all(root).map_err(|source| CacheError::Io {
path: root.clone(),
source,
})?;
fs::create_dir_all(root).map_err(|source| CacheError::Io {
path: root.clone(),
source,
})?;
}
self.commit_files.lock().unwrap().clear();
self.log.lock().unwrap().clear();
self.blame.lock().unwrap().clear();
Ok(removed)
}
/// Loads the on-disk entry for `sha`, if any.
///
/// Returns `None` when persistence is disabled, the file is missing or
/// unreadable, the payload fails to decode, or its schema version differs
/// from `GIT_CACHE_SCHEMA`.
fn read_commit_files_disk(&self, sha: &str) -> Option<Vec<CommitFileChange>> {
    let file = self
        .commit_files_path(sha)
        .filter(|candidate| candidate.exists())?;
    let raw = fs::read(&file).ok()?;
    rmp_serde::from_slice::<CommitFilesPayload>(&raw)
        .ok()
        .filter(|payload| payload.schema_ver == GIT_CACHE_SCHEMA)
        .map(|payload| payload.files)
}
/// Best-effort write of `files` to the on-disk entry for `sha`.
///
/// Does nothing when persistence is disabled; encoding and I/O failures are
/// ignored.
fn write_commit_files_disk(&self, sha: &str, files: &[CommitFileChange]) {
    if let Some(target) = self.commit_files_path(sha) {
        let payload = CommitFilesPayload {
            schema_ver: GIT_CACHE_SCHEMA,
            files: files.to_vec(),
        };
        if let Ok(encoded) = rmp_serde::to_vec_named(&payload) {
            let _ = atomic_write(&target, &encoded);
        }
    }
}
/// Loads the on-disk entry for `key`, if any.
///
/// Returns `None` when persistence is disabled, the file is missing or
/// unreadable, the payload fails to decode, or its schema version differs
/// from `GIT_CACHE_SCHEMA`.
fn read_log_disk(&self, key: &LogKey) -> Option<Vec<CommitInfo>> {
    let file = self.log_path(key).filter(|candidate| candidate.exists())?;
    let raw = fs::read(&file).ok()?;
    let payload = rmp_serde::from_slice::<LogPayload>(&raw).ok()?;
    (payload.schema_ver == GIT_CACHE_SCHEMA).then(|| payload.commits)
}
/// Best-effort write of `commits` to the on-disk entry for `key`.
///
/// Does nothing when persistence is disabled; encoding and I/O failures are
/// ignored.
fn write_log_disk(&self, key: &LogKey, commits: &[CommitInfo]) {
    if let Some(target) = self.log_path(key) {
        let payload = LogPayload {
            schema_ver: GIT_CACHE_SCHEMA,
            commits: commits.to_vec(),
        };
        if let Ok(encoded) = rmp_serde::to_vec_named(&payload) {
            let _ = atomic_write(&target, &encoded);
        }
    }
}
/// Loads the on-disk entry for `key`, if any.
///
/// Returns `None` when persistence is disabled, the file is missing or
/// unreadable, the payload fails to decode, or its schema version differs
/// from `GIT_CACHE_SCHEMA`.
fn read_blame_disk(&self, key: &BlameKey) -> Option<BlameResult> {
    let file = self.blame_path(key).filter(|candidate| candidate.exists())?;
    let raw = fs::read(&file).ok()?;
    match rmp_serde::from_slice::<BlamePayload>(&raw) {
        Ok(payload) if payload.schema_ver == GIT_CACHE_SCHEMA => Some(payload.result),
        _ => None,
    }
}
/// Best-effort write of `result` to the on-disk entry for `key`.
///
/// Does nothing when persistence is disabled; encoding and I/O failures are
/// ignored.
fn write_blame_disk(&self, key: &BlameKey, result: &BlameResult) {
    if let Some(target) = self.blame_path(key) {
        let payload = BlamePayload {
            schema_ver: GIT_CACHE_SCHEMA,
            result: result.clone(),
        };
        if let Ok(encoded) = rmp_serde::to_vec_named(&payload) {
            let _ = atomic_write(&target, &encoded);
        }
    }
}
/// Builds the on-disk location for a blame entry, or `None` when persistence
/// is disabled.
///
/// The file name is `<suspect_sha>__<hash>__<range>.msgpack`, where `<hash>`
/// is the hex form of the first 8 bytes of the BLAKE3 hash of the path and
/// `<range>` is `all` or `<lo>-<hi>`.
fn blame_path(&self, key: &BlameKey) -> Option<PathBuf> {
    self.disk.as_ref().map(|root| {
        let digest = blake3::hash(key.path.as_bytes());
        let range_tag = if let Some((lo, hi)) = key.range {
            format!("{lo}-{hi}")
        } else {
            "all".to_string()
        };
        let file_name = format!(
            "{}__{}__{range_tag}.msgpack",
            key.suspect_sha,
            hex::encode(&digest.as_bytes()[..8])
        );
        root.join("blame").join(file_name)
    })
}
/// Builds the on-disk location `commit_files/<sha>.msgpack` under the cache
/// root, or `None` when persistence is disabled.
fn commit_files_path(&self, sha: &str) -> Option<PathBuf> {
    self.disk.as_ref().map(|root| {
        let mut target = root.join("commit_files");
        target.push(format!("{sha}.msgpack"));
        target
    })
}
/// Builds the on-disk location for a log entry, or `None` when persistence
/// is disabled.
///
/// The file name is `<head_sha>__<scope>.msgpack`. For a whole-repo log the
/// scope is `all-<limit>-<include_files as 0/1>`; for a path-scoped log it is
/// `path-<hash>-<limit>`, where `<hash>` is the hex form of the first 8 bytes
/// of the BLAKE3 hash of the path.
fn log_path(&self, key: &LogKey) -> Option<PathBuf> {
    let root = self.disk.as_ref()?;
    let scope = if let Some(p) = &key.path {
        let digest = blake3::hash(p.as_bytes());
        format!("path-{}-{}", hex::encode(&digest.as_bytes()[..8]), key.limit)
    } else {
        format!("all-{}-{}", key.limit, key.include_files as u8)
    };
    let file_name = format!("{}__{}.msgpack", key.head_sha, scope);
    Some(root.join("log").join(file_name))
}
}
/// Creates `root/sub` along with any missing parents.
///
/// On failure the returned [`CacheError::Io`] carries the full subdirectory
/// path.
fn ensure_subdir(root: &Path, sub: &str) -> Result<(), CacheError> {
    let target = root.join(sub);
    match fs::create_dir_all(&target) {
        Ok(()) => Ok(()),
        Err(source) => Err(CacheError::Io {
            path: target,
            source,
        }),
    }
}
/// Writes `bytes` to `path` by writing a temporary sibling file and renaming
/// it over the destination, so `path` is never replaced by a partially
/// written file from this call.
///
/// The temporary name includes the process id and a per-process sequence
/// number, so concurrent writers in the same process (even for the same
/// `path`) never share a temporary file. If either the write or the rename
/// fails, the temporary file is removed on a best-effort basis and the
/// original error is returned.
fn atomic_write(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
    static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let tmp = path.with_extension(format!("msgpack.{}.{}.tmp", std::process::id(), seq));

    let outcome = fs::write(&tmp, bytes).and_then(|()| fs::rename(&tmp, path));
    if outcome.is_err() {
        // Do not leave orphaned temp files behind in the cache directory.
        let _ = fs::remove_file(&tmp);
    }
    outcome
}
/// Counts the non-directory entries under `dir`, recursively.
///
/// Directories are detected with `DirEntry::file_type`, which does not follow
/// symlinks: a symlink is counted as a single entry and never descended into.
/// This keeps the count in line with what `fs::remove_dir_all` deletes and
/// prevents repeated traversal through symlink loops. Directories that cannot
/// be read are skipped; an entry whose type cannot be determined is counted
/// as a file.
fn count_files(dir: &Path) -> usize {
    let mut total = 0;
    let mut pending = vec![dir.to_path_buf()];
    while let Some(current) = pending.pop() {
        let Ok(listing) = fs::read_dir(&current) else {
            continue;
        };
        for entry in listing.flatten() {
            let is_real_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
            if is_real_dir {
                pending.push(entry.path());
            } else {
                total += 1;
            }
        }
    }
    total
}
/// Default byte budget for the on-disk log cache (256 MiB).
const LOG_CACHE_DEFAULT_MAX_BYTES: u64 = 256 * 1024 * 1024;
/// Reads the log-cache byte budget from `BASEMIND_GIT_CACHE_LOG_MAX_BYTES`.
///
/// Falls back to `LOG_CACHE_DEFAULT_MAX_BYTES` when the variable is unset,
/// not valid Unicode, or does not parse as a `u64`.
fn log_cache_max_bytes_from_env() -> u64 {
    match std::env::var("BASEMIND_GIT_CACHE_LOG_MAX_BYTES") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(LOG_CACHE_DEFAULT_MAX_BYTES),
        Err(_) => LOG_CACHE_DEFAULT_MAX_BYTES,
    }
}
/// Trims `<cache_root>/log` so that its regular files total at most
/// `max_bytes`, deleting the least recently modified files first.
///
/// A `max_bytes` of 0 disables eviction. Only direct children that are
/// regular files are considered. Entries whose metadata cannot be read are
/// skipped, and a file whose modification time is unavailable sorts as the
/// oldest. Deletion failures are ignored and do not count towards the
/// reclaimed total.
pub(crate) fn evict_log_cache(cache_root: &Path, max_bytes: u64) {
    if max_bytes == 0 {
        return;
    }

    let mut candidates: Vec<(std::time::SystemTime, u64, PathBuf)> =
        fs::read_dir(cache_root.join("log"))
            .into_iter()
            .flat_map(|listing| listing.flatten())
            .filter_map(|entry| {
                let meta = entry.metadata().ok()?;
                if !meta.is_file() {
                    return None;
                }
                let stamp = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH);
                Some((stamp, meta.len(), entry.path()))
            })
            .collect();

    let used = candidates
        .iter()
        .fold(0u64, |acc, (_, size, _)| acc.saturating_add(*size));
    if used <= max_bytes {
        return;
    }

    // Oldest first; the stable sort keeps directory order among equal timestamps.
    candidates.sort_by_key(|(stamp, _, _)| *stamp);
    let mut excess = used - max_bytes;
    for (_, size, victim) in candidates {
        if excess == 0 {
            break;
        }
        if fs::remove_file(&victim).is_ok() {
            excess = excess.saturating_sub(size);
        }
    }
}