use std::collections::HashMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use super::bm25::Bm25Index;
use super::hasher::FileHashCache;
use super::parser::Symbol;
use super::ranker::SymbolRanker;
/// Tuple returned by [`load`] on a successful cache hit, in this order:
/// the symbol table (keyed by the stored path joined onto the project root),
/// the file hash cache, the BM25 index, and the symbol ranker.
pub type CachedRepoMap = (
HashMap<PathBuf, Vec<Symbol>>,
FileHashCache,
Bm25Index,
SymbolRanker,
);
/// Version stamp that [`save`] writes into `meta.json`. [`load`] returns `None`
/// (forcing a rebuild) whenever the stored value differs from this constant.
const CACHE_VERSION: u32 = 1;
/// Contents of `meta.json`: a small header describing one saved cache.
#[derive(Serialize, Deserialize)]
struct CacheMeta {
/// Set to `CACHE_VERSION` at save time; compared against it on load.
version: u32,
/// Save time in whole seconds since the Unix epoch (0 if the clock reads
/// earlier than the epoch). Only used to log the cache age on load.
written_at: u64,
/// Number of entries in the symbol map at save time.
file_count: usize,
/// Total number of symbols across all files at save time.
symbol_count: usize,
}
/// Serializable form of one BM25 document. The document's `abs_path` is not
/// stored; it is rebuilt on load from the map key and the project root.
#[derive(Serialize, Deserialize)]
struct DiskDocument {
/// Copied verbatim from the in-memory document's `rel_path`.
rel_path: String,
/// Copied verbatim from the in-memory document's `tf` map
/// (presumably per-term frequencies — confirm against `super::bm25`).
tf: HashMap<String, u32>,
/// Copied verbatim from the in-memory document's `token_count`.
token_count: u32,
}
/// Contents of `bm25.json`: the serializable form of a `Bm25Index`.
/// The index's `root` is not stored; it is supplied again when loading.
#[derive(Serialize, Deserialize)]
struct DiskBm25 {
// `documents`: one entry per indexed document, keyed by its path with the
// project root stripped (or the unmodified path when it is not under the root).
// `doc_freq`: copied verbatim from the index's `doc_freq` map.
documents: HashMap<String, DiskDocument>, doc_freq: HashMap<String, u32>,
/// Copied verbatim from the index's `total_tokens`.
total_tokens: u64,
}
/// Contents of `ranker.json`: the state needed to rebuild a `SymbolRanker`.
#[derive(Serialize, Deserialize)]
struct DiskRanker {
/// Clone of the map returned by `SymbolRanker::reference_counts()`; fed back
/// into `SymbolRanker::from_reference_counts` on load.
reference_counts: HashMap<String, u32>,
}
/// Computes the cache directory used for `project_root`.
///
/// The location is `<home>/.collet/cache/<id>`, where `<id>` is the first
/// 16 hex characters of the BLAKE3 hash of the root path's lossy string form.
/// The directory is not created here.
///
/// Returns `None` when the home directory cannot be determined.
pub fn cache_dir_for(project_root: &Path) -> Option<PathBuf> {
    let mut dir = dirs::home_dir()?;

    let root_text = project_root.to_string_lossy();
    let digest_hex = blake3::hash(root_text.as_bytes()).to_hex();

    dir.push(".collet");
    dir.push("cache");
    dir.push(&digest_hex[..16]);
    Some(dir)
}
/// Writes the RepoMap state for `project_root` to its on-disk cache directory.
///
/// Five files are produced: `symbols.json`, `hash_cache.json`, `bm25.json`,
/// `ranker.json` and `meta.json`. Paths in the symbol table and BM25 index are
/// stored relative to `project_root` when they lie under it, and unchanged
/// otherwise.
///
/// `meta.json` acts as the commit marker for the whole snapshot: any existing
/// one is deleted before the data files are touched, and the new one is
/// written only after every data file has been replaced. If the save is
/// interrupted or fails part-way, no `meta.json` is left behind, so [`load`]
/// returns `None` rather than accepting a mix of old and new data files.
///
/// # Errors
///
/// Fails if the home directory cannot be determined (`"no home dir"`), or if
/// creating the directory, serializing any component, or writing any file
/// fails.
pub fn save(
    project_root: &Path,
    symbols: &HashMap<PathBuf, Vec<Symbol>>,
    hash_cache: &FileHashCache,
    bm25: &Bm25Index,
    ranker: &SymbolRanker,
) -> anyhow::Result<()> {
    let dir = cache_dir_for(project_root).ok_or_else(|| anyhow::anyhow!("no home dir"))?;
    std::fs::create_dir_all(&dir)?;

    // Invalidate the previous snapshot before overwriting any of its files.
    // A missing meta file simply means there was nothing to invalidate.
    let meta_path = dir.join("meta.json");
    match std::fs::remove_file(&meta_path) {
        Ok(()) => {}
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
        Err(err) => return Err(err.into()),
    }

    let rel_symbols: HashMap<String, &Vec<Symbol>> = symbols
        .iter()
        .map(|(path, syms)| {
            let rel = path
                .strip_prefix(project_root)
                .unwrap_or(path)
                .to_string_lossy()
                .to_string();
            (rel, syms)
        })
        .collect();
    atomic_write(
        &dir.join("symbols.json"),
        &serde_json::to_vec(&rel_symbols)?,
    )?;

    atomic_write(&dir.join("hash_cache.json"), &hash_cache.to_json()?)?;

    let disk_bm25 = bm25_to_disk(bm25, project_root);
    atomic_write(&dir.join("bm25.json"), &serde_json::to_vec(&disk_bm25)?)?;

    let disk_ranker = ranker_to_disk(ranker);
    atomic_write(&dir.join("ranker.json"), &serde_json::to_vec(&disk_ranker)?)?;

    // Written last: its presence tells `load` that every data file above
    // belongs to this snapshot.
    let meta = CacheMeta {
        version: CACHE_VERSION,
        written_at: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs(),
        file_count: symbols.len(),
        symbol_count: symbols.values().map(Vec::len).sum(),
    };
    atomic_write(&meta_path, &serde_json::to_vec(&meta)?)?;

    tracing::info!(
        files = meta.file_count,
        symbols = meta.symbol_count,
        dir = %dir.display(),
        "RepoMap disk cache saved",
    );
    Ok(())
}
/// Tries to restore the cached RepoMap state for `project_root`.
///
/// Returns `None` — meaning the caller has to rebuild — when the cache
/// directory cannot be determined or does not exist, when any of the five
/// cache files is unreadable or fails to parse, or when the stored version
/// differs from `CACHE_VERSION`. Stored paths are joined back onto
/// `project_root`.
pub fn load(project_root: &Path) -> Option<CachedRepoMap> {
    let dir = cache_dir_for(project_root)?;
    if !dir.exists() {
        return None;
    }

    // Reads one cache file fully; any I/O error is treated as a cache miss.
    let slurp = |name: &str| std::fs::read(dir.join(name)).ok();

    let meta: CacheMeta = serde_json::from_slice(&slurp("meta.json")?).ok()?;
    if meta.version != CACHE_VERSION {
        tracing::info!(
            disk = meta.version,
            current = CACHE_VERSION,
            "RepoMap cache version mismatch — rebuilding",
        );
        return None;
    }

    let stored_symbols: HashMap<String, Vec<Symbol>> =
        serde_json::from_slice(&slurp("symbols.json")?).ok()?;
    let mut symbols: HashMap<PathBuf, Vec<Symbol>> = HashMap::new();
    for (rel, syms) in stored_symbols {
        symbols.insert(project_root.join(&rel), syms);
    }

    let hash_cache = FileHashCache::from_json(&slurp("hash_cache.json")?)?;

    let stored_bm25: DiskBm25 = serde_json::from_slice(&slurp("bm25.json")?).ok()?;
    let bm25 = bm25_from_disk(stored_bm25, project_root);

    let stored_ranker: DiskRanker = serde_json::from_slice(&slurp("ranker.json")?).ok()?;
    let ranker = ranker_from_disk(stored_ranker);

    tracing::info!(
        files = symbols.len(),
        symbols = symbols.values().map(Vec::len).sum::<usize>(),
        age_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
            .saturating_sub(meta.written_at),
        "RepoMap disk cache loaded",
    );

    Some((symbols, hash_cache, bm25, ranker))
}
/// Builds the serializable form of `bm25`.
///
/// Each document is keyed by its path with `root` stripped; a path that is
/// not under `root` is kept unchanged. Non-UTF-8 path bytes are converted
/// lossily. `doc_freq` and `total_tokens` are copied as they are.
fn bm25_to_disk(bm25: &Bm25Index, root: &Path) -> DiskBm25 {
    let mut documents: HashMap<String, DiskDocument> = HashMap::new();

    for (abs_path, doc) in bm25.documents.iter() {
        let key = abs_path
            .strip_prefix(root)
            .unwrap_or(abs_path)
            .to_string_lossy()
            .into_owned();
        let stored = DiskDocument {
            rel_path: doc.rel_path.clone(),
            tf: doc.tf.clone(),
            token_count: doc.token_count,
        };
        documents.insert(key, stored);
    }

    let doc_freq = bm25.doc_freq.clone();
    let total_tokens = bm25.total_tokens;
    DiskBm25 {
        documents,
        doc_freq,
        total_tokens,
    }
}
fn bm25_from_disk(disk: DiskBm25, root: &Path) -> Bm25Index {
let documents = disk
.documents
.into_iter()
.map(|(rel, doc)| {
let abs_path = root.join(&rel);
(
abs_path.clone(),
super::bm25::Document {
rel_path: doc.rel_path,
abs_path,
tf: doc.tf,
token_count: doc.token_count,
},
)
})
.collect();
Bm25Index {
documents,
doc_freq: disk.doc_freq,
total_tokens: disk.total_tokens,
root: root.to_path_buf(),
}
}
/// Captures the ranker's reference counts in their serializable form.
fn ranker_to_disk(ranker: &SymbolRanker) -> DiskRanker {
    let reference_counts = ranker.reference_counts().clone();
    DiskRanker { reference_counts }
}
fn ranker_from_disk(disk: DiskRanker) -> SymbolRanker {
SymbolRanker::from_reference_counts(disk.reference_counts)
}
/// Replaces the file at `path` with `data` by writing a sibling temporary
/// file and renaming it over the destination.
///
/// The temporary file is named `<file name>.<pid>.tmp`, so two processes
/// saving the same cache at once never share a temporary file. Sharing one
/// would let a process rename a file that another is still writing.
/// If the write or the rename fails, the temporary file is removed on a
/// best-effort basis so failed saves do not leave stray files behind. A
/// process killed between the write and the rename can still leave its
/// temporary file in the directory.
///
/// # Errors
///
/// Returns the underlying I/O error if the temporary file cannot be written
/// or cannot be renamed onto `path`.
fn atomic_write(path: &Path, data: &[u8]) -> anyhow::Result<()> {
    let mut tmp_name = path
        .file_name()
        .map(|name| name.to_os_string())
        .unwrap_or_default();
    tmp_name.push(format!(".{}.tmp", std::process::id()));
    let tmp = path.with_file_name(tmp_name);

    let outcome = std::fs::write(&tmp, data).and_then(|()| std::fs::rename(&tmp, path));
    if outcome.is_err() {
        // Cleanup is best-effort; the original error is the one worth reporting.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome?;
    Ok(())
}