use crate::{Finding, SourceModel, SymbolIndex};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// Per-file record kept in the cache metadata, keyed by the file's relative path.
///
/// The stored mtime/size pair is compared against the file on disk in
/// `ProjectCache::check_file` to decide whether the file can be treated as unchanged.
#[derive(Debug, Serialize, Deserialize)]
struct FileEntry {
/// Modification time in whole seconds since the Unix epoch
/// (0 when the metadata could not be read at update time).
mtime_secs: u64,
/// File length in bytes as reported by the filesystem metadata
/// (0 when the metadata could not be read at update time).
size: u64,
/// Content hash supplied by the caller; also names the cached `.bin` artifacts
/// that `gc` keeps alive.
content_hash: u64,
/// Import strings recorded for the file by the caller.
// NOTE(review): the metadata is written with bincode, which is not self-describing,
// so `#[serde(default)]` probably cannot fill in a missing field here — confirm.
#[serde(default)]
imports: Vec<String>,
}
/// On-disk payload of a cached findings file under the `findings` cache subdirectory.
#[derive(Debug, Serialize, Deserialize)]
struct FindingsEntry {
/// Content hash the findings were stored under; `get_findings` rejects the
/// entry when this does not match the requested hash.
content_hash: u64,
/// The cached findings themselves.
findings: Vec<Finding>,
}
/// Cache-wide metadata, serialized with bincode to `meta.bin` in the cache directory.
#[derive(Debug, Serialize, Deserialize, Default)]
struct CacheMeta {
/// Environment hash the cache was built with; a mismatch on open discards the cache.
env_hash: u64,
/// Per-file records keyed by relative path.
files: HashMap<String, FileEntry>,
}
/// Two-layer cache (in-memory maps plus files under `<root>/.cha/cache`) for
/// parsed models, symbol indexes and findings, addressed by content hash.
pub struct ProjectCache {
/// Project root; the cache directory is derived from it.
root: PathBuf,
/// Metadata loaded from (and flushed back to) `meta.bin`.
meta: CacheMeta,
/// Set once a file entry has been updated; `flush` is a no-op while this is false.
dirty: bool,
/// In-memory layer for parsed models, keyed by content hash.
mem_models: HashMap<u64, SourceModel>,
/// In-memory layer for symbol indexes, keyed by content hash.
mem_symbols: HashMap<u64, SymbolIndex>,
}
/// Feed the contents of every `.cha.toml` found under `dir` into `h`.
///
/// The config in `dir` itself (if readable) is hashed first, then each
/// subdirectory is visited recursively. Subdirectories whose name starts with
/// `.` or is one of `target`, `node_modules`, `dist` are skipped. Unreadable
/// directories and files are silently ignored.
///
/// Subdirectories are visited in sorted path order: `read_dir` yields entries
/// in a platform/filesystem-dependent order, and an order-dependent hash would
/// otherwise change (and invalidate the whole cache) without any config edit.
fn hash_all_configs(dir: &Path, h: &mut impl std::hash::Hasher) {
    use std::hash::Hash;

    if let Ok(content) = std::fs::read_to_string(dir.join(".cha.toml")) {
        content.hash(h);
    }
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    // Filter by name first so ignored directories never cost a metadata lookup.
    let mut subdirs: Vec<std::path::PathBuf> = entries
        .flatten()
        .filter(|entry| {
            let name = entry.file_name();
            let name = name.to_string_lossy();
            let skipped =
                name.starts_with('.') || matches!(&*name, "target" | "node_modules" | "dist");
            !skipped
        })
        .map(|entry| entry.path())
        .filter(|path| path.is_dir())
        .collect();

    // All paths share the same parent, so this orders them by file name.
    subdirs.sort_unstable();

    for sub in &subdirs {
        hash_all_configs(sub, h);
    }
}
/// Location of the cache directory for the project rooted at `root`.
fn cache_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(".cha/cache");
    dir
}
/// Hash `content` with the standard library's default hasher and return the
/// 64-bit digest.
fn content_hash(content: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    state.finish()
}
/// Look up `chash` in the in-memory map first, then in
/// `<cache dir>/<subdir>/<chash as 16 hex digits>.bin`.
///
/// A value decoded from disk is inserted into `mem` before being returned.
/// Returns `None` when the key is in neither layer or the file cannot be
/// read or decoded.
fn get_layered<T>(
    mem: &mut HashMap<u64, T>,
    root: &Path,
    subdir: &str,
    chash: u64,
) -> Option<T>
where
    T: serde::de::DeserializeOwned + Clone,
{
    if !mem.contains_key(&chash) {
        // Memory miss: try to promote the on-disk copy into the memory layer.
        let file = cache_dir(root)
            .join(subdir)
            .join(format!("{chash:016x}.bin"));
        let raw = std::fs::read(&file).ok()?;
        let decoded: T = bincode::deserialize(&raw).ok()?;
        mem.insert(chash, decoded);
    }
    mem.get(&chash).cloned()
}
/// Store `value` under `chash` in the in-memory map and, best effort, in
/// `<cache dir>/<subdir>/<chash as 16 hex digits>.bin`.
///
/// Directory creation, serialization and write failures are ignored; the
/// in-memory copy is stored regardless.
fn put_layered<T>(
    mem: &mut HashMap<u64, T>,
    root: &Path,
    subdir: &str,
    chash: u64,
    value: &T,
) where
    T: serde::Serialize + Clone,
{
    mem.insert(chash, value.clone());

    let target_dir = cache_dir(root).join(subdir);
    let _ = std::fs::create_dir_all(&target_dir);

    let Ok(encoded) = bincode::serialize(value) else {
        return;
    };
    let _ = std::fs::write(target_dir.join(format!("{chash:016x}.bin")), encoded);
}
/// Return `(mtime in whole seconds since the Unix epoch, length in bytes)` for
/// `path`, or `None` if the metadata or modification time is unavailable or
/// the modification time lies before the epoch.
fn file_mtime_and_size(path: &Path) -> Option<(u64, u64)> {
    let info = std::fs::metadata(path).ok()?;
    let modified = info.modified().ok()?;
    let since_epoch = modified.duration_since(std::time::UNIX_EPOCH).ok()?;
    Some((since_epoch.as_secs(), info.len()))
}
impl ProjectCache {
    /// Open the cache for `project_root`.
    ///
    /// Loads `meta.bin` from the cache directory (falling back to empty
    /// metadata if it is missing or undecodable). If the stored environment
    /// hash differs from `env_hash`, the whole cache directory is removed and
    /// fresh metadata carrying `env_hash` is used instead.
    pub fn open(project_root: &Path, env_hash: u64) -> Self {
        let dir = cache_dir(project_root);
        let loaded = std::fs::read(dir.join("meta.bin"))
            .ok()
            .and_then(|raw| bincode::deserialize::<CacheMeta>(&raw).ok())
            .unwrap_or_default();

        let meta = if loaded.env_hash == env_hash {
            loaded
        } else {
            // Environment changed: nothing cached can be trusted any more.
            let _ = std::fs::remove_dir_all(&dir);
            CacheMeta {
                env_hash,
                ..Default::default()
            }
        };

        Self {
            root: project_root.to_path_buf(),
            meta,
            dirty: false,
            mem_models: HashMap::new(),
            mem_symbols: HashMap::new(),
        }
    }

    /// Compare the file at `path` with the entry recorded for `rel_path`.
    ///
    /// Returns `Unchanged` with the recorded content hash when an entry exists
    /// and both mtime (seconds) and size match; `Changed` otherwise, including
    /// when the file's metadata cannot be read.
    pub fn check_file(&self, rel_path: &str, path: &Path) -> FileStatus {
        let matching = self.meta.files.get(rel_path).filter(|entry| {
            matches!(
                file_mtime_and_size(path),
                Some((mtime, size)) if mtime == entry.mtime_secs && size == entry.size
            )
        });
        match matching {
            Some(entry) => FileStatus::Unchanged(entry.content_hash),
            None => FileStatus::Changed,
        }
    }

    /// Fetch the cached model for `chash` from memory or the `parse` directory.
    pub fn get_model(&mut self, chash: u64) -> Option<SourceModel> {
        get_layered(&mut self.mem_models, &self.root, "parse", chash)
    }

    /// Cache `model` for `chash` in memory and in the `parse` directory.
    pub fn put_model(&mut self, chash: u64, model: &SourceModel) {
        put_layered(&mut self.mem_models, &self.root, "parse", chash, model);
    }

    /// Fetch the cached symbol index for `chash` from memory or the `symbols` directory.
    pub fn get_symbols(&mut self, chash: u64) -> Option<SymbolIndex> {
        get_layered(&mut self.mem_symbols, &self.root, "symbols", chash)
    }

    /// Cache `idx` for `chash` in memory and in the `symbols` directory.
    pub fn put_symbols(&mut self, chash: u64, idx: &SymbolIndex) {
        put_layered(&mut self.mem_symbols, &self.root, "symbols", chash, idx);
    }

    /// Read cached findings for `chash` from the `findings` directory.
    ///
    /// Returns `None` if the file is missing, undecodable, or was stored for a
    /// different content hash.
    pub fn get_findings(&self, chash: u64) -> Option<Vec<Finding>> {
        let file = cache_dir(&self.root)
            .join("findings")
            .join(format!("{chash:016x}.bin"));
        let raw = std::fs::read(&file).ok()?;
        bincode::deserialize::<FindingsEntry>(&raw)
            .ok()
            .filter(|entry| entry.content_hash == chash)
            .map(|entry| entry.findings)
    }

    /// Write `findings` for `chash` to the `findings` directory (best effort;
    /// I/O and serialization errors are ignored).
    pub fn put_findings(&mut self, chash: u64, findings: &[Finding]) {
        let target_dir = cache_dir(&self.root).join("findings");
        let _ = std::fs::create_dir_all(&target_dir);

        let record = FindingsEntry {
            content_hash: chash,
            findings: findings.to_vec(),
        };
        let Ok(encoded) = bincode::serialize(&record) else {
            return;
        };
        let _ = std::fs::write(target_dir.join(format!("{chash:016x}.bin")), encoded);
    }

    /// Record (or replace) the metadata entry for `rel_path` and mark the
    /// cache dirty.
    ///
    /// The mtime/size are read from `path`; both are stored as 0 when the
    /// metadata is unavailable.
    pub fn update_file_entry(
        &mut self,
        rel_path: String,
        path: &Path,
        chash: u64,
        imports: Vec<String>,
    ) {
        let (mtime_secs, size) = file_mtime_and_size(path).unwrap_or_default();
        let record = FileEntry {
            mtime_secs,
            size,
            content_hash: chash,
            imports,
        };
        self.meta.files.insert(rel_path, record);
        self.dirty = true;
    }

    /// Imports recorded for `rel_path`, or `None` if the file has no entry.
    pub fn get_imports(&self, rel_path: &str) -> Option<&[String]> {
        let entry = self.meta.files.get(rel_path)?;
        Some(entry.imports.as_slice())
    }

    /// Persist the metadata to `meta.bin` and garbage-collect orphaned cache
    /// files. Does nothing unless a file entry was updated since opening.
    /// Write errors are ignored.
    pub fn flush(&self) {
        if self.dirty {
            let dir = cache_dir(&self.root);
            let _ = std::fs::create_dir_all(&dir);
            if let Ok(encoded) = bincode::serialize(&self.meta) {
                let _ = std::fs::write(dir.join("meta.bin"), encoded);
            }
            self.gc();
        }
    }

    /// Delete `.bin` files in the `parse`, `findings` and `symbols`
    /// directories whose name does not correspond to the content hash of any
    /// recorded file entry, and remove the legacy `analysis.json` if present.
    fn gc(&self) {
        let live: std::collections::HashSet<String> = self
            .meta
            .files
            .values()
            .map(|entry| format!("{:016x}.bin", entry.content_hash))
            .collect();

        let cache_root = cache_dir(&self.root);
        for subdir in ["parse", "findings", "symbols"] {
            let Ok(entries) = std::fs::read_dir(cache_root.join(subdir)) else {
                continue;
            };
            for entry in entries.flatten() {
                let name = entry.file_name();
                let name = name.to_string_lossy();
                let orphaned = name.ends_with(".bin") && !live.contains(&*name);
                if orphaned {
                    let _ = std::fs::remove_file(entry.path());
                }
            }
        }

        let _ = std::fs::remove_file(cache_root.join("analysis.json"));
    }
}
/// Outcome of comparing a file on disk with its recorded cache entry.
///
/// Derives `Debug`, `Clone`, `Copy`, `PartialEq` and `Eq` so callers can log,
/// copy and compare statuses directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// The recorded mtime and size still match; carries the recorded content hash.
    Unchanged(u64),
    /// No entry is recorded, the metadata is unreadable, or mtime/size differ.
    Changed,
}
/// Public entry point for hashing file content; delegates to `content_hash`
/// and returns its 64-bit result unchanged.
pub fn hash_content(s: &str) -> u64 {
content_hash(s)
}
/// Compute a hash of the analysis environment.
///
/// Combines, in order: the running binary's fingerprint (`hash_cha_binary`),
/// every `.cha.toml` under `project_root` (`hash_all_configs`), and for each
/// directory in `plugin_dirs` the modification time (when available) and file
/// name of each entry. Unreadable plugin directories are skipped.
///
/// Plugin entries are hashed in file-name order: `read_dir` yields entries in
/// a platform/filesystem-dependent order, and an order-dependent hash would
/// otherwise change without any plugin being touched.
pub fn env_hash(project_root: &Path, plugin_dirs: &[PathBuf]) -> u64 {
    use std::hash::{Hash, Hasher};

    let mut h = std::collections::hash_map::DefaultHasher::new();
    hash_cha_binary(&mut h);
    hash_all_configs(project_root, &mut h);

    for dir in plugin_dirs {
        let Ok(entries) = std::fs::read_dir(dir) else {
            continue;
        };
        let mut plugins: Vec<_> = entries.flatten().collect();
        // `file_name()` allocates, so compute each sort key only once.
        plugins.sort_by_cached_key(|entry| entry.file_name());

        for entry in &plugins {
            if let Ok(mtime) = entry.metadata().and_then(|m| m.modified()) {
                mtime.hash(&mut h);
            }
            entry.file_name().hash(&mut h);
        }
    }
    h.finish()
}
/// Feed a fingerprint of the running executable into `h`: its modification
/// time when that can be determined, otherwise the crate version string.
fn hash_cha_binary(h: &mut impl std::hash::Hasher) {
    use std::hash::Hash;

    let exe_mtime = std::env::current_exe()
        .and_then(|exe| exe.metadata())
        .and_then(|meta| meta.modified());

    if let Ok(stamp) = exe_mtime {
        stamp.hash(h);
    } else {
        env!("CARGO_PKG_VERSION").hash(h);
    }
}
// Round-trip tests for the in-memory and on-disk cache layers.
#[cfg(test)]
mod tests {
use super::*;
use crate::{SourceModel, TypeRef};
use std::path::PathBuf;
/// Create a fresh directory under the system temp dir, named from the process
/// id and the current time in nanoseconds. The directory is not removed afterwards.
fn unique_tmp_dir() -> PathBuf {
let base = std::env::temp_dir().join(format!(
"cha-cache-test-{}-{}",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos())
.unwrap_or(0),
));
std::fs::create_dir_all(&base).unwrap();
base
}
/// Minimal model whose only populated collection is `type_aliases`.
fn sample_model() -> SourceModel {
SourceModel {
language: "c".into(),
total_lines: 10,
functions: vec![],
classes: vec![],
imports: vec![],
comments: vec![],
type_aliases: vec![
("MyId".into(), "uint32_t".into()),
("Handle".into(), "void*".into()),
],
}
}
/// A stored model keeps its type aliases both when read back from the same
/// cache instance and when read by a freshly opened cache after a flush.
#[test]
fn cache_roundtrip_preserves_type_aliases() {
let tmp = unique_tmp_dir();
let mut cache = ProjectCache::open(&tmp, 0xdeadbeef);
let model = sample_model();
let chash: u64 = 0xdead_beef_1234_5678;
// Register a file entry for this hash so the GC run by `flush` keeps the
// `.bin` file; the path does not exist, so mtime/size are recorded as 0.
cache.update_file_entry("x.c".into(), &tmp.join("nope"), chash, vec![]);
cache.put_model(chash, &model);
let got = cache.get_model(chash).expect("cached model present");
assert_eq!(got.type_aliases, model.type_aliases);
cache.flush();
drop(cache);
// A new instance starts with empty in-memory maps, so this read hits the disk layer.
let mut fresh = ProjectCache::open(&tmp, 0xdeadbeef);
let from_disk = fresh.get_model(chash).expect("on-disk model present");
assert_eq!(from_disk.type_aliases, model.type_aliases);
}
/// A `TypeRef` with an external origin survives a put/get on the same cache
/// instance (no flush or reopen is performed here).
#[test]
fn cache_roundtrip_preserves_typeref_origin() {
use crate::{FunctionInfo, TypeOrigin};
let tmp = unique_tmp_dir();
let mut cache = ProjectCache::open(&tmp, 0xdeadbeef);
let model = SourceModel {
language: "rust".into(),
total_lines: 5,
functions: vec![FunctionInfo {
name: "f".into(),
parameter_types: vec![TypeRef {
name: "ExtThing".into(),
raw: "ext::ExtThing".into(),
origin: TypeOrigin::External("ext".into()),
}],
..Default::default()
}],
classes: vec![],
imports: vec![],
comments: vec![],
type_aliases: vec![],
};
cache.put_model(99, &model);
let got = cache.get_model(99).unwrap();
let p = &got.functions[0].parameter_types[0];
assert_eq!(p.name, "ExtThing");
assert!(matches!(&p.origin, TypeOrigin::External(m) if m == "ext"));
}
/// A symbol index keeps its classes, functions and type aliases through the
/// in-memory layer and through a flush followed by a reopen.
#[test]
fn symbol_index_roundtrip_preserves_classes_and_functions() {
use crate::{ClassSymbol, FunctionSymbol, SymbolIndex};
let tmp = unique_tmp_dir();
let mut cache = ProjectCache::open(&tmp, 0xdeadbeef);
let idx = SymbolIndex {
language: "c".into(),
total_lines: 42,
imports: vec![],
classes: vec![ClassSymbol {
name: "Foo".into(),
parent_name: Some("Base".into()),
is_interface: false,
is_exported: true,
method_count: 3,
has_behavior: true,
field_names: vec!["x".into()],
field_types: vec!["int".into()],
start_line: 10,
end_line: 20,
..Default::default()
}],
functions: vec![FunctionSymbol {
name: "bar".into(),
is_exported: true,
parameter_count: 2,
called_functions: vec!["helper".into(), "log".into()],
start_line: 30,
end_line: 40,
..Default::default()
}],
type_aliases: vec![("Handle".into(), "void*".into())],
};
let chash = 0x1234_abcd_u64;
// Register a file entry so the GC run by `flush` does not delete the symbols file.
cache.update_file_entry("t.c".into(), &tmp.join("nope"), chash, vec![]);
cache.put_symbols(chash, &idx);
let got_l1 = cache.get_symbols(chash).expect("L1 hit");
assert_eq!(got_l1.classes[0].name, "Foo");
assert_eq!(got_l1.functions[0].called_functions.len(), 2);
assert_eq!(got_l1.type_aliases[0].0, "Handle");
cache.flush();
drop(cache);
// Fresh instance: the in-memory map is empty, so this read comes from disk.
let mut fresh = ProjectCache::open(&tmp, 0xdeadbeef);
let from_disk = fresh.get_symbols(chash).expect("L2 hit");
assert_eq!(from_disk.classes[0].parent_name.as_deref(), Some("Base"));
assert_eq!(from_disk.functions[0].parameter_count, 2);
}
}