1use crate::{Finding, SourceModel, SymbolIndex};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5
/// Per-file record stored in `CacheMeta::files`, keyed by the file's relative path.
///
/// `ProjectCache::check_file` compares `mtime_secs` and `size` against the
/// file on disk to decide whether the stored `content_hash` can be reused.
#[derive(Debug, Serialize, Deserialize)]
struct FileEntry {
    /// Modification time in whole seconds since the Unix epoch
    /// (0 when `update_file_entry` could not read the file's metadata).
    mtime_secs: u64,
    /// File length in bytes (0 when the metadata could not be read).
    size: u64,
    /// Content hash passed to `ProjectCache::update_file_entry` by the caller.
    content_hash: u64,
    /// Imports recorded for the file; returned by `ProjectCache::get_imports`.
    #[serde(default)]
    imports: Vec<String>,
}
16
/// On-disk payload of a `findings/<hash>.bin` cache file.
///
/// The hash is stored alongside the findings so `ProjectCache::get_findings`
/// can reject a file whose embedded hash differs from the one requested.
#[derive(Debug, Serialize, Deserialize)]
struct FindingsEntry {
    /// Content hash the findings were stored under.
    content_hash: u64,
    /// The cached findings themselves.
    findings: Vec<Finding>,
}
23
/// Cache index persisted as `meta.bin` inside the cache directory.
#[derive(Debug, Serialize, Deserialize, Default)]
struct CacheMeta {
    /// Environment hash the cache was built with; `ProjectCache::open` discards
    /// the whole cache directory when it differs from the caller's value.
    env_hash: u64,
    /// Per-file records keyed by the relative path given to `update_file_entry`.
    files: HashMap<String, FileEntry>,
}
30
/// Two-layer cache for per-file analysis results.
///
/// Models and symbol indexes are kept in in-memory maps keyed by content hash
/// and mirrored to `.bin` files under `<root>/.cha/cache`; findings are stored
/// on disk only.
pub struct ProjectCache {
    /// Project root; the cache directory is derived from it via `cache_dir`.
    root: PathBuf,
    /// Index loaded from (and flushed back to) `meta.bin`.
    meta: CacheMeta,
    /// Set by `update_file_entry`; `flush` is a no-op while this is `false`.
    dirty: bool,
    /// In-memory layer for parsed models, keyed by content hash.
    mem_models: HashMap<u64, SourceModel>,
    /// In-memory layer for symbol indexes, keyed by content hash.
    mem_symbols: HashMap<u64, SymbolIndex>,
}
46
47fn hash_all_configs(dir: &Path, h: &mut impl std::hash::Hasher) {
48 use std::hash::Hash;
49 let cfg = dir.join(".cha.toml");
50 if let Ok(content) = std::fs::read_to_string(&cfg) {
51 content.hash(h);
52 }
53 let Ok(entries) = std::fs::read_dir(dir) else {
54 return;
55 };
56 for entry in entries.flatten() {
57 let path = entry.path();
58 if path.is_dir() {
59 let name = entry.file_name();
60 let s = name.to_string_lossy();
61 if !s.starts_with('.') && !is_skip_dir(&s) {
62 hash_all_configs(&path, h);
63 }
64 }
65 }
66}
67
/// Returns the cache directory for a project: `<root>/.cha/cache`.
fn cache_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(".cha/cache");
    dir
}
71
/// Returns `true` when `name` is one of the directory names that
/// `hash_all_configs` does not descend into.
///
/// The comparison is an exact, case-sensitive match on the bare directory name.
fn is_skip_dir(name: &str) -> bool {
    const SKIPPED: [&str; 9] = [
        "target",
        "node_modules",
        "dist",
        "build",
        "out",
        "__pycache__",
        "venv",
        ".venv",
        "vendor",
    ];
    SKIPPED.contains(&name)
}
86
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit digest.
fn content_hash(content: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut hasher = DefaultHasher::new();
    Hash::hash(content, &mut hasher);
    hasher.finish()
}
93
94fn get_layered<T: serde::de::DeserializeOwned + Clone>(
97 mem: &mut HashMap<u64, T>,
98 root: &Path,
99 subdir: &str,
100 chash: u64,
101) -> Option<T> {
102 if let Some(v) = mem.get(&chash) {
103 return Some(v.clone());
104 }
105 let path = cache_dir(root)
106 .join(subdir)
107 .join(format!("{chash:016x}.bin"));
108 let bytes = std::fs::read(&path).ok()?;
109 let val: T = bincode::deserialize(&bytes).ok()?;
110 mem.insert(chash, val.clone());
111 Some(val)
112}
113
114fn put_layered<T: serde::Serialize + Clone>(
117 mem: &mut HashMap<u64, T>,
118 root: &Path,
119 subdir: &str,
120 chash: u64,
121 value: &T,
122) {
123 mem.insert(chash, value.clone());
124 let dir = cache_dir(root).join(subdir);
125 let _ = std::fs::create_dir_all(&dir);
126 if let Ok(bytes) = bincode::serialize(value) {
127 let _ = std::fs::write(dir.join(format!("{chash:016x}.bin")), bytes);
128 }
129}
130
/// Returns `(mtime, size)` for `path`: the modification time in whole seconds
/// since the Unix epoch and the length in bytes.
///
/// Returns `None` when the metadata or modification time is unavailable, or
/// when the modification time lies before the Unix epoch.
fn file_mtime_and_size(path: &Path) -> Option<(u64, u64)> {
    let meta = std::fs::metadata(path).ok()?;
    let modified = meta.modified().ok()?;
    let since_epoch = modified.duration_since(std::time::UNIX_EPOCH).ok()?;
    Some((since_epoch.as_secs(), meta.len()))
}
141
142impl ProjectCache {
143 pub fn open(project_root: &Path, env_hash: u64) -> Self {
145 let dir = cache_dir(project_root);
146 let meta_path = dir.join("meta.bin");
147 let meta = std::fs::read(&meta_path)
148 .ok()
149 .and_then(|b| bincode::deserialize::<CacheMeta>(&b).ok())
150 .unwrap_or_default();
151 let meta = if meta.env_hash != env_hash {
152 let _ = std::fs::remove_dir_all(&dir);
154 CacheMeta {
155 env_hash,
156 ..Default::default()
157 }
158 } else {
159 meta
160 };
161 Self {
162 root: project_root.to_path_buf(),
163 meta,
164 dirty: false,
165 mem_models: HashMap::new(),
166 mem_symbols: HashMap::new(),
167 }
168 }
169
170 pub fn check_file(&self, rel_path: &str, path: &Path) -> FileStatus {
173 let Some(entry) = self.meta.files.get(rel_path) else {
174 return FileStatus::Changed;
175 };
176 if let Some((mtime, size)) = file_mtime_and_size(path)
177 && mtime == entry.mtime_secs
178 && size == entry.size
179 {
180 return FileStatus::Unchanged(entry.content_hash);
181 }
182 FileStatus::Changed
183 }
184
185 pub fn get_model(&mut self, chash: u64) -> Option<SourceModel> {
187 get_layered(&mut self.mem_models, &self.root, "parse", chash)
188 }
189
190 pub fn put_model(&mut self, chash: u64, model: &SourceModel) {
192 put_layered(&mut self.mem_models, &self.root, "parse", chash, model);
193 }
194
195 pub fn get_symbols(&mut self, chash: u64) -> Option<SymbolIndex> {
200 get_layered(&mut self.mem_symbols, &self.root, "symbols", chash)
201 }
202
203 pub fn put_symbols(&mut self, chash: u64, idx: &SymbolIndex) {
206 put_layered(&mut self.mem_symbols, &self.root, "symbols", chash, idx);
207 }
208
209 pub fn get_findings(&self, chash: u64) -> Option<Vec<Finding>> {
211 let path = cache_dir(&self.root)
212 .join("findings")
213 .join(format!("{chash:016x}.bin"));
214 let bytes = std::fs::read(&path).ok()?;
215 let entry: FindingsEntry = bincode::deserialize(&bytes).ok()?;
216 (entry.content_hash == chash).then_some(entry.findings)
217 }
218
219 pub fn put_findings(&mut self, chash: u64, findings: &[Finding]) {
221 let dir = cache_dir(&self.root).join("findings");
222 let _ = std::fs::create_dir_all(&dir);
223 let entry = FindingsEntry {
224 content_hash: chash,
225 findings: findings.to_vec(),
226 };
227 if let Ok(bytes) = bincode::serialize(&entry) {
228 let _ = std::fs::write(dir.join(format!("{chash:016x}.bin")), bytes);
229 }
230 }
231
232 pub fn update_file_entry(
234 &mut self,
235 rel_path: String,
236 path: &Path,
237 chash: u64,
238 imports: Vec<String>,
239 ) {
240 let (mtime_secs, size) = file_mtime_and_size(path).unwrap_or((0, 0));
241 self.meta.files.insert(
242 rel_path,
243 FileEntry {
244 mtime_secs,
245 size,
246 content_hash: chash,
247 imports,
248 },
249 );
250 self.dirty = true;
251 }
252
253 pub fn get_imports(&self, rel_path: &str) -> Option<&[String]> {
255 self.meta.files.get(rel_path).map(|e| e.imports.as_slice())
256 }
257
258 pub fn flush(&self) {
260 if !self.dirty {
261 return;
262 }
263 let dir = cache_dir(&self.root);
264 let _ = std::fs::create_dir_all(&dir);
265 if let Ok(bytes) = bincode::serialize(&self.meta) {
266 let _ = std::fs::write(dir.join("meta.bin"), bytes);
267 }
268 self.gc();
269 }
270
271 fn gc(&self) {
273 let hashes: std::collections::HashSet<String> = self
274 .meta
275 .files
276 .values()
277 .map(|e| format!("{:016x}.bin", e.content_hash))
278 .collect();
279 for subdir in &["parse", "findings", "symbols"] {
280 let dir = cache_dir(&self.root).join(subdir);
281 let Ok(entries) = std::fs::read_dir(&dir) else {
282 continue;
283 };
284 for entry in entries.flatten() {
285 let name = entry.file_name().to_string_lossy().to_string();
286 if name.ends_with(".bin") && !hashes.contains(&name) {
287 let _ = std::fs::remove_file(entry.path());
288 }
289 }
290 }
291 let legacy = cache_dir(&self.root).join("analysis.json");
293 let _ = std::fs::remove_file(legacy);
294 }
295}
296
/// Result of `ProjectCache::check_file`.
pub enum FileStatus {
    /// The file's modification time and size match the index entry; carries
    /// the content hash stored in that entry.
    Unchanged(u64),
    /// No index entry exists, the metadata could not be read, or the
    /// modification time or size differs from the recorded values.
    Changed,
}
304
/// Returns the 64-bit hash of `s`.
///
/// Public entry point that delegates to the private `content_hash` helper.
pub fn hash_content(s: &str) -> u64 {
    content_hash(s)
}
309
310pub fn env_hash(project_root: &Path, plugin_dirs: &[PathBuf]) -> u64 {
325 use std::hash::{Hash, Hasher};
326 let mut h = std::collections::hash_map::DefaultHasher::new();
327 hash_cha_binary(&mut h);
328 hash_all_configs(project_root, &mut h);
329 for dir in plugin_dirs {
330 if let Ok(entries) = std::fs::read_dir(dir) {
331 for entry in entries.flatten() {
332 if let Ok(mtime) = entry.metadata().and_then(|m| m.modified()) {
333 mtime.hash(&mut h);
334 }
335 entry.file_name().hash(&mut h);
336 }
337 }
338 }
339 h.finish()
340}
341
342fn hash_cha_binary(h: &mut impl std::hash::Hasher) {
347 use std::hash::Hash;
348 match std::env::current_exe().and_then(|p| p.metadata()?.modified()) {
349 Ok(mtime) => mtime.hash(h),
350 Err(_) => env!("CARGO_PKG_VERSION").hash(h),
351 }
352}
353
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{SourceModel, TypeRef};
    use std::path::PathBuf;

    /// Environment hash used by every test; reopening with the same value
    /// keeps the on-disk cache instead of wiping it.
    const ENV: u64 = 0xdeadbeef;

    /// Creates and returns a fresh directory under the system temp dir.
    ///
    /// The name combines the process id, the current time in nanoseconds and
    /// a process-wide counter so tests do not share a directory.
    fn unique_tmp_dir() -> PathBuf {
        use std::sync::atomic::{AtomicU64, Ordering};
        static SEQ: AtomicU64 = AtomicU64::new(0);
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_nanos())
            .unwrap_or(0);
        let seq = SEQ.fetch_add(1, Ordering::Relaxed);
        let dir = std::env::temp_dir().join(format!("cha-cache-test-{}-{}-{}", pid, nanos, seq));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// A minimal C model whose only interesting content is two type aliases.
    fn sample_model() -> SourceModel {
        let aliases = vec![
            ("MyId".into(), "uint32_t".into()),
            ("Handle".into(), "void*".into()),
        ];
        SourceModel {
            language: "c".into(),
            total_lines: 10,
            functions: vec![],
            classes: vec![],
            imports: vec![],
            comments: vec![],
            type_aliases: aliases,
        }
    }

    /// Type aliases survive both the in-memory layer and a flush/reopen cycle.
    #[test]
    fn cache_roundtrip_preserves_type_aliases() {
        let dir = unique_tmp_dir();
        let expected = sample_model();
        let key: u64 = 0xdead_beef_1234_5678;

        let mut cache = ProjectCache::open(&dir, ENV);
        // Register the hash in the index: `flush` only runs when the index is
        // dirty, and its gc pass deletes `.bin` files the index does not know.
        cache.update_file_entry("x.c".into(), &dir.join("nope"), key, vec![]);
        cache.put_model(key, &expected);
        let in_memory = cache.get_model(key).expect("cached model present");
        assert_eq!(in_memory.type_aliases, expected.type_aliases);
        cache.flush();
        drop(cache);

        // A new instance starts with empty in-memory maps, so this hit must
        // come from disk.
        let mut reopened = ProjectCache::open(&dir, ENV);
        let on_disk = reopened.get_model(key).expect("on-disk model present");
        assert_eq!(on_disk.type_aliases, expected.type_aliases);
    }

    /// A parameter's `TypeRef` (name and external origin) survives a
    /// put/get cycle.
    #[test]
    fn cache_roundtrip_preserves_typeref_origin() {
        use crate::{FunctionInfo, TypeOrigin};
        let dir = unique_tmp_dir();
        let param = TypeRef {
            name: "ExtThing".into(),
            raw: "ext::ExtThing".into(),
            origin: TypeOrigin::External("ext".into()),
        };
        let func = FunctionInfo {
            name: "f".into(),
            parameter_types: vec![param],
            ..Default::default()
        };
        let stored = SourceModel {
            language: "rust".into(),
            total_lines: 5,
            functions: vec![func],
            classes: vec![],
            imports: vec![],
            comments: vec![],
            type_aliases: vec![],
        };

        let mut cache = ProjectCache::open(&dir, ENV);
        cache.put_model(99, &stored);
        let loaded = cache.get_model(99).unwrap();
        let first_param = &loaded.functions[0].parameter_types[0];
        assert_eq!(first_param.name, "ExtThing");
        assert!(matches!(&first_param.origin, TypeOrigin::External(m) if m == "ext"));
    }

    /// Class and function symbols survive both the in-memory layer and a
    /// flush/reopen cycle.
    #[test]
    fn symbol_index_roundtrip_preserves_classes_and_functions() {
        use crate::{ClassSymbol, FunctionSymbol, SymbolIndex};
        let dir = unique_tmp_dir();
        let class = ClassSymbol {
            name: "Foo".into(),
            parent_name: Some("Base".into()),
            is_interface: false,
            is_exported: true,
            method_count: 3,
            has_behavior: true,
            field_names: vec!["x".into()],
            field_types: vec!["int".into()],
            start_line: 10,
            end_line: 20,
            ..Default::default()
        };
        let function = FunctionSymbol {
            name: "bar".into(),
            is_exported: true,
            parameter_count: 2,
            called_functions: vec!["helper".into(), "log".into()],
            start_line: 30,
            end_line: 40,
            ..Default::default()
        };
        let stored = SymbolIndex {
            language: "c".into(),
            total_lines: 42,
            imports: vec![],
            classes: vec![class],
            functions: vec![function],
            type_aliases: vec![("Handle".into(), "void*".into())],
        };
        let key = 0x1234_abcd_u64;

        let mut cache = ProjectCache::open(&dir, ENV);
        // Register the hash so the gc pass in `flush` keeps the file.
        cache.update_file_entry("t.c".into(), &dir.join("nope"), key, vec![]);
        cache.put_symbols(key, &stored);
        let in_memory = cache.get_symbols(key).expect("L1 hit");
        assert_eq!(in_memory.classes[0].name, "Foo");
        assert_eq!(in_memory.functions[0].called_functions.len(), 2);
        assert_eq!(in_memory.type_aliases[0].0, "Handle");
        cache.flush();
        drop(cache);

        // Fresh instance: the lookup has to be served from disk.
        let mut reopened = ProjectCache::open(&dir, ENV);
        let on_disk = reopened.get_symbols(key).expect("L2 hit");
        assert_eq!(on_disk.classes[0].parent_name.as_deref(), Some("Base"));
        assert_eq!(on_disk.functions[0].parameter_count, 2);
    }
}