Skip to main content

cha_core/
cache.rs

1use crate::{Finding, SourceModel};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5
6/// Per-file cache metadata.
7#[derive(Debug, Serialize, Deserialize)]
8struct FileEntry {
9    mtime_secs: u64,
10    size: u64,
11    content_hash: u64,
12    /// Cached import sources for fast unstable_dependency analysis.
13    #[serde(default)]
14    imports: Vec<String>,
15}
16
17/// Per-file findings cache entry.
18#[derive(Debug, Serialize, Deserialize)]
19struct FindingsEntry {
20    content_hash: u64,
21    findings: Vec<Finding>,
22}
23
24/// On-disk cache metadata.
25#[derive(Debug, Serialize, Deserialize, Default)]
26struct CacheMeta {
27    env_hash: u64,
28    files: HashMap<String, FileEntry>,
29}
30
31/// Unified project cache: parse results + findings.
32pub struct ProjectCache {
33    root: PathBuf,
34    meta: CacheMeta,
35    dirty: bool,
36    /// L1 in-memory parse cache (avoids repeated disk reads within same process).
37    mem_models: HashMap<u64, SourceModel>,
38}
39
/// Feeds the contents of every `.cha.toml` found under `dir` into `h`.
///
/// The config directly inside `dir` (if it can be read) is hashed first, then
/// each sub-directory is visited recursively. Directories whose name starts with
/// `.` and the directories `target`, `node_modules` and `dist` are skipped.
/// Unreadable configs and directories are silently ignored.
///
/// Sub-directories are visited in sorted order. `read_dir` yields entries in a
/// platform- and filesystem-dependent order, so hashing in iteration order could
/// produce different digests for an unchanged tree and needlessly invalidate
/// the cache.
fn hash_all_configs(dir: &Path, h: &mut impl std::hash::Hasher) {
    use std::hash::Hash;

    if let Ok(content) = std::fs::read_to_string(dir.join(".cha.toml")) {
        content.hash(h);
    }
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    let mut subdirs: Vec<PathBuf> = entries
        .flatten()
        .filter(|entry| {
            let name = entry.file_name();
            let name = name.to_string_lossy();
            !name.starts_with('.')
                && !matches!(name.as_ref(), "target" | "node_modules" | "dist")
        })
        .map(|entry| entry.path())
        .filter(|path| path.is_dir())
        .collect();
    // All paths share the same parent, so this orders them by directory name.
    subdirs.sort_unstable();

    for sub in &subdirs {
        hash_all_configs(sub, h);
    }
}
60
/// Returns the cache directory for a project: `<root>/.cha/cache`.
fn cache_dir(root: &Path) -> PathBuf {
    // Join component-wise rather than embedding a `/` in the literal so the
    // result uses the platform's separator throughout.
    root.join(".cha").join("cache")
}
64
/// Hashes `content` with the standard library's `DefaultHasher`.
///
/// The result is deterministic for a given toolchain, but the standard library
/// does not guarantee the algorithm stays the same across Rust releases, so
/// values persisted on disk may stop matching after a compiler upgrade.
fn content_hash(content: &str) -> u64 {
    use std::hash::{Hash, Hasher};

    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    content.hash(&mut hasher);
    hasher.finish()
}
71
/// Returns `(mtime, size)` for `path`: the modification time in whole seconds
/// since the Unix epoch and the file length in bytes.
///
/// Returns `None` when the metadata cannot be read, the platform does not
/// report a modification time, or that time lies before the Unix epoch.
fn file_mtime_and_size(path: &Path) -> Option<(u64, u64)> {
    let meta = std::fs::metadata(path).ok()?;
    let since_epoch = meta
        .modified()
        .ok()?
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?;
    Some((since_epoch.as_secs(), meta.len()))
}
82
83impl ProjectCache {
84    /// Open or create a cache for the given project root.
85    pub fn open(project_root: &Path, env_hash: u64) -> Self {
86        let dir = cache_dir(project_root);
87        let meta_path = dir.join("meta.bin");
88        let meta = std::fs::read(&meta_path)
89            .ok()
90            .and_then(|b| bincode::deserialize::<CacheMeta>(&b).ok())
91            .unwrap_or_default();
92        let meta = if meta.env_hash != env_hash {
93            // Environment changed — full invalidation
94            let _ = std::fs::remove_dir_all(&dir);
95            CacheMeta {
96                env_hash,
97                ..Default::default()
98            }
99        } else {
100            meta
101        };
102        Self {
103            root: project_root.to_path_buf(),
104            meta,
105            dirty: false,
106            mem_models: HashMap::new(),
107        }
108    }
109
110    /// Check if a file is unchanged (mtime + size match).
111    /// Returns (is_unchanged, content_hash) — hash is 0 if unchanged and not yet computed.
112    pub fn check_file(&self, rel_path: &str, path: &Path) -> FileStatus {
113        let Some(entry) = self.meta.files.get(rel_path) else {
114            return FileStatus::Changed;
115        };
116        if let Some((mtime, size)) = file_mtime_and_size(path)
117            && mtime == entry.mtime_secs
118            && size == entry.size
119        {
120            return FileStatus::Unchanged(entry.content_hash);
121        }
122        FileStatus::Changed
123    }
124
125    /// Get cached SourceModel: L1 memory → L2 disk.
126    pub fn get_model(&mut self, chash: u64) -> Option<SourceModel> {
127        if let Some(m) = self.mem_models.get(&chash) {
128            return Some(m.clone());
129        }
130        let path = cache_dir(&self.root)
131            .join("parse")
132            .join(format!("{chash:016x}.bin"));
133        let bytes = std::fs::read(&path).ok()?;
134        let model: SourceModel = bincode::deserialize(&bytes).ok()?;
135        self.mem_models.insert(chash, model.clone());
136        Some(model)
137    }
138
139    /// Store a SourceModel in L1 + L2.
140    pub fn put_model(&mut self, chash: u64, model: &SourceModel) {
141        self.mem_models.insert(chash, model.clone());
142        let dir = cache_dir(&self.root).join("parse");
143        let _ = std::fs::create_dir_all(&dir);
144        if let Ok(bytes) = bincode::serialize(model) {
145            let _ = std::fs::write(dir.join(format!("{chash:016x}.bin")), bytes);
146        }
147    }
148
149    /// Get cached findings for a file.
150    pub fn get_findings(&self, chash: u64) -> Option<Vec<Finding>> {
151        let path = cache_dir(&self.root)
152            .join("findings")
153            .join(format!("{chash:016x}.bin"));
154        let bytes = std::fs::read(&path).ok()?;
155        let entry: FindingsEntry = bincode::deserialize(&bytes).ok()?;
156        (entry.content_hash == chash).then_some(entry.findings)
157    }
158
159    /// Store findings for a file.
160    pub fn put_findings(&mut self, chash: u64, findings: &[Finding]) {
161        let dir = cache_dir(&self.root).join("findings");
162        let _ = std::fs::create_dir_all(&dir);
163        let entry = FindingsEntry {
164            content_hash: chash,
165            findings: findings.to_vec(),
166        };
167        if let Ok(bytes) = bincode::serialize(&entry) {
168            let _ = std::fs::write(dir.join(format!("{chash:016x}.bin")), bytes);
169        }
170    }
171
172    /// Update file metadata after processing.
173    pub fn update_file_entry(
174        &mut self,
175        rel_path: String,
176        path: &Path,
177        chash: u64,
178        imports: Vec<String>,
179    ) {
180        let (mtime_secs, size) = file_mtime_and_size(path).unwrap_or((0, 0));
181        self.meta.files.insert(
182            rel_path,
183            FileEntry {
184                mtime_secs,
185                size,
186                content_hash: chash,
187                imports,
188            },
189        );
190        self.dirty = true;
191    }
192
193    /// Get cached imports for a file (from meta, no disk I/O).
194    pub fn get_imports(&self, rel_path: &str) -> Option<&[String]> {
195        self.meta.files.get(rel_path).map(|e| e.imports.as_slice())
196    }
197
198    /// Flush metadata to disk and clean up orphan cache files.
199    pub fn flush(&self) {
200        if !self.dirty {
201            return;
202        }
203        let dir = cache_dir(&self.root);
204        let _ = std::fs::create_dir_all(&dir);
205        if let Ok(bytes) = bincode::serialize(&self.meta) {
206            let _ = std::fs::write(dir.join("meta.bin"), bytes);
207        }
208        self.gc();
209    }
210
211    /// Remove orphan cache files not referenced by meta.
212    fn gc(&self) {
213        let hashes: std::collections::HashSet<String> = self
214            .meta
215            .files
216            .values()
217            .map(|e| format!("{:016x}.bin", e.content_hash))
218            .collect();
219        for subdir in &["parse", "findings"] {
220            let dir = cache_dir(&self.root).join(subdir);
221            let Ok(entries) = std::fs::read_dir(&dir) else {
222                continue;
223            };
224            for entry in entries.flatten() {
225                let name = entry.file_name().to_string_lossy().to_string();
226                if name.ends_with(".bin") && !hashes.contains(&name) {
227                    let _ = std::fs::remove_file(entry.path());
228                }
229            }
230        }
231        // Remove legacy analysis.json
232        let legacy = cache_dir(&self.root).join("analysis.json");
233        let _ = std::fs::remove_file(legacy);
234    }
235}
236
/// Result of checking a file against cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileStatus {
    /// File unchanged — content hash from cache.
    Unchanged(u64),
    /// File changed or not in cache.
    Changed,
}
244
245/// Compute a content hash.
246pub fn hash_content(s: &str) -> u64 {
247    content_hash(s)
248}
249
250/// Compute environment hash from config + plugins + cha version.
251pub fn env_hash(project_root: &Path, plugin_dirs: &[PathBuf]) -> u64 {
252    use std::hash::{Hash, Hasher};
253    let mut h = std::collections::hash_map::DefaultHasher::new();
254    env!("CARGO_PKG_VERSION").hash(&mut h);
255    hash_all_configs(project_root, &mut h);
256    for dir in plugin_dirs {
257        if let Ok(entries) = std::fs::read_dir(dir) {
258            for entry in entries.flatten() {
259                if let Ok(mtime) = entry.metadata().and_then(|m| m.modified()) {
260                    mtime.hash(&mut h);
261                }
262                entry.file_name().hash(&mut h);
263            }
264        }
265    }
266    h.finish()
267}