use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::OnceLock;
use mir_codebase::{FileId, FileIdMap};
use parking_lot::Mutex;
use serde::{Deserialize, Serialize};
use mir_issues::Issue;
/// Payload returned by a successful cache lookup: the cached issues for the
/// file, paired with the reference locations stored alongside them.
///
/// NOTE(review): the meaning of the individual `(String, u32, u16, u16)`
/// components is not visible in this file — confirm against the producer.
pub type CacheHit = (Vec<Issue>, Vec<(String, u32, u16, u16)>);
/// Returns the BLAKE3 digest of `content`'s bytes, rendered as a hex string.
///
/// The result is what callers pass as `content_hash` to the cache.
pub fn hash_content(content: &str) -> String {
    let digest = blake3::hash(content.as_bytes());
    digest.to_hex().to_string()
}
fn build_fingerprint() -> u64 {
static FP: OnceLock<u64> = OnceLock::new();
*FP.get_or_init(|| {
let exe_bytes = std::env::current_exe().and_then(std::fs::read).ok();
compute_build_fingerprint(exe_bytes.as_deref())
})
}
/// Hashes the crate version together with either the executable image or the
/// bundled stub files, and returns the first eight digest bytes as a
/// little-endian `u64`.
///
/// * `exe_bytes` — contents of the running executable, when available. If it
///   is `None`, every `(path, content)` pair from `crate::stubs::stub_files()`
///   is hashed instead, each component followed by a zero byte.
fn compute_build_fingerprint(exe_bytes: Option<&[u8]>) -> u64 {
    // Single zero byte written after each variable-length field.
    const SEPARATOR: [u8; 1] = [0];

    let mut digest = blake3::Hasher::new();
    digest.update(env!("CARGO_PKG_VERSION").as_bytes());
    digest.update(&SEPARATOR);

    if let Some(image) = exe_bytes {
        digest.update(image);
    } else {
        for (path, content) in crate::stubs::stub_files() {
            digest.update(path.as_bytes());
            digest.update(&SEPARATOR);
            digest.update(content.as_bytes());
            digest.update(&SEPARATOR);
        }
    }

    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.finalize().as_bytes()[..8]);
    u64::from_le_bytes(prefix)
}
/// Combines the build fingerprint, the PHP version and the user-stub
/// fingerprint into the epoch value stored in (and checked against) the cache
/// file's `version` field.
///
/// All three inputs are fixed-width, so they are hashed back-to-back and the
/// first eight digest bytes are returned as a little-endian `u64`.
fn cache_epoch(php_version: u8, user_stub_fp: u64) -> u64 {
    let build = build_fingerprint().to_le_bytes();
    let stubs = user_stub_fp.to_le_bytes();

    let mut digest = blake3::Hasher::new();
    digest.update(&build);
    digest.update(&[php_version]);
    digest.update(&stubs);

    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.finalize().as_bytes()[..8]);
    u64::from_le_bytes(prefix)
}
/// Cached analysis result for a single file.
///
/// This type is serialized into the cache file, so field order is part of the
/// on-disk layout for positional formats.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheEntry {
/// Hash the results were stored under; a lookup only hits when the caller's
/// hash is equal to this value.
content_hash: String,
/// Issues stored for the file.
issues: Vec<Issue>,
/// Reference locations stored for the file. Defaults to an empty vector when
/// the field is missing from the serialized data.
#[serde(default)]
reference_locations: Vec<(String, u32, u16, u16)>,
}
/// Owned, path-keyed form of the cache as it is read back from disk.
///
/// `load` deserializes this type, while `flush` writes a `CacheFileView`; the
/// two must keep the same fields in the same order.
#[derive(Debug, Default, Serialize, Deserialize)]
struct CacheFile {
/// Epoch the file was written with; `load` discards the file when this does
/// not equal the current epoch. Defaults to 0 when absent.
#[serde(default)]
version: u64,
/// Cached results keyed by file path.
#[serde(default)]
entries: HashMap<String, CacheEntry>,
/// Reverse dependency graph keyed by file path: each key maps to the set of
/// files that are evicted when the key changes.
#[serde(default)]
reverse_deps: HashMap<String, HashSet<String>>,
}
/// Borrowing, serialize-only counterpart of `CacheFile`, used when writing the
/// cache so the maps do not have to be moved into an owned `CacheFile`.
///
/// Fields mirror `CacheFile` in name and order.
#[derive(Serialize)]
struct CacheFileView<'a> {
/// Epoch to stamp the written file with.
version: u64,
/// Cached results keyed by file path.
entries: &'a HashMap<String, CacheEntry>,
/// Reverse dependency graph keyed by file path.
reverse_deps: &'a HashMap<String, HashSet<String>>,
}
/// Per-file analysis cache, held in memory and persisted to `cache.bin` inside
/// `cache_dir` by `flush`.
///
/// In memory, files are identified by `FileId`; paths are only materialized
/// when reading from or writing to disk.
pub struct AnalysisCache {
/// Directory containing the cache file.
cache_dir: PathBuf,
/// Mapping between file paths and the `FileId`s used as keys below.
file_id_map: Mutex<FileIdMap>,
/// Cached results per file.
entries: Mutex<HashMap<FileId, CacheEntry>>,
/// For each file, the set of files whose entries are evicted when it changes.
reverse_deps: Mutex<HashMap<FileId, HashSet<FileId>>>,
/// Epoch computed at open time; written as the file's `version` on flush.
epoch: u64,
/// Set by every mutation; `flush` does nothing while it is clear.
dirty: AtomicBool,
}
impl AnalysisCache {
pub fn open(cache_dir: &Path, php_version: u8, user_stub_fp: u64) -> Self {
std::fs::create_dir_all(cache_dir).ok();
let epoch = cache_epoch(php_version, user_stub_fp);
let disk = Self::load(cache_dir, epoch);
let mut id_map = FileIdMap::new();
let entries: HashMap<FileId, CacheEntry> = disk
.entries
.into_iter()
.map(|(path, entry)| (id_map.assign_or_get(&path), entry))
.collect();
let reverse_deps: HashMap<FileId, HashSet<FileId>> = disk
.reverse_deps
.into_iter()
.map(|(path, dep_paths)| {
let id = id_map.assign_or_get(&path);
let dep_ids = dep_paths.iter().map(|p| id_map.assign_or_get(p)).collect();
(id, dep_ids)
})
.collect();
Self {
cache_dir: cache_dir.to_path_buf(),
file_id_map: Mutex::new(id_map),
entries: Mutex::new(entries),
reverse_deps: Mutex::new(reverse_deps),
epoch,
dirty: AtomicBool::new(false),
}
}
/// Opens the cache in the `.mir-cache` directory directly under
/// `project_root`; see `open` for the meaning of the other parameters.
pub fn open_default(project_root: &Path, php_version: u8, user_stub_fp: u64) -> Self {
    let default_dir = project_root.join(".mir-cache");
    Self::open(&default_dir, php_version, user_stub_fp)
}
/// Directory this cache was opened on.
pub fn cache_dir(&self) -> &Path {
    self.cache_dir.as_path()
}
/// Looks up the cached result for `file_path`.
///
/// Returns `None` when the path is unknown, has no entry, or its entry was
/// stored under a hash different from `content_hash`. On a hit, the stored
/// issues and reference locations are cloned out.
pub fn get(&self, file_path: &str, content_hash: &str) -> Option<CacheHit> {
    let id = self.file_id_map.lock().get(file_path)?;
    let entries = self.entries.lock();
    let entry = entries.get(&id)?;
    if entry.content_hash != content_hash {
        return None;
    }
    Some((entry.issues.clone(), entry.reference_locations.clone()))
}
/// Paths of all files that currently have a cache entry.
///
/// Ids that the path map cannot resolve are skipped. The order follows the
/// entry map's iteration order and is therefore unspecified.
pub fn cached_files(&self) -> Vec<String> {
    let paths = self.file_id_map.lock();
    let entries = self.entries.lock();
    let mut files = Vec::with_capacity(entries.len());
    for id in entries.keys() {
        if let Some(path) = paths.path(*id) {
            files.push(path.to_string());
        }
    }
    files
}
/// Stores (or replaces) the cached result for `file_path` and marks the
/// cache dirty so the next `flush` persists it.
pub fn put(
    &self,
    file_path: &str,
    content_hash: String,
    issues: Vec<Issue>,
    reference_locations: Vec<(String, u32, u16, u16)>,
) {
    let id = self.file_id_map.lock().assign_or_get(file_path);
    let fresh = CacheEntry {
        content_hash,
        issues,
        reference_locations,
    };
    let mut entries = self.entries.lock();
    entries.insert(id, fresh);
    self.dirty.store(true, Ordering::Relaxed);
}
pub fn flush(&self) {
let was_dirty = self.dirty.swap(false, Ordering::Relaxed);
if !was_dirty {
return;
}
let cache_file = self.cache_dir.join("cache.bin");
let id_map = self.file_id_map.lock();
let entries_guard = self.entries.lock();
let deps_guard = self.reverse_deps.lock();
let entries: HashMap<String, CacheEntry> = entries_guard
.iter()
.filter_map(|(&id, entry)| id_map.path(id).map(|p| (p.to_string(), entry.clone())))
.collect();
let reverse_deps: HashMap<String, HashSet<String>> = deps_guard
.iter()
.filter_map(|(&id, dep_ids)| {
let path = id_map.path(id)?;
let dep_paths: HashSet<String> = dep_ids
.iter()
.filter_map(|&dep_id| id_map.path(dep_id))
.map(|s| s.to_string())
.collect();
Some((path.to_string(), dep_paths))
})
.collect();
let view = CacheFileView {
version: self.epoch,
entries: &entries,
reverse_deps: &reverse_deps,
};
if let Ok(bytes) = bincode::serialize(&view) {
std::fs::write(cache_file, bytes).ok();
}
}
pub fn set_reverse_deps(&self, deps: HashMap<String, HashSet<String>>) {
let mut id_map = self.file_id_map.lock();
let converted: HashMap<FileId, HashSet<FileId>> = deps
.into_iter()
.map(|(path, dep_paths)| {
let id = id_map.assign_or_get(&path);
let dep_ids = dep_paths.iter().map(|p| id_map.assign_or_get(p)).collect();
(id, dep_ids)
})
.collect();
drop(id_map);
*self.reverse_deps.lock() = converted;
self.dirty.store(true, Ordering::Relaxed);
}
/// Rewrites the reverse-dependency edges contributed by `file`.
///
/// `file` is first removed from every dependent set (sets left empty are
/// dropped), then registered as a dependent of each path in `new_targets`
/// except itself. The cache is marked dirty afterwards.
pub fn update_reverse_deps_for_file(&self, file: &str, new_targets: &HashSet<String>) {
    // Resolve every path under a single acquisition of the path-map lock.
    let (file_id, target_ids): (FileId, Vec<FileId>) = {
        let mut id_map = self.file_id_map.lock();
        let file_id = id_map.assign_or_get(file);
        let target_ids = new_targets
            .iter()
            .map(|target| id_map.assign_or_get(target))
            .collect();
        (file_id, target_ids)
    };

    let mut deps = self.reverse_deps.lock();
    // One pass: drop the stale edges and prune sets that end up empty.
    deps.retain(|_, dependents| {
        dependents.remove(&file_id);
        !dependents.is_empty()
    });
    for target_id in target_ids {
        // A file is never recorded as its own dependent.
        if target_id != file_id {
            deps.entry(target_id).or_default().insert(file_id);
        }
    }
    self.dirty.store(true, Ordering::Relaxed);
}
pub fn evict_with_dependents(&self, changed_files: &[String]) -> usize {
let seed_ids: Vec<FileId> = {
let id_map = self.file_id_map.lock();
changed_files.iter().filter_map(|p| id_map.get(p)).collect()
};
if seed_ids.is_empty() {
return 0;
}
let to_evict: Vec<FileId> = {
let deps = self.reverse_deps.lock();
let mut visited: HashSet<FileId> = seed_ids.iter().copied().collect();
let mut queue: std::collections::VecDeque<FileId> = seed_ids.iter().copied().collect();
let mut result = Vec::new();
while let Some(id) = queue.pop_front() {
if let Some(dependents) = deps.get(&id) {
for &dep_id in dependents {
if visited.insert(dep_id) {
queue.push_back(dep_id);
result.push(dep_id);
}
}
}
}
result
};
let count = to_evict.len();
let mut entries = self.entries.lock();
for id in &to_evict {
entries.remove(id);
}
if count > 0 {
self.dirty.store(true, Ordering::Relaxed);
}
count
}
/// Drops the cache entry for `file_path`, if there is one.
///
/// The cache is only marked dirty when an entry was actually removed.
pub fn evict(&self, file_path: &str) {
    let id = match self.file_id_map.lock().get(file_path) {
        Some(id) => id,
        None => return,
    };
    let mut entries = self.entries.lock();
    let removed = entries.remove(&id).is_some();
    if removed {
        self.dirty.store(true, Ordering::Relaxed);
    }
}
/// Reads the persisted cache from `cache_dir`.
///
/// `cache.bin` (bincode) is tried first; `cache.json` is only consulted when
/// the binary file is missing or cannot be decoded. Whichever file decodes is
/// kept only if its `version` equals `epoch`; in every other case an empty
/// `CacheFile` is returned.
fn load(cache_dir: &Path, epoch: u64) -> CacheFile {
    let from_bin = || {
        let bytes = std::fs::read(cache_dir.join("cache.bin")).ok()?;
        bincode::deserialize::<CacheFile>(&bytes).ok()
    };
    let from_json = || {
        let bytes = std::fs::read(cache_dir.join("cache.json")).ok()?;
        serde_json::from_slice::<CacheFile>(&bytes).ok()
    };

    match from_bin().or_else(from_json) {
        Some(file) if file.version == epoch => file,
        _ => CacheFile::default(),
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// PHP version value the tests pass to `AnalysisCache::open` and
/// `cache_epoch`; the tests only rely on it being the same across calls.
const TEST_PHP_V: u8 = 0;
/// Opens a cache rooted at the temporary directory, using the test PHP
/// version and a zero user-stub fingerprint.
fn make_cache(dir: &TempDir) -> AnalysisCache {
    let root = dir.path();
    AnalysisCache::open(root, TEST_PHP_V, 0)
}
/// Inserts an empty result for `file` under the fixed content hash `"hash"`.
fn seed(cache: &AnalysisCache, file: &str) {
    let content_hash = String::from("hash");
    cache.put(file, content_hash, Vec::new(), Vec::new());
}
/// A → B → C: changing A evicts both B and C, but leaves A's own entry alone.
#[test]
fn evict_with_dependents_linear_chain() {
    let tmp = TempDir::new().unwrap();
    let cache = make_cache(&tmp);
    for file in ["A", "B", "C"] {
        seed(&cache, file);
    }

    // (changed file, file that depends on it)
    let mut graph: HashMap<String, HashSet<String>> = HashMap::default();
    for (target, dependent) in [("A", "B"), ("B", "C")] {
        graph
            .entry(target.to_string())
            .or_default()
            .insert(dependent.to_string());
    }
    cache.set_reverse_deps(graph);

    let changed = ["A".to_string()];
    let evicted = cache.evict_with_dependents(&changed);

    assert_eq!(evicted, 2, "B and C should be evicted");
    assert!(cache.get("A", "hash").is_some(), "A itself is not evicted");
    assert!(cache.get("B", "hash").is_none(), "B should be evicted");
    assert!(cache.get("C", "hash").is_none(), "C should be evicted");
}
/// Diamond A → {B, C} → D: D is reachable along two paths but counted once.
#[test]
fn evict_with_dependents_diamond() {
    let tmp = TempDir::new().unwrap();
    let cache = make_cache(&tmp);
    for file in ["A", "B", "C", "D"] {
        seed(&cache, file);
    }

    // (changed file, file that depends on it)
    let mut graph: HashMap<String, HashSet<String>> = HashMap::default();
    for (target, dependent) in [("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")] {
        graph
            .entry(target.to_string())
            .or_default()
            .insert(dependent.to_string());
    }
    cache.set_reverse_deps(graph);

    let changed = ["A".to_string()];
    let evicted = cache.evict_with_dependents(&changed);

    assert_eq!(evicted, 3, "B, C, D each evicted once");
    assert!(cache.get("D", "hash").is_none());
}
/// A ↔ B cycle: the walk terminates and only B (the non-seed) is evicted.
#[test]
fn evict_with_dependents_cycle_safety() {
    let tmp = TempDir::new().unwrap();
    let cache = make_cache(&tmp);
    for file in ["A", "B"] {
        seed(&cache, file);
    }

    // (changed file, file that depends on it)
    let mut graph: HashMap<String, HashSet<String>> = HashMap::default();
    for (target, dependent) in [("A", "B"), ("B", "A")] {
        graph
            .entry(target.to_string())
            .or_default()
            .insert(dependent.to_string());
    }
    cache.set_reverse_deps(graph);

    let changed = ["A".to_string()];
    let evicted = cache.evict_with_dependents(&changed);

    assert_eq!(evicted, 1);
    assert!(cache.get("B", "hash").is_none());
}
/// Changing a file nobody depends on evicts nothing.
#[test]
fn evict_with_dependents_unrelated_file_untouched() {
    let tmp = TempDir::new().unwrap();
    let cache = make_cache(&tmp);
    for file in ["A", "B", "C"] {
        seed(&cache, file);
    }

    // Only edge: B depends on A. C is not part of the graph at all.
    let mut graph: HashMap<String, HashSet<String>> = HashMap::default();
    graph
        .entry("A".to_string())
        .or_default()
        .insert("B".to_string());
    cache.set_reverse_deps(graph);

    let changed = ["C".to_string()];
    let evicted = cache.evict_with_dependents(&changed);

    assert_eq!(evicted, 0);
    let b_entry = cache.get("B", "hash");
    assert!(b_entry.is_some(), "B unrelated, should survive");
}
/// A JSON cache whose entries lack `reference_locations` still loads, with
/// the missing field defaulting to an empty vector.
#[test]
fn cache_entry_without_reference_locations_deserializes_to_empty() {
    let tmp = TempDir::new().unwrap();
    let epoch = cache_epoch(TEST_PHP_V, 0);
    let payload = format!(
        r#"{{"version":{},"entries":{{"a.php":{{"content_hash":"abc","issues":[]}}}},"reverse_deps":{{}}}}"#,
        epoch
    );
    std::fs::write(tmp.path().join("cache.json"), payload).unwrap();

    let cache = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0);
    let (issues, reference_locations) = cache
        .get("a.php", "abc")
        .expect("same-epoch cache entry should deserialize successfully");

    assert!(issues.is_empty(), "no issues");
    assert!(
        reference_locations.is_empty(),
        "reference_locations should default to empty vec, not fail"
    );
}
/// An entry flushed to disk is served again after reopening with the same
/// PHP version and stub fingerprint.
#[test]
fn entries_survive_reopen_with_matching_epoch() {
    let tmp = TempDir::new().unwrap();

    let writer = make_cache(&tmp);
    writer.put("a.php", String::from("h1"), Vec::new(), Vec::new());
    writer.flush();
    drop(writer);

    let reopened = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0);
    let hit = reopened.get("a.php", "h1");
    assert!(
        hit.is_some(),
        "entry written by the same build/stub set must survive a reopen"
    );
}
/// A `cache.bin` stamped with a different epoch is ignored wholesale, even if
/// the content hash of an entry would match.
#[test]
fn stale_epoch_discards_entire_cache() {
    let tmp = TempDir::new().unwrap();

    // Any value other than the current epoch will do.
    let stale_epoch = cache_epoch(TEST_PHP_V, 0).wrapping_add(1);
    let stored = CacheEntry {
        content_hash: String::from("h1"),
        issues: Vec::new(),
        reference_locations: Vec::new(),
    };
    let entries: HashMap<String, CacheEntry> =
        std::iter::once((String::from("a.php"), stored)).collect();
    let reverse_deps: HashMap<String, HashSet<String>> = HashMap::default();

    let bytes = bincode::serialize(&CacheFileView {
        version: stale_epoch,
        entries: &entries,
        reverse_deps: &reverse_deps,
    })
    .unwrap();
    std::fs::write(tmp.path().join("cache.bin"), bytes).unwrap();

    let cache = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0);
    let hit = cache.get("a.php", "h1");
    assert!(
        hit.is_none(),
        "entry from a mismatched epoch must not be served despite a matching content hash"
    );
}
/// The PHP version is part of the epoch: the same version reuses the cache,
/// a different one starts from scratch.
#[test]
fn switching_php_version_discards_cache() {
    let tmp = TempDir::new().unwrap();

    let writer = AnalysisCache::open(tmp.path(), 74, 0);
    writer.put("a.php", String::from("h1"), Vec::new(), Vec::new());
    writer.flush();
    drop(writer);

    let same_version = AnalysisCache::open(tmp.path(), 74, 0);
    assert!(
        same_version.get("a.php", "h1").is_some(),
        "same PHP version must reuse the cache"
    );

    let other_version = AnalysisCache::open(tmp.path(), 80, 0);
    assert!(
        other_version.get("a.php", "h1").is_none(),
        "a different PHP version must discard the cache, not serve stale results"
    );
}
/// The user-stub fingerprint is part of the epoch: an identical fingerprint
/// reuses the cache, a changed one discards it.
#[test]
fn changing_user_stub_fingerprint_discards_cache() {
    let tmp = TempDir::new().unwrap();

    let writer = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0xAAAA);
    writer.put("a.php", String::from("h1"), Vec::new(), Vec::new());
    writer.flush();
    drop(writer);

    let unchanged = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0xAAAA);
    assert!(
        unchanged.get("a.php", "h1").is_some(),
        "identical user-stub fingerprint must reuse the cache"
    );

    let altered = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0xBBBB);
    assert!(
        altered.get("a.php", "h1").is_none(),
        "a changed user-stub fingerprint must discard the cache"
    );
}
/// A `cache.bin` written as a bare `(entries, reverse_deps)` pair — without
/// a leading version — must open without panicking and serve nothing.
#[test]
fn legacy_versionless_cache_bin_is_discarded_not_paniced() {
    let tmp = TempDir::new().unwrap();

    let stored = CacheEntry {
        content_hash: String::from("h1"),
        issues: Vec::new(),
        reference_locations: Vec::new(),
    };
    let entries: HashMap<String, CacheEntry> =
        std::iter::once((String::from("a.php"), stored)).collect();
    let reverse_deps: HashMap<String, HashSet<String>> = HashMap::default();

    let legacy_layout = (&entries, &reverse_deps);
    let bytes = bincode::serialize(&legacy_layout).unwrap();
    std::fs::write(tmp.path().join("cache.bin"), bytes).unwrap();

    let cache = AnalysisCache::open(tmp.path(), TEST_PHP_V, 0);
    let hit = cache.get("a.php", "h1");
    assert!(
        hit.is_none(),
        "legacy versionless entries must be discarded, not served"
    );
}
/// The fingerprint is a pure function of the binary bytes: equal bytes give
/// equal fingerprints, different bytes give different ones, and the
/// no-executable fallback is stable across calls.
#[test]
fn build_fingerprint_tracks_binary_identity() {
    let image_a: &[u8] = b"mir-binary-image-A";
    let image_b: &[u8] = b"mir-binary-image-B";

    let first = compute_build_fingerprint(Some(image_a));
    let repeat = compute_build_fingerprint(Some(image_a));
    let other = compute_build_fingerprint(Some(image_b));

    assert_eq!(first, repeat, "same binary bytes → same fingerprint");
    assert_ne!(
        first, other,
        "a new mir build (different bytes) must change the fingerprint"
    );

    let fallback = compute_build_fingerprint(None);
    let fallback_again = compute_build_fingerprint(None);
    assert_eq!(fallback, fallback_again);
}
}