use crate::core::NormalizedPath;
use std::collections::{HashMap, HashSet};
use std::path::Path;
use dashmap::DashMap;
/// Identity of a single watch: the scanned root paired with the cache file it guards.
///
/// `FingerprintManager::check` builds the key from canonicalized paths, so two
/// spellings of the same location resolve to the same watch.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub(crate) struct WatchKey {
/// Root directory of the watched tree.
pub root: NormalizedPath,
/// Cache file this watch is associated with.
pub cache_file: NormalizedPath,
}
/// Last known metadata and content hash of one file inside a watched root.
#[derive(Debug, Clone)]
struct TrackedFile {
/// Modification time as returned by `persist::mtime_ns`; 0 when it could not be read.
mtime_ns: u64,
/// File size as returned by `persist::file_size`; 0 when it could not be read.
size: u64,
/// Hex-encoded content hash; empty when hashing failed.
hash_hex: String,
}
/// Metadata for one path reported as changed, computed once per `on_batch` call
/// and then compared against every watch.
struct ChangedMeta {
/// Canonicalized form of the reported path.
canon: NormalizedPath,
/// Modification time at the moment the batch was processed (0 if unreadable).
mtime_ns: u64,
/// File size at the moment the batch was processed (0 if unreadable).
size: u64,
/// Hex-encoded content hash; empty when hashing failed.
hash_hex: String,
}
/// Mutable per-watch state stored in `FingerprintManager::watches`.
// `cache_type` is stored but never read in this file, hence the lint
// suppression. NOTE(review): confirm whether another module reads it.
#[allow(dead_code)]
struct WatchState {
/// Tracked files keyed by their path relative to `root`.
files: HashMap<String, TrackedFile>,
/// True while there are content changes that no successful run has consumed.
dirty: bool,
/// Relative paths of the files that made the watch dirty.
dirty_files: HashSet<String>,
/// Bumped every time a content change or removal is recorded.
generation: u64,
/// Value of `generation` when `check` last answered "run" for an existing watch;
/// `mark_success` clears the dirty state only while the two are equal.
checked_generation: u64,
/// One of "pending", "success" or "failure".
status: String,
/// Cache type passed to the `check` call that created the watch.
cache_type: String,
/// Canonicalized root directory of the watch.
root: NormalizedPath,
}
/// Outcome of [`FingerprintManager::check`].
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// stored and compared directly in callers and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FpCheckResult {
    /// Either `"run"` or `"skip"`.
    pub decision: String,
    /// Why a run is required; `None` when the decision is `"skip"`.
    pub reason: Option<String>,
    /// Root-relative paths known to have changed; may be empty even for `"run"`.
    pub changed_files: Vec<String>,
}
/// In-memory registry of watches, one per (`root`, `cache_file`) pair.
///
/// Tracks file metadata and hashes so `check` can tell whether the work
/// guarded by a cache file has to run again.
pub(crate) struct FingerprintManager {
/// All known watches, keyed by canonicalized root and cache file.
watches: DashMap<WatchKey, WatchState>,
}
/// Removes the Windows verbatim prefix that `canonicalize` adds, so canonical
/// paths compare equal to ordinarily spelled ones.
///
/// * `\\?\C:\dir\file` becomes `C:\dir\file`.
/// * `\\?\UNC\server\share\file` becomes `\\server\share\file`.
///
/// On non-Windows targets the path is returned unchanged.
fn strip_win_prefix(path: NormalizedPath) -> NormalizedPath {
    #[cfg(windows)]
    {
        let s = path.to_string_lossy();
        // The UNC form must be handled first: stripping only `\\?\` from it
        // would leave the meaningless relative path `UNC\server\share\...`.
        if let Some(unc_tail) = s.strip_prefix(r"\\?\UNC\") {
            let unc = format!(r"\\{unc_tail}");
            return NormalizedPath::from(unc.as_str());
        }
        if let Some(stripped) = s.strip_prefix(r"\\?\") {
            return NormalizedPath::from(stripped);
        }
    }
    path
}
/// Canonicalizes `path`, dropping any Windows verbatim prefix.
///
/// If canonicalization fails (for example because the path does not exist),
/// the input is converted as-is.
fn canon(path: &Path) -> NormalizedPath {
    path.canonicalize()
        .map(|resolved| strip_win_prefix(resolved.into()))
        .unwrap_or_else(|_| path.into())
}
/// Canonicalizes a path whose final component may not exist yet.
///
/// Resolution is attempted in three steps:
/// 1. canonicalize the whole path;
/// 2. otherwise canonicalize the parent directory and re-append the file name;
/// 3. otherwise convert the input unchanged.
fn canon_maybe_missing(path: &Path) -> NormalizedPath {
    match path.canonicalize() {
        Ok(resolved) => strip_win_prefix(resolved.into()),
        Err(_) => {
            let via_parent = path
                .parent()
                .zip(path.file_name())
                .and_then(|(dir, leaf)| {
                    dir.canonicalize()
                        .ok()
                        .map(|resolved_dir| strip_win_prefix(resolved_dir.into()).join(leaf))
                });
            match via_parent {
                Some(rebuilt) => rebuilt,
                None => path.into(),
            }
        }
    }
}
impl FingerprintManager {
/// Creates a manager with no registered watches.
pub fn new() -> Self {
    let watches = DashMap::new();
    Self { watches }
}
/// Decides whether the work guarded by `cache_file` has to run for `root`.
///
/// Both paths are canonicalized before the watch is looked up.
///
/// For an existing watch the decision is, in order:
/// * dirty -> `"run"` / `"content changed"`, listing the dirty files;
/// * status `"success"` -> tracked files are re-verified on disk; `"skip"` if
///   nothing changed, otherwise `"run"` / `"content changed"` with the files
///   whose content differs;
/// * status `"failure"` -> `"run"` / `"previous failure"`;
/// * anything else (still pending) -> `"run"` / `"pending"`.
///
/// For an unknown watch the tree is scanned using `extensions`, or
/// `include_globs` when that list is non-empty, together with `exclude`.
/// Every file found is stat'ed and hashed, the watch is registered as
/// pending, and `"run"` / `"no cache file"` is returned. If the scan itself
/// fails, nothing is registered and `"run"` / `"scan error: ..."` is returned.
///
/// The whole decision for an existing watch is taken under one write guard.
/// Reading the state first and re-locking afterwards would let a concurrent
/// `on_batch` slip in between, which could yield a `"skip"` for a watch that
/// had just become dirty.
pub fn check(
    &self,
    cache_file: &Path,
    cache_type: &str,
    root: &Path,
    extensions: &[String],
    include_globs: &[String],
    exclude: &[String],
) -> FpCheckResult {
    let canon_root = canon(root);
    let canon_cf = canon_maybe_missing(cache_file);
    let key = WatchKey {
        root: canon_root.clone(),
        cache_file: canon_cf,
    };

    if let Some(mut w) = self.watches.get_mut(&key) {
        if w.dirty {
            let changed_files: Vec<String> = w.dirty_files.iter().cloned().collect();
            let generation = w.generation;
            w.status = "pending".into();
            // Remember which generation this run covers so that mark_success
            // does not swallow events arriving while the run is in flight.
            w.checked_generation = generation;
            drop(w);
            tracing::debug!("fingerprint check: run (dirty)");
            return FpCheckResult {
                decision: "run".into(),
                reason: Some("content changed".into()),
                changed_files,
            };
        }

        if w.status == "success" {
            // Watcher events can be missed; re-verify against the filesystem.
            let changed = Self::verify_filesystem(&mut w);
            if changed.is_empty() {
                drop(w);
                tracing::debug!("fingerprint check: skip (verified, not dirty)");
                return FpCheckResult {
                    decision: "skip".into(),
                    reason: None,
                    changed_files: vec![],
                };
            }
            let new_generation = w.generation + 1;
            w.generation = new_generation;
            w.dirty = true;
            w.dirty_files.extend(changed.iter().cloned());
            w.status = "pending".into();
            w.checked_generation = new_generation;
            drop(w);
            tracing::debug!("fingerprint check: run (verified, content changed)");
            return FpCheckResult {
                decision: "run".into(),
                reason: Some("content changed".into()),
                changed_files: changed,
            };
        }

        if w.status == "failure" {
            let generation = w.generation;
            w.status = "pending".into();
            w.checked_generation = generation;
            drop(w);
            tracing::debug!("fingerprint check: run (previous failure)");
            return FpCheckResult {
                decision: "run".into(),
                reason: Some("previous failure".into()),
                changed_files: vec![],
            };
        }

        drop(w);
        tracing::debug!("fingerprint check: run (pending)");
        return FpCheckResult {
            decision: "run".into(),
            reason: Some("pending".into()),
            changed_files: vec![],
        };
    }

    tracing::debug!(
        "fingerprint check: initial scan for {}",
        canon_root.display()
    );
    let files = match Self::scan_files(&canon_root, extensions, include_globs, exclude) {
        Ok(found) => found,
        Err(e) => {
            tracing::warn!("fingerprint scan failed: {e}");
            return FpCheckResult {
                decision: "run".into(),
                reason: Some(format!("scan error: {e}")),
                changed_files: vec![],
            };
        }
    };

    // Unreadable metadata is recorded as 0 and an unhashable file as an empty
    // hash, so a single bad file does not abort the whole scan.
    let mut tracked = HashMap::new();
    for file in &files {
        let mtime = crate::fingerprint::persist::mtime_ns(&file.absolute).unwrap_or(0);
        let size = crate::fingerprint::persist::file_size(&file.absolute).unwrap_or(0);
        let hash_hex = match crate::hash::hash_file(&file.absolute) {
            Ok(h) => h.to_hex(),
            Err(_) => String::new(),
        };
        tracked.insert(
            file.relative.clone(),
            TrackedFile {
                mtime_ns: mtime,
                size,
                hash_hex,
            },
        );
    }

    let watch = WatchState {
        files: tracked,
        dirty: false,
        dirty_files: HashSet::new(),
        generation: 0,
        checked_generation: 0,
        status: "pending".into(),
        cache_type: cache_type.to_string(),
        root: canon_root,
    };
    self.watches.insert(key, watch);
    FpCheckResult {
        decision: "run".into(),
        reason: Some("no cache file".into()),
        changed_files: vec![],
    }
}
/// Records that the work guarded by `cache_file` completed successfully.
///
/// The first watch whose cache file matches gets status `"success"`. Its
/// dirty flag and dirty-file list are cleared only when no change was
/// recorded since the `check` that triggered the run, i.e. while
/// `generation` still equals `checked_generation`. If no watch matches,
/// only a debug message is logged.
pub fn mark_success(&self, cache_file: &Path) {
    let canon_cf = canon_maybe_missing(cache_file);
    let matched = self
        .watches
        .iter_mut()
        .find(|entry| entry.key().cache_file == canon_cf);

    match matched {
        Some(mut entry) => {
            let state = entry.value_mut();
            let run_is_current = state.generation == state.checked_generation;
            if run_is_current {
                state.dirty = false;
                state.dirty_files.clear();
            }
            state.status = "success".into();
            tracing::debug!("fingerprint mark-success: {}", cache_file.display());
        }
        None => {
            tracing::debug!(
                "fingerprint mark-success: no watch for {}",
                cache_file.display()
            );
        }
    }
}
/// Records that the work guarded by `cache_file` failed.
///
/// The first watch whose cache file matches gets status `"failure"`, which
/// makes the next `check` answer `"run"`. If no watch matches, a debug
/// message is logged, mirroring `mark_success`.
pub fn mark_failure(&self, cache_file: &Path) {
    let canon_cf = canon_maybe_missing(cache_file);
    for mut entry in self.watches.iter_mut() {
        if entry.key().cache_file == canon_cf {
            entry.value_mut().status = "failure".into();
            tracing::debug!("fingerprint mark-failure: {}", cache_file.display());
            return;
        }
    }
    tracing::debug!(
        "fingerprint mark-failure: no watch for {}",
        cache_file.display()
    );
}
/// Forgets every watch associated with `cache_file`, whatever its root.
///
/// The next `check` for that cache file starts over with a full scan.
pub fn invalidate(&self, cache_file: &Path) {
    let target = canon_maybe_missing(cache_file);
    self.watches.retain(|watch_key, _state| {
        // Keep only the watches that belong to a different cache file.
        watch_key.cache_file != target
    });
    tracing::debug!("fingerprint invalidate: {}", cache_file.display());
}
/// Applies a batch of watcher events to every registered watch.
///
/// Each changed path is canonicalized, stat'ed and hashed once, before the
/// watches are visited. For every watch whose root contains the path:
/// * if the file is untracked or its hash differs, the watch becomes dirty,
///   the generation is bumped and the tracked entry is replaced;
/// * if the hash is unchanged, only the stored mtime and size are refreshed.
///
/// A removed path that was tracked is dropped from the watch, which also
/// marks it dirty and bumps the generation. Paths outside a watch's root are
/// ignored for that watch.
pub fn on_batch(&self, changed: &[NormalizedPath], removed: &[NormalizedPath]) {
    let nothing_to_do = changed.is_empty() && removed.is_empty();
    if nothing_to_do {
        return;
    }

    let mut changed_meta: Vec<ChangedMeta> = Vec::with_capacity(changed.len());
    for raw in changed {
        let resolved = canon(raw);
        let mtime_ns = crate::fingerprint::persist::mtime_ns(&resolved).unwrap_or(0);
        let size = crate::fingerprint::persist::file_size(&resolved).unwrap_or(0);
        let hash_hex = crate::hash::hash_file(&resolved)
            .map(|digest| digest.to_hex())
            .unwrap_or_default();
        changed_meta.push(ChangedMeta {
            canon: resolved,
            mtime_ns,
            size,
            hash_hex,
        });
    }

    let mut removed_canon: Vec<NormalizedPath> = Vec::with_capacity(removed.len());
    for gone in removed {
        removed_canon.push(canon_maybe_missing(gone));
    }

    for mut entry in self.watches.iter_mut() {
        let state = entry.value_mut();

        for meta in &changed_meta {
            // Keys are root-relative and always use forward slashes.
            let rel_key = match meta.canon.strip_prefix(&state.root) {
                Ok(rel) => rel.to_string_lossy().replace('\\', "/"),
                Err(_) => continue,
            };

            let same_content = state
                .files
                .get(&rel_key)
                .map_or(false, |known| known.hash_hex == meta.hash_hex);

            if same_content {
                // Touched but not modified: refresh metadata, stay clean.
                if let Some(known) = state.files.get_mut(&rel_key) {
                    known.mtime_ns = meta.mtime_ns;
                    known.size = meta.size;
                }
                continue;
            }

            state.dirty = true;
            state.dirty_files.insert(rel_key.clone());
            state.generation += 1;
            state.files.insert(
                rel_key,
                TrackedFile {
                    mtime_ns: meta.mtime_ns,
                    size: meta.size,
                    hash_hex: meta.hash_hex.clone(),
                },
            );
        }

        for gone in &removed_canon {
            let rel_key = match gone.strip_prefix(&state.root) {
                Ok(rel) => rel.to_string_lossy().replace('\\', "/"),
                Err(_) => continue,
            };
            let was_tracked = state.files.remove(&rel_key).is_some();
            if was_tracked {
                state.dirty = true;
                state.dirty_files.insert(rel_key);
                state.generation += 1;
            }
        }
    }
}
/// Lists the files under `root` that belong to a watch.
///
/// When `include_globs` is non-empty the glob walker is used and
/// `extensions` is ignored; otherwise files are selected by extension.
/// `exclude` is passed to whichever walker runs.
fn scan_files(
    root: &Path,
    extensions: &[String],
    include_globs: &[String],
    exclude: &[String],
) -> std::result::Result<
    Vec<crate::fingerprint::ScannedFile>,
    crate::fingerprint::FingerprintError,
> {
    let exclude_refs: Vec<&str> = exclude.iter().map(String::as_str).collect();

    if include_globs.is_empty() {
        let ext_refs: Vec<&str> = extensions.iter().map(String::as_str).collect();
        return crate::fingerprint::walk_files(root, &ext_refs, &exclude_refs);
    }

    let include_refs: Vec<&str> = include_globs.iter().map(String::as_str).collect();
    crate::fingerprint::walk_files_glob(root, &include_refs, &exclude_refs)
}
fn verify_filesystem(watch: &mut WatchState) -> Vec<String> {
let mut changed = Vec::new();
let root = watch.root.clone();
for (rel_path, tracked) in watch.files.iter_mut() {
let abs = root.join(rel_path);
let mtime = crate::fingerprint::persist::mtime_ns(&abs).unwrap_or(0);
let size = crate::fingerprint::persist::file_size(&abs).unwrap_or(0);
if mtime == tracked.mtime_ns && size == tracked.size {
continue; }
let hash_hex = match crate::hash::hash_file(&abs) {
Ok(h) => h.to_hex(),
Err(_) => {
changed.push(rel_path.clone());
continue;
}
};
if hash_hex != tracked.hash_hex {
tracked.mtime_ns = mtime;
tracked.size = size;
tracked.hash_hex = hash_hex;
changed.push(rel_path.clone());
} else {
tracked.mtime_ns = mtime;
tracked.size = size;
}
}
changed
}
/// Returns the number of registered watches.
// Within this file the method is only called from the test module, so the
// lint is silenced for non-test builds.
#[allow(dead_code)]
pub fn watch_count(&self) -> usize {
self.watches.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
/// Test helper: writes `content` to `dir/rel`, creating missing parent
/// directories first. Panics on any I/O error.
fn create_file(dir: &Path, rel: &str, content: &str) {
    let target = dir.join(rel);
    let parent_dir = target.parent();
    if let Some(parent_dir) = parent_dir {
        fs::create_dir_all(parent_dir).unwrap();
    }
    fs::write(&target, content).unwrap();
}
/// The very first `check` for an unknown watch must answer "run" with
/// reason "no cache file".
#[test]
fn first_check_returns_run() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "fn main() {}");
let mgr = FingerprintManager::new();
let result = mgr.check(
&cache_dir.path().join("fp.json"),
"two-layer",
src.path(),
&[],
&[],
&[],
);
assert_eq!(result.decision, "run");
assert_eq!(result.reason.as_deref(), Some("no cache file"));
}
/// After a run is marked successful and nothing changes on disk, the next
/// `check` must answer "skip".
#[test]
fn check_then_mark_success_then_check_returns_skip() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "fn main() {}");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
mgr.mark_success(&cache_file);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "skip");
}
/// A changed-file event whose content really differs must make the next
/// `check` answer "run" with reason "content changed".
#[test]
fn on_batch_changed_sets_dirty() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "original");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
// Pause before rewriting the file, presumably so its mtime differs from the scanned one.
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "modified");
mgr.on_batch(&[src.path().join("a.rs").into()], &[]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert_eq!(result.reason.as_deref(), Some("content changed"));
}
/// A removed-file event for a tracked file must make the next `check`
/// answer "run". The file is only reported as removed; it is not deleted
/// from disk here.
#[test]
fn on_batch_removed_sets_dirty() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
create_file(src.path(), "b.rs", "content2");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
mgr.on_batch(&[], &[src.path().join("b.rs").into()]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
}
/// Rewriting a file with identical content and reporting it via `on_batch`
/// must not dirty the watch: the next `check` still answers "skip".
#[test]
fn smart_touch_does_not_set_dirty() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "stable");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "stable");
mgr.on_batch(&[src.path().join("a.rs").into()], &[]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "skip");
}
/// After `mark_failure`, the next `check` must answer "run" with reason
/// "previous failure" even though no file changed.
#[test]
fn mark_failure_forces_rerun() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_failure(&cache_file);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert_eq!(result.reason.as_deref(), Some("previous failure"));
}
/// `invalidate` must drop the watch, so the following `check` behaves like
/// a first check ("run" / "no cache file").
#[test]
fn invalidate_removes_watch() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
assert_eq!(mgr.watch_count(), 1);
mgr.invalidate(&cache_file);
assert_eq!(mgr.watch_count(), 0);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert_eq!(result.reason.as_deref(), Some("no cache file"));
}
/// A changed-file event for a path outside the watched root must leave the
/// watch clean.
#[test]
fn unrelated_watcher_event_ignored() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
mgr.on_batch(&[NormalizedPath::from("/some/other/path.rs")], &[]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "skip");
}
/// Regression test: when a watch is dirty, `check` must list the files
/// that changed instead of returning an empty `changed_files`.
#[test]
fn bug_a_changed_files_reported_when_dirty() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "original");
create_file(src.path(), "b.rs", "stable");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "modified");
mgr.on_batch(&[src.path().join("a.rs").into()], &[]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert!(
!result.changed_files.is_empty(),
"changed_files must report which files changed, got empty"
);
assert!(
result.changed_files.iter().any(|f| f.contains("a.rs")),
"changed_files should contain a.rs, got {:?}",
result.changed_files
);
}
/// Regression test: a change recorded between `check` and `mark_success`
/// must survive `mark_success`, so the following `check` still answers "run".
#[test]
fn bug_b_mark_success_does_not_swallow_concurrent_events() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "v1");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "v2");
mgr.on_batch(&[src.path().join("a.rs").into()], &[]);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
// Simulate an event arriving while the run started above is still in flight.
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "v3");
mgr.on_batch(&[src.path().join("a.rs").into()], &[]);
mgr.mark_success(&cache_file);
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(
result.decision, "run",
"events arriving between check and mark_success must not be lost"
);
}
/// Regression test: a second `check` on a still-pending watch must answer
/// "run" without reporting the initial-scan reason "no cache file".
#[test]
fn bug_c_pending_status_does_not_rescan() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert_eq!(result.reason.as_deref(), Some("no cache file"));
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "run");
assert_ne!(
result.reason.as_deref(),
Some("no cache file"),
"pending watch should not trigger a full rescan"
);
}
/// Regression test: a watch registered through a non-canonical root
/// (`<src>/sub/..`) must still react to `on_batch` events that carry
/// canonical paths.
#[test]
fn bug_d_non_canonical_root_breaks_on_batch() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
fs::create_dir(src.path().join("sub")).unwrap();
create_file(src.path(), "a.rs", "original");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
let non_canonical_root = src.path().join("sub").join("..");
mgr.check(&cache_file, "two-layer", &non_canonical_root, &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "modified");
let canonical_root = canon(src.path());
mgr.on_batch(&[canonical_root.join("a.rs")], &[]);
let result = mgr.check(&cache_file, "two-layer", &non_canonical_root, &[], &[], &[]);
assert_eq!(
result.decision, "run",
"on_batch with canonical paths must work even when root was non-canonical"
);
}
/// Regression test: `mark_success` called with the canonical cache path
/// must match a watch that was created through a non-canonical spelling
/// (`<cache>/sub/../fp.json`).
#[test]
fn bug_e_non_canonical_cache_file_breaks_mark_success() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
fs::create_dir(cache_dir.path().join("sub")).unwrap();
create_file(src.path(), "a.rs", "content");
let mgr = FingerprintManager::new();
let non_canonical_cache = cache_dir.path().join("sub").join("..").join("fp.json");
mgr.check(&non_canonical_cache, "two-layer", src.path(), &[], &[], &[]);
let canonical_cache = canon(cache_dir.path()).join("fp.json");
mgr.mark_success(&canonical_cache);
let result = mgr.check(&non_canonical_cache, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(
result.decision, "skip",
"mark_success with canonical path must match watch created with non-canonical path"
);
}
/// A content change that is never reported through `on_batch` must still
/// be detected by `check` and listed in `changed_files`.
#[test]
fn verify_catches_missed_watcher_events() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "original");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "modified");
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(
result.decision, "run",
"must detect change without on_batch"
);
assert!(
result.changed_files.iter().any(|f| f.contains("a.rs")),
"changed_files should contain a.rs, got {:?}",
result.changed_files
);
}
/// Rewriting a file with identical content, without any `on_batch` event,
/// must still let `check` answer "skip".
#[test]
fn verify_smart_touch_still_skips() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "stable");
let cache_file = cache_dir.path().join("fp.json");
let mgr = FingerprintManager::new();
mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
std::thread::sleep(std::time::Duration::from_millis(50));
create_file(src.path(), "a.rs", "stable");
let result = mgr.check(&cache_file, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(result.decision, "skip", "smart touch must not trigger run");
}
/// Two watches over the same root but different cache files are
/// independent: invalidating one must not affect the other.
#[test]
fn two_watches_independent() {
let src = TempDir::new().unwrap();
let cache_dir = TempDir::new().unwrap();
create_file(src.path(), "a.rs", "content");
let cache1 = cache_dir.path().join("c1.json");
let cache2 = cache_dir.path().join("c2.json");
let mgr = FingerprintManager::new();
mgr.check(&cache1, "two-layer", src.path(), &[], &[], &[]);
mgr.mark_success(&cache1);
mgr.check(&cache2, "hash", src.path(), &[], &[], &[]);
mgr.mark_success(&cache2);
mgr.invalidate(&cache1);
let r2 = mgr.check(&cache2, "hash", src.path(), &[], &[], &[]);
assert_eq!(r2.decision, "skip");
let r1 = mgr.check(&cache1, "two-layer", src.path(), &[], &[], &[]);
assert_eq!(r1.decision, "run");
}
/// Performance regression test (the assertion message cites issue #724):
/// one `on_batch` call touching 200 watches must finish within 10 seconds
/// and leave every watch dirty.
#[test]
fn on_batch_many_watches_completes_quickly() {
const ROOTS: usize = 200;
let cache_dir = TempDir::new().unwrap();
let mgr = FingerprintManager::new();
// Keep the TempDir handles alive so the directories outlive the test body.
let mut roots = Vec::with_capacity(ROOTS);
let mut cache_files = Vec::with_capacity(ROOTS);
for i in 0..ROOTS {
let root = TempDir::new().unwrap();
create_file(root.path(), "src.cpp", "original");
let cache_file = cache_dir.path().join(format!("fp{i}.json"));
mgr.check(&cache_file, "two-layer", root.path(), &[], &[], &[]);
mgr.mark_success(&cache_file);
cache_files.push(cache_file);
roots.push(root);
}
std::thread::sleep(std::time::Duration::from_millis(50));
let mut changed = Vec::with_capacity(ROOTS);
for root in &roots {
create_file(root.path(), "src.cpp", "modified");
changed.push(canon(&root.path().join("src.cpp")));
}
// Only the on_batch call itself is timed.
let start = std::time::Instant::now();
mgr.on_batch(&changed, &[]);
let elapsed = start.elapsed();
assert!(
elapsed < std::time::Duration::from_secs(10),
"on_batch over {ROOTS} watches took {elapsed:?}, expected < 10s (issue #724 regression)"
);
for (i, cache_file) in cache_files.iter().enumerate() {
let result = mgr.check(cache_file, "two-layer", roots[i].path(), &[], &[], &[]);
assert_eq!(
result.decision, "run",
"watch {i} should be dirty after on_batch"
);
}
}
}