1use rayon::prelude::*;
2#[cfg(debug_assertions)]
3use std::cell::Cell;
4use std::fs;
5use std::path::{Path, PathBuf};
6#[cfg(debug_assertions)]
7use std::sync::atomic::{AtomicUsize, Ordering};
8use std::time::{SystemTime, UNIX_EPOCH};
9
/// Largest file size, in bytes, that is still content-hashed (4 MiB).
///
/// [`hash_file_if_small`] returns `Ok(None)` without reading the file when the size it is
/// given is greater than this value.
pub const CONTENT_HASH_SIZE_CAP: u64 = 4 * 1024 * 1024;

/// Debug-build counter incremented on every call to [`verify_file_strict`].
#[cfg(debug_assertions)]
static STRICT_VERIFY_FILE_CALLS: AtomicUsize = AtomicUsize::new(0);

#[cfg(debug_assertions)]
thread_local! {
    /// Debug-build, per-thread counter of [`hash_file_if_small`] calls that got past the
    /// size check and attempted to read the file.
    static HASH_FILE_IF_SMALL_CALLS: Cell<usize> = const { Cell::new(0) };
}
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq)]
20pub struct FileFreshness {
21 pub mtime: SystemTime,
22 pub size: u64,
23 pub content_hash: blake3::Hash,
24}
25
/// Outcome of comparing a file on disk against a cached [`FileFreshness`] snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FreshnessVerdict {
    /// Size and mtime both match the snapshot (and, for strict verification, the content
    /// hash matched as well).
    HotFresh,
    /// The mtime changed but the size and content hash are unchanged; carries the metadata
    /// observed during verification.
    ContentFresh {
        /// Modification time observed during verification.
        new_mtime: SystemTime,
        /// File length in bytes observed during verification.
        new_size: u64,
    },
    /// The file could not be shown to be unchanged.
    Stale,
    /// The file's metadata could not be read.
    Deleted,
}
36
37pub fn hash_bytes(bytes: &[u8]) -> blake3::Hash {
38 blake3::hash(bytes)
39}
40
41pub fn hash_file_if_small(path: &Path, size: u64) -> std::io::Result<Option<blake3::Hash>> {
42 if size > CONTENT_HASH_SIZE_CAP {
43 return Ok(None);
44 }
45 #[cfg(debug_assertions)]
46 HASH_FILE_IF_SMALL_CALLS.with(|calls| calls.set(calls.get() + 1));
47 fs::read(path).map(|bytes| Some(hash_bytes(&bytes)))
48}
49
50pub fn metadata_matches(path: &Path, cached: &FileFreshness) -> std::io::Result<bool> {
51 let metadata = fs::metadata(path)?;
52 let new_size = metadata.len();
53 let new_mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
54 Ok(new_size == cached.size && new_mtime == cached.mtime)
55}
56
57pub fn zero_hash() -> blake3::Hash {
58 blake3::Hash::from_bytes([0u8; 32])
59}
60
61pub fn collect(path: &Path) -> std::io::Result<FileFreshness> {
62 let metadata = fs::metadata(path)?;
63 let mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
64 let size = metadata.len();
65 let content_hash = hash_file_if_small(path, size)?.unwrap_or_else(zero_hash);
66 Ok(FileFreshness {
67 mtime,
68 size,
69 content_hash,
70 })
71}
72
73pub fn verify_file(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
74 verify_file_inner(path, cached, false)
75}
76
77pub fn verify_file_strict(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
78 #[cfg(debug_assertions)]
79 STRICT_VERIFY_FILE_CALLS.fetch_add(1, Ordering::Relaxed);
80 verify_file_inner(path, cached, true)
81}
82
83pub(crate) fn verify_files_strict_bounded<K: Send>(
90 files: Vec<(K, PathBuf, FileFreshness)>,
91) -> Vec<(K, PathBuf, FreshnessVerdict)> {
92 fn verify_one<K>(
93 (key, path, cached): (K, PathBuf, FileFreshness),
94 ) -> (K, PathBuf, FreshnessVerdict) {
95 let verdict = verify_file_strict(&path, &cached);
96 (key, path, verdict)
97 }
98
99 if files.len() <= 1 {
100 return files.into_iter().map(verify_one::<K>).collect();
101 }
102
103 match rayon::ThreadPoolBuilder::new()
104 .num_threads(strict_verify_pool_size())
105 .thread_name(|index| format!("aft-semantic-verify-{index}"))
106 .build()
107 {
108 Ok(pool) => pool.install(|| files.into_par_iter().map(verify_one::<K>).collect()),
109 Err(_) => files.into_iter().map(verify_one::<K>).collect(),
110 }
111}
112
/// Picks the worker count for the strict-verification pool: half of the available
/// parallelism, rounded up, limited to the range `1..=8`.
///
/// If the available parallelism cannot be determined it is treated as 1.
fn strict_verify_pool_size() -> usize {
    let cores = match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    };
    let half_rounded_up = cores.div_ceil(2);
    half_rounded_up.clamp(1, 8)
}
120
121#[cfg(debug_assertions)]
122#[doc(hidden)]
123pub fn reset_verify_file_strict_count_for_debug() {
124 STRICT_VERIFY_FILE_CALLS.store(0, Ordering::Relaxed);
125}
126
127#[cfg(debug_assertions)]
128#[doc(hidden)]
129pub fn verify_file_strict_count_for_debug() -> usize {
130 STRICT_VERIFY_FILE_CALLS.load(Ordering::Relaxed)
131}
132
133#[cfg(debug_assertions)]
134#[doc(hidden)]
135pub fn reset_hash_file_if_small_count_for_debug() {
136 HASH_FILE_IF_SMALL_CALLS.with(|calls| calls.set(0));
137}
138
139#[cfg(debug_assertions)]
140#[doc(hidden)]
141pub fn hash_file_if_small_count_for_debug() -> usize {
142 HASH_FILE_IF_SMALL_CALLS.with(Cell::get)
143}
144
145fn verify_file_inner(
146 path: &Path,
147 cached: &FileFreshness,
148 hash_matching_metadata: bool,
149) -> FreshnessVerdict {
150 let Ok(metadata) = fs::metadata(path) else {
151 return FreshnessVerdict::Deleted;
152 };
153 let new_size = metadata.len();
154 let new_mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
155 if new_size == cached.size && new_mtime == cached.mtime {
156 if hash_matching_metadata {
157 if new_size > CONTENT_HASH_SIZE_CAP || cached.content_hash == zero_hash() {
158 return FreshnessVerdict::Stale;
159 }
160 return match hash_file_if_small(path, new_size) {
161 Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::HotFresh,
162 _ => FreshnessVerdict::Stale,
163 };
164 }
165 return FreshnessVerdict::HotFresh;
166 }
167 if new_size != cached.size || new_size > CONTENT_HASH_SIZE_CAP {
168 return FreshnessVerdict::Stale;
169 }
170 match hash_file_if_small(path, new_size) {
171 Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::ContentFresh {
172 new_mtime,
173 new_size,
174 },
175 _ => FreshnessVerdict::Stale,
176 }
177}
178
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Writes `bytes` to `path`, panicking if the write fails.
    fn write(path: &Path, bytes: &[u8]) {
        fs::write(path, bytes).unwrap();
    }

    /// Manual benchmark comparing stat-only verification with strict (hashing) verification
    /// over every file of the repository named by `AFT_BENCH_REPO`.
    #[test]
    #[ignore = "manual benchmark; needs AFT_BENCH_REPO"]
    fn freshness_stat_vs_hash_benchmark() {
        use std::time::Instant;

        // Three rounds, so the middle element after sorting is the median.
        const ROUNDS: usize = 3;

        let repo = match std::env::var("AFT_BENCH_REPO") {
            Ok(value) => value,
            Err(_) => {
                eprintln!("AFT_BENCH_REPO unset; skipping");
                return;
            }
        };
        let root = PathBuf::from(&repo);
        let files: Vec<PathBuf> = crate::callgraph::walk_project_files(&root).collect();

        // Files that cannot be snapshotted are left out of the benchmark.
        let mut records: Vec<(PathBuf, FileFreshness)> = Vec::new();
        for file in &files {
            if let Ok(freshness) = collect(file) {
                records.push((file.clone(), freshness));
            }
        }

        eprintln!(
            "\n=== freshness stat-vs-hash benchmark ===\nrepo: {}\nfiles walked: {} freshness records: {}",
            root.display(),
            files.len(),
            records.len()
        );

        // Elapsed times are collected in microseconds and converted to ms for the summary.
        let mut stat_micros = Vec::new();
        let mut hash_micros = Vec::new();
        for _ in 0..ROUNDS {
            let started = Instant::now();
            let stat_hot = records
                .iter()
                .filter(|(path, cached)| verify_file(path, cached) == FreshnessVerdict::HotFresh)
                .count();
            stat_micros.push(started.elapsed().as_micros());

            let started = Instant::now();
            let hash_hot = records
                .iter()
                .filter(|(path, cached)| {
                    verify_file_strict(path, cached) == FreshnessVerdict::HotFresh
                })
                .count();
            hash_micros.push(started.elapsed().as_micros());

            eprintln!(" iter: stat_hot={stat_hot} hash_hot={hash_hot}");
        }
        stat_micros.sort_unstable();
        hash_micros.sort_unstable();
        let stat_med = stat_micros[ROUNDS / 2] as f64 / 1000.0;
        let hash_med = hash_micros[ROUNDS / 2] as f64 / 1000.0;
        eprintln!(
            "SUMMARY files={} stat_all_median={:.2}ms hash_all_median={:.2}ms speedup={:.1}x",
            records.len(),
            stat_med,
            hash_med,
            hash_med / stat_med.max(0.001)
        );
    }

    #[test]
    fn hot_fresh_when_mtime_size_match() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::HotFresh);
    }

    #[test]
    fn strict_hashes_small_file_when_metadata_matches() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("a.txt");
        let pinned_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
        write(&target, b"alpha");
        filetime::set_file_mtime(&target, pinned_mtime).unwrap();
        let snapshot = collect(&target).unwrap();

        assert_eq!(
            verify_file_strict(&target, &snapshot),
            FreshnessVerdict::HotFresh
        );

        // Same length, same mtime, different bytes: only hashing can spot the change.
        write(&target, b"bravo");
        filetime::set_file_mtime(&target, pinned_mtime).unwrap();

        let relaxed = verify_file(&target, &snapshot);
        let strict = verify_file_strict(&target, &snapshot);
        assert_eq!(relaxed, FreshnessVerdict::HotFresh);
        assert_eq!(strict, FreshnessVerdict::Stale);
    }

    #[test]
    fn strict_stale_when_large_file_hash_was_not_cached() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("big.bin");
        let pinned_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
        let handle = fs::File::create(&target).unwrap();
        handle.set_len(CONTENT_HASH_SIZE_CAP + 1).unwrap();
        filetime::set_file_mtime(&target, pinned_mtime).unwrap();
        let snapshot = collect(&target).unwrap();

        assert_eq!(snapshot.size, CONTENT_HASH_SIZE_CAP + 1);
        assert_eq!(snapshot.content_hash, zero_hash());

        let relaxed = verify_file(&target, &snapshot);
        let strict = verify_file_strict(&target, &snapshot);
        assert_eq!(relaxed, FreshnessVerdict::HotFresh);
        assert_eq!(strict, FreshnessVerdict::Stale);
    }

    #[test]
    fn content_fresh_when_only_mtime_changes() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();

        // Append nothing, then force a different mtime so only the timestamp differs.
        let mut handle = fs::OpenOptions::new().append(true).open(&target).unwrap();
        handle.write_all(b"").unwrap();
        handle.sync_all().unwrap();
        filetime::set_file_mtime(&target, filetime::FileTime::from_unix_time(1, 0)).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert!(matches!(verdict, FreshnessVerdict::ContentFresh { .. }));
    }

    #[test]
    fn stale_when_size_changes() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();
        write(&target, b"different");

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::Stale);
    }

    #[test]
    fn deleted_when_missing() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();
        fs::remove_file(&target).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::Deleted);
    }

    #[test]
    fn over_cap_hash_is_not_computed() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("big.bin");
        let oversized = vec![0u8; CONTENT_HASH_SIZE_CAP as usize + 1];
        fs::write(&target, oversized).unwrap();

        let hashed = hash_file_if_small(&target, CONTENT_HASH_SIZE_CAP + 1).unwrap();
        assert!(hashed.is_none());
    }
}