Skip to main content

aft/
cache_freshness.rs

1use rayon::prelude::*;
2use std::fs;
3use std::path::{Path, PathBuf};
4#[cfg(debug_assertions)]
5use std::sync::atomic::{AtomicUsize, Ordering};
6use std::time::{SystemTime, UNIX_EPOCH};
7
/// Largest file size, in bytes, that will be content-hashed (4 MiB).
///
/// Files longer than this are never read for hashing; `hash_file_if_small`
/// reports `None` for them instead.
pub const CONTENT_HASH_SIZE_CAP: u64 = 4 << 20;
9
/// Debug-build-only tally of `verify_file_strict` invocations. It is bumped on
/// every strict verification and read or cleared through the `*_for_debug`
/// helpers in this file.
#[cfg(debug_assertions)]
static STRICT_VERIFY_FILE_CALLS: AtomicUsize = AtomicUsize::new(0);
12
/// Snapshot of a file's modification time, length, and content hash, as
/// captured by `collect` and later compared by the `verify_file*` functions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileFreshness {
    /// Modification time reported by the filesystem. `collect` stores
    /// `UNIX_EPOCH` here when the metadata cannot supply one.
    pub mtime: SystemTime,
    /// File length in bytes.
    pub size: u64,
    /// BLAKE3 hash of the file contents, or the all-zero hash when the file was
    /// larger than `CONTENT_HASH_SIZE_CAP` and therefore not hashed.
    pub content_hash: blake3::Hash,
}
19
/// Outcome of comparing a file on disk against a cached `FileFreshness`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FreshnessVerdict {
    /// Size and mtime both match the cached values (and, under strict
    /// verification, the content hash matches as well).
    HotFresh,
    /// The mtime differs from the cached one, but the size and the content
    /// hash still match. Carries the metadata observed on disk.
    ContentFresh {
        new_mtime: SystemTime,
        new_size: u64,
    },
    /// The file exists but could not be shown to match the cached snapshot.
    Stale,
    /// `fs::metadata` failed for the path.
    Deleted,
}
30
/// Returns the BLAKE3 hash of `bytes`.
pub fn hash_bytes(bytes: &[u8]) -> blake3::Hash {
    blake3::hash(bytes)
}
34
35pub fn hash_file_if_small(path: &Path, size: u64) -> std::io::Result<Option<blake3::Hash>> {
36    if size > CONTENT_HASH_SIZE_CAP {
37        return Ok(None);
38    }
39    fs::read(path).map(|bytes| Some(hash_bytes(&bytes)))
40}
41
/// Returns a hash whose 32 bytes are all zero.
///
/// `collect` stores this value as a placeholder for files it did not hash, and
/// strict verification treats a cached zero hash as unverifiable.
pub fn zero_hash() -> blake3::Hash {
    blake3::Hash::from_bytes([0u8; 32])
}
45
46pub fn collect(path: &Path) -> std::io::Result<FileFreshness> {
47    let metadata = fs::metadata(path)?;
48    let mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
49    let size = metadata.len();
50    let content_hash = hash_file_if_small(path, size)?.unwrap_or_else(zero_hash);
51    Ok(FileFreshness {
52        mtime,
53        size,
54        content_hash,
55    })
56}
57
/// Checks the file at `path` against `cached`, trusting matching metadata.
///
/// When both size and mtime equal the cached values the file is reported as
/// `HotFresh` without being read. Otherwise the content hash is consulted as
/// described on `verify_file_inner` (called here with hashing of matching
/// metadata disabled).
pub fn verify_file(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
    verify_file_inner(path, cached, false)
}
61
/// Checks the file at `path` against `cached`, re-hashing the contents even
/// when size and mtime both match.
///
/// In debug builds every call also increments `STRICT_VERIFY_FILE_CALLS`.
pub fn verify_file_strict(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
    // Counter exists only in debug builds; relaxed ordering is enough for a tally.
    #[cfg(debug_assertions)]
    STRICT_VERIFY_FILE_CALLS.fetch_add(1, Ordering::Relaxed);
    verify_file_inner(path, cached, true)
}
67
68/// Verify semantic cache file freshness in a private bounded Rayon pool.
69///
70/// Do not use the global pool here: load-time strict verification can hash every
71/// indexed file, and the semantic load/build already runs beside the bridge's
72/// single dispatch thread. Match the half-cores/cap-8 policy used by the search
73/// and callgraph cold-build pools.
74pub(crate) fn verify_files_strict_bounded<K: Send>(
75    files: Vec<(K, PathBuf, FileFreshness)>,
76) -> Vec<(K, PathBuf, FreshnessVerdict)> {
77    fn verify_one<K>(
78        (key, path, cached): (K, PathBuf, FileFreshness),
79    ) -> (K, PathBuf, FreshnessVerdict) {
80        let verdict = verify_file_strict(&path, &cached);
81        (key, path, verdict)
82    }
83
84    if files.len() <= 1 {
85        return files.into_iter().map(verify_one::<K>).collect();
86    }
87
88    match rayon::ThreadPoolBuilder::new()
89        .num_threads(strict_verify_pool_size())
90        .thread_name(|index| format!("aft-semantic-verify-{index}"))
91        .build()
92    {
93        Ok(pool) => pool.install(|| files.into_par_iter().map(verify_one::<K>).collect()),
94        Err(_) => files.into_iter().map(verify_one::<K>).collect(),
95    }
96}
97
/// Thread count for the strict-verification pool: half of the available
/// parallelism, rounded up, and kept within `1..=8`.
///
/// Falls back to treating the machine as single-core when the available
/// parallelism cannot be determined.
fn strict_verify_pool_size() -> usize {
    let cores = match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    };
    let half_rounded_up = cores.div_ceil(2);
    half_rounded_up.clamp(1, 8)
}
105
/// Resets the debug-build counter of `verify_file_strict` calls to zero.
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn reset_verify_file_strict_count_for_debug() {
    STRICT_VERIFY_FILE_CALLS.store(0, Ordering::Relaxed);
}
111
/// Returns how many times `verify_file_strict` has been called since the
/// counter was last reset (debug builds only).
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn verify_file_strict_count_for_debug() -> usize {
    STRICT_VERIFY_FILE_CALLS.load(Ordering::Relaxed)
}
117
118fn verify_file_inner(
119    path: &Path,
120    cached: &FileFreshness,
121    hash_matching_metadata: bool,
122) -> FreshnessVerdict {
123    let Ok(metadata) = fs::metadata(path) else {
124        return FreshnessVerdict::Deleted;
125    };
126    let new_size = metadata.len();
127    let new_mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
128    if new_size == cached.size && new_mtime == cached.mtime {
129        if hash_matching_metadata {
130            if new_size > CONTENT_HASH_SIZE_CAP || cached.content_hash == zero_hash() {
131                return FreshnessVerdict::Stale;
132            }
133            return match hash_file_if_small(path, new_size) {
134                Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::HotFresh,
135                _ => FreshnessVerdict::Stale,
136            };
137        }
138        return FreshnessVerdict::HotFresh;
139    }
140    if new_size != cached.size || new_size > CONTENT_HASH_SIZE_CAP {
141        return FreshnessVerdict::Stale;
142    }
143    match hash_file_if_small(path, new_size) {
144        Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::ContentFresh {
145            new_mtime,
146            new_size,
147        },
148        _ => FreshnessVerdict::Stale,
149    }
150}
151
// Unit tests for the freshness checks. Each test works on a file inside its
// own temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // Test helper: (over)write `path` with `bytes`, panicking on failure.
    fn write(path: &Path, bytes: &[u8]) {
        fs::write(path, bytes).unwrap();
    }

    // An untouched file has identical size and mtime, so the non-strict check
    // reports it as hot-fresh.
    #[test]
    fn hot_fresh_when_mtime_size_match() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("a.txt");
        write(&path, b"same");
        let fresh = collect(&path).unwrap();
        assert_eq!(verify_file(&path, &fresh), FreshnessVerdict::HotFresh);
    }

    // Same-length content swapped in with the mtime restored: the metadata
    // still matches, so only the strict (hashing) check notices the change.
    #[test]
    fn strict_hashes_small_file_when_metadata_matches() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("a.txt");
        let original_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
        write(&path, b"alpha");
        filetime::set_file_mtime(&path, original_mtime).unwrap();
        let fresh = collect(&path).unwrap();

        assert_eq!(
            verify_file_strict(&path, &fresh),
            FreshnessVerdict::HotFresh
        );

        // "bravo" has the same length as "alpha"; pin the mtime back as well.
        write(&path, b"bravo");
        filetime::set_file_mtime(&path, original_mtime).unwrap();

        assert_eq!(verify_file(&path, &fresh), FreshnessVerdict::HotFresh);
        assert_eq!(verify_file_strict(&path, &fresh), FreshnessVerdict::Stale);
    }

    // A file over the cap is cached with the zero placeholder hash, so strict
    // verification cannot confirm it and reports stale, while the non-strict
    // check still accepts the matching metadata.
    #[test]
    fn strict_stale_when_large_file_hash_was_not_cached() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("big.bin");
        let original_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
        let file = fs::File::create(&path).unwrap();
        // Extend the empty file to one byte past the cap without writing data.
        file.set_len(CONTENT_HASH_SIZE_CAP + 1).unwrap();
        filetime::set_file_mtime(&path, original_mtime).unwrap();
        let fresh = collect(&path).unwrap();

        assert_eq!(fresh.size, CONTENT_HASH_SIZE_CAP + 1);
        assert_eq!(fresh.content_hash, zero_hash());
        assert_eq!(verify_file(&path, &fresh), FreshnessVerdict::HotFresh);
        assert_eq!(verify_file_strict(&path, &fresh), FreshnessVerdict::Stale);
    }

    // Only the mtime moves; size and contents stay the same, so the hash
    // comparison yields content-fresh.
    #[test]
    fn content_fresh_when_only_mtime_changes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("a.txt");
        write(&path, b"same");
        let fresh = collect(&path).unwrap();
        // The empty append leaves the contents untouched; the explicit mtime
        // set below is what makes the metadata differ from the snapshot.
        let mut file = fs::OpenOptions::new().append(true).open(&path).unwrap();
        file.write_all(b"").unwrap();
        file.sync_all().unwrap();
        filetime::set_file_mtime(&path, filetime::FileTime::from_unix_time(1, 0)).unwrap();
        assert!(matches!(
            verify_file(&path, &fresh),
            FreshnessVerdict::ContentFresh { .. }
        ));
    }

    // A different length is stale without any hashing.
    #[test]
    fn stale_when_size_changes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("a.txt");
        write(&path, b"same");
        let fresh = collect(&path).unwrap();
        write(&path, b"different");
        assert_eq!(verify_file(&path, &fresh), FreshnessVerdict::Stale);
    }

    // Once the file is removed its metadata cannot be read, which is reported
    // as deleted.
    #[test]
    fn deleted_when_missing() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("a.txt");
        write(&path, b"same");
        let fresh = collect(&path).unwrap();
        fs::remove_file(&path).unwrap();
        assert_eq!(verify_file(&path, &fresh), FreshnessVerdict::Deleted);
    }

    // A reported size one byte over the cap short-circuits to `None`.
    #[test]
    fn over_cap_hash_is_not_computed() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("big.bin");
        fs::write(&path, vec![0u8; CONTENT_HASH_SIZE_CAP as usize + 1]).unwrap();
        assert!(hash_file_if_small(&path, CONTENT_HASH_SIZE_CAP + 1)
            .unwrap()
            .is_none());
    }
}