Skip to main content

aft/
cache_freshness.rs

1use std::fs;
2use std::path::Path;
3#[cfg(debug_assertions)]
4use std::sync::atomic::{AtomicUsize, Ordering};
5use std::time::{SystemTime, UNIX_EPOCH};
6
/// Largest file size, in bytes, that is still content-hashed (4 MiB).
///
/// `hash_file_if_small` returns `None` for any size strictly above this value.
pub const CONTENT_HASH_SIZE_CAP: u64 = 4 << 20;
8
// Counts calls to `verify_file_strict`. Only compiled when `debug_assertions`
// is enabled; read and reset through the `*_for_debug` helpers in this file.
#[cfg(debug_assertions)]
static STRICT_VERIFY_FILE_CALLS: AtomicUsize = AtomicUsize::new(0);
11
/// Snapshot of a file's on-disk state, as produced by `collect`, that later
/// verification calls compare against the file's current state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileFreshness {
    /// Modification time reported by the filesystem; `collect` stores
    /// `UNIX_EPOCH` when the platform cannot provide one.
    pub mtime: SystemTime,
    /// File length in bytes at the time of the snapshot.
    pub size: u64,
    /// BLAKE3 hash of the file contents. `collect` stores the all-zero hash
    /// (see `zero_hash`) when the file was larger than `CONTENT_HASH_SIZE_CAP`.
    pub content_hash: blake3::Hash,
}
18
/// Outcome of comparing a file on disk with a cached `FileFreshness`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FreshnessVerdict {
    /// Size and mtime both equal the cached values (and, for the strict check,
    /// the content hash matched as well).
    HotFresh,
    /// The mtime differs but size and content hash still match the cache.
    /// Carries the metadata observed during verification.
    ContentFresh {
        /// Modification time read during verification.
        new_mtime: SystemTime,
        /// File length read during verification (equal to the cached size).
        new_size: u64,
    },
    /// The file could not be shown to match the cache: size changed, hash
    /// differs, the file is over the hashing cap, or it could not be read.
    Stale,
    /// The file's metadata could not be read at all.
    Deleted,
}
29
/// Returns the BLAKE3 hash of `bytes`.
pub fn hash_bytes(bytes: &[u8]) -> blake3::Hash {
    blake3::hash(bytes)
}
33
34pub fn hash_file_if_small(path: &Path, size: u64) -> std::io::Result<Option<blake3::Hash>> {
35    if size > CONTENT_HASH_SIZE_CAP {
36        return Ok(None);
37    }
38    fs::read(path).map(|bytes| Some(hash_bytes(&bytes)))
39}
40
/// Returns the all-zero 32-byte hash.
///
/// `collect` stores this value in place of a real content hash when a file is
/// too large to hash, and the strict verification path treats a cached hash
/// equal to it as "no hash available".
pub fn zero_hash() -> blake3::Hash {
    blake3::Hash::from_bytes([0u8; 32])
}
44
45pub fn collect(path: &Path) -> std::io::Result<FileFreshness> {
46    let metadata = fs::metadata(path)?;
47    let mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
48    let size = metadata.len();
49    let content_hash = hash_file_if_small(path, size)?.unwrap_or_else(zero_hash);
50    Ok(FileFreshness {
51        mtime,
52        size,
53        content_hash,
54    })
55}
56
/// Compares the file at `path` with `cached`, trusting matching metadata.
///
/// When size and mtime both equal the cached values the file is reported as
/// `HotFresh` without reading its contents; the contents are hashed only when
/// the mtime differs but the size still matches.
pub fn verify_file(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
    verify_file_inner(path, cached, false)
}
60
/// Compares the file at `path` with `cached`, re-hashing the contents even
/// when size and mtime both match.
///
/// With matching metadata, a file over `CONTENT_HASH_SIZE_CAP` or a cached
/// hash equal to `zero_hash()` yields `Stale`, because the contents cannot be
/// confirmed.
pub fn verify_file_strict(path: &Path, cached: &FileFreshness) -> FreshnessVerdict {
    // Debug-only bookkeeping so callers can observe how often the strict path runs.
    #[cfg(debug_assertions)]
    STRICT_VERIFY_FILE_CALLS.fetch_add(1, Ordering::Relaxed);
    verify_file_inner(path, cached, true)
}
66
/// Resets the `verify_file_strict` call counter to zero.
///
/// Only available when `debug_assertions` is enabled.
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn reset_verify_file_strict_count_for_debug() {
    STRICT_VERIFY_FILE_CALLS.store(0, Ordering::Relaxed);
}
72
/// Returns how many times `verify_file_strict` has been called since the
/// counter was last reset (or since program start).
///
/// Only available when `debug_assertions` is enabled.
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn verify_file_strict_count_for_debug() -> usize {
    STRICT_VERIFY_FILE_CALLS.load(Ordering::Relaxed)
}
78
79fn verify_file_inner(
80    path: &Path,
81    cached: &FileFreshness,
82    hash_matching_metadata: bool,
83) -> FreshnessVerdict {
84    let Ok(metadata) = fs::metadata(path) else {
85        return FreshnessVerdict::Deleted;
86    };
87    let new_size = metadata.len();
88    let new_mtime = metadata.modified().unwrap_or(UNIX_EPOCH);
89    if new_size == cached.size && new_mtime == cached.mtime {
90        if hash_matching_metadata {
91            if new_size > CONTENT_HASH_SIZE_CAP || cached.content_hash == zero_hash() {
92                return FreshnessVerdict::Stale;
93            }
94            return match hash_file_if_small(path, new_size) {
95                Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::HotFresh,
96                _ => FreshnessVerdict::Stale,
97            };
98        }
99        return FreshnessVerdict::HotFresh;
100    }
101    if new_size != cached.size || new_size > CONTENT_HASH_SIZE_CAP {
102        return FreshnessVerdict::Stale;
103    }
104    match hash_file_if_small(path, new_size) {
105        Ok(Some(hash)) if hash == cached.content_hash => FreshnessVerdict::ContentFresh {
106            new_mtime,
107            new_size,
108        },
109        _ => FreshnessVerdict::Stale,
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Creates or overwrites `path` with `bytes`, panicking on any I/O error.
    fn write(path: &Path, bytes: &[u8]) {
        fs::write(path, bytes).unwrap();
    }

    /// Fixed modification time pinned onto files whose mtime must not move
    /// between the snapshot and the verification.
    fn pinned_mtime() -> filetime::FileTime {
        filetime::FileTime::from_unix_time(1_700_000_000, 0)
    }

    /// An untouched file is `HotFresh` under the non-strict check.
    #[test]
    fn hot_fresh_when_mtime_size_match() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("a.txt");
        write(&target, b"same");

        let snapshot = collect(&target).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::HotFresh);
    }

    /// The strict check catches a same-size, same-mtime content change that
    /// the non-strict check cannot see.
    #[test]
    fn strict_hashes_small_file_when_metadata_matches() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("a.txt");
        let pinned = pinned_mtime();
        write(&target, b"alpha");
        filetime::set_file_mtime(&target, pinned).unwrap();
        let snapshot = collect(&target).unwrap();

        let unchanged = verify_file_strict(&target, &snapshot);
        assert_eq!(unchanged, FreshnessVerdict::HotFresh);

        // Same length, different bytes, and the mtime forced back to the original.
        write(&target, b"bravo");
        filetime::set_file_mtime(&target, pinned).unwrap();

        let lenient = verify_file(&target, &snapshot);
        assert_eq!(lenient, FreshnessVerdict::HotFresh);
        let strict = verify_file_strict(&target, &snapshot);
        assert_eq!(strict, FreshnessVerdict::Stale);
    }

    /// A file over the cap has no cached hash, so the strict check cannot
    /// confirm it and reports `Stale`.
    #[test]
    fn strict_stale_when_large_file_hash_was_not_cached() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("big.bin");
        let oversized = CONTENT_HASH_SIZE_CAP + 1;
        let handle = fs::File::create(&target).unwrap();
        handle.set_len(oversized).unwrap();
        filetime::set_file_mtime(&target, pinned_mtime()).unwrap();
        let snapshot = collect(&target).unwrap();

        assert_eq!(snapshot.size, oversized);
        assert_eq!(snapshot.content_hash, zero_hash());
        assert_eq!(verify_file(&target, &snapshot), FreshnessVerdict::HotFresh);
        assert_eq!(
            verify_file_strict(&target, &snapshot),
            FreshnessVerdict::Stale
        );
    }

    /// Changing only the mtime leaves the content hash intact, which yields
    /// `ContentFresh`.
    #[test]
    fn content_fresh_when_only_mtime_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();

        // Touch the file without altering its bytes, then move the mtime.
        let mut appender = fs::OpenOptions::new()
            .append(true)
            .open(&target)
            .unwrap();
        appender.write_all(b"").unwrap();
        appender.sync_all().unwrap();
        let moved_mtime = filetime::FileTime::from_unix_time(1, 0);
        filetime::set_file_mtime(&target, moved_mtime).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert!(matches!(verdict, FreshnessVerdict::ContentFresh { .. }));
    }

    /// A different length is enough to declare the file `Stale`.
    #[test]
    fn stale_when_size_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();

        write(&target, b"different");

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::Stale);
    }

    /// A removed file is reported as `Deleted`.
    #[test]
    fn deleted_when_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("a.txt");
        write(&target, b"same");
        let snapshot = collect(&target).unwrap();

        fs::remove_file(&target).unwrap();

        let verdict = verify_file(&target, &snapshot);
        assert_eq!(verdict, FreshnessVerdict::Deleted);
    }

    /// Files one byte over the cap are skipped by the hashing helper.
    #[test]
    fn over_cap_hash_is_not_computed() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("big.bin");
        let payload = vec![0u8; CONTENT_HASH_SIZE_CAP as usize + 1];
        fs::write(&target, payload).unwrap();

        let outcome = hash_file_if_small(&target, CONTENT_HASH_SIZE_CAP + 1).unwrap();
        assert!(outcome.is_none());
    }
}
213}