ripvec_core/cache/
diff.rs1use std::collections::HashSet;
7use std::path::{Path, PathBuf};
8use std::time::UNIX_EPOCH;
9
10use crate::cache::manifest::Manifest;
11
/// Outcome of comparing the files found on disk against a cache [`Manifest`].
#[derive(Debug)]
pub struct DiffResult {
    /// Walked file paths that need (re)processing: the file has no manifest entry,
    /// its metadata or content could not be read, or its content hash differs from
    /// the manifest entry.
    pub dirty: Vec<PathBuf>,
    /// Manifest keys (root-relative path strings) that the file walk did not
    /// produce.
    pub deleted: Vec<String>,
    /// Number of walked files whose manifest entry still matches the file on disk.
    pub unchanged: usize,
}
22
23pub fn compute_diff(root: &Path, manifest: &Manifest) -> crate::Result<DiffResult> {
33 compute_diff_with_options(root, manifest, &crate::walk::WalkOptions::default())
34}
35
36pub fn compute_diff_with_options(
44 root: &Path,
45 manifest: &Manifest,
46 options: &crate::walk::WalkOptions,
47) -> crate::Result<DiffResult> {
48 let mut dirty = Vec::new();
49 let mut unchanged = 0;
50
51 let mut seen_files: HashSet<String> = HashSet::new();
53
54 let files = crate::walk::collect_files_with_options(root, options);
56
57 for file_path in &files {
58 let relative = file_path
59 .strip_prefix(root)
60 .unwrap_or(file_path)
61 .to_string_lossy()
62 .to_string();
63
64 seen_files.insert(relative.clone());
65
66 let Some(entry) = manifest.files.get(&relative) else {
68 dirty.push(file_path.clone());
70 continue;
71 };
72
73 let Ok(metadata) = std::fs::metadata(file_path) else {
75 dirty.push(file_path.clone());
76 continue;
77 };
78
79 let mtime_secs = metadata
80 .modified()
81 .ok()
82 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
83 .map_or(0, |d| d.as_secs());
84 let size = metadata.len();
85
86 if mtime_secs == entry.mtime_secs && size == entry.size {
87 unchanged += 1;
89 continue;
90 }
91
92 let Ok(content) = std::fs::read(file_path) else {
94 dirty.push(file_path.clone());
95 continue;
96 };
97 let content_hash = blake3::hash(&content).to_hex().to_string();
98
99 if content_hash == entry.content_hash {
100 unchanged += 1;
102 } else {
103 dirty.push(file_path.clone());
104 }
105 }
106
107 let deleted: Vec<String> = manifest
109 .files
110 .keys()
111 .filter(|k| !seen_files.contains(k.as_str()))
112 .cloned()
113 .collect();
114
115 Ok(DiffResult {
116 dirty,
117 deleted,
118 unchanged,
119 })
120}
121
122pub fn hash_file(path: &Path) -> crate::Result<String> {
128 let content = std::fs::read(path).map_err(|e| crate::Error::Io {
129 path: path.display().to_string(),
130 source: e,
131 })?;
132 Ok(blake3::hash(&content).to_hex().to_string())
133}
134
/// Return the modification time of `path` in whole seconds since the Unix epoch.
///
/// Returns `0` if the metadata cannot be read, the modification time is not
/// available, or the modification time is earlier than the Unix epoch.
#[must_use]
pub fn mtime_secs(path: &Path) -> u64 {
    let Ok(metadata) = std::fs::metadata(path) else {
        return 0;
    };
    let Ok(modified) = metadata.modified() else {
        return 0;
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
146
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `content` to `dir/relative`, creating parent directories as needed,
    /// and return the full path.
    fn create_file(dir: &Path, relative: &str, content: &str) -> PathBuf {
        let target = dir.join(relative);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&target, content).unwrap();
        target
    }

    /// Record `root/relative` in `manifest` using the file's current on-disk mtime
    /// and size together with the BLAKE3 hash of `content`.
    fn track_file(manifest: &mut Manifest, root: &Path, relative: &str, content: &str) {
        let meta = std::fs::metadata(root.join(relative)).unwrap();
        let mtime = meta
            .modified()
            .unwrap()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let digest = blake3::hash(content.as_bytes()).to_hex().to_string();
        manifest.add_file(relative, mtime, meta.len(), &digest, 1);
    }

    /// Build a manifest containing a single entry for `root/relative`.
    fn manifest_with_file(root: &Path, relative: &str, content: &str) -> Manifest {
        let mut manifest = Manifest::new("test-model");
        track_file(&mut manifest, root, relative, content);
        manifest
    }

    #[test]
    fn detects_new_file() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        create_file(root, "existing.rs", "fn existing() {}");
        create_file(root, "new_file.rs", "fn new() {}");

        // Only one of the two files is known to the manifest.
        let manifest = manifest_with_file(root, "existing.rs", "fn existing() {}");

        let diff = compute_diff(root, &manifest).unwrap();
        assert_eq!(diff.dirty.len(), 1);
        assert!(diff.dirty[0].ends_with("new_file.rs"));
        assert_eq!(diff.unchanged, 1);
        assert!(diff.deleted.is_empty());
    }

    #[test]
    fn detects_modified_file() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        create_file(root, "main.rs", "fn main() {}");

        let manifest = manifest_with_file(root, "main.rs", "fn main() {}");

        // Pause briefly before rewriting. The manifest stores whole-second mtimes, so
        // detection here relies on the new content's different size and hash.
        std::thread::sleep(std::time::Duration::from_millis(50));
        create_file(root, "main.rs", "fn main() { println!(\"changed\"); }");

        let diff = compute_diff(root, &manifest).unwrap();
        assert_eq!(diff.dirty.len(), 1);
        assert_eq!(diff.unchanged, 0);
    }

    #[test]
    fn detects_deleted_file() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        create_file(root, "keep.rs", "fn keep() {}");

        let mut manifest = manifest_with_file(root, "keep.rs", "fn keep() {}");
        // This entry has no corresponding file on disk.
        manifest.add_file("deleted.rs", 1000, 100, "oldhash", 1);

        let diff = compute_diff(root, &manifest).unwrap();
        assert_eq!(diff.deleted.len(), 1);
        assert_eq!(diff.deleted[0], "deleted.rs");
        assert_eq!(diff.unchanged, 1);
    }

    #[test]
    fn unchanged_file_detected() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        create_file(root, "stable.rs", "fn stable() {}");

        let manifest = manifest_with_file(root, "stable.rs", "fn stable() {}");

        let diff = compute_diff(root, &manifest).unwrap();
        assert!(diff.dirty.is_empty());
        assert!(diff.deleted.is_empty());
        assert_eq!(diff.unchanged, 1);
    }

    #[test]
    fn excluded_files_are_reported_deleted_from_manifest() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        create_file(root, "keep.rs", "fn keep() {}");
        create_file(root, "events.jsonl", "{\"event\":\"x\"}\n");

        // Both files are tracked, but the walk below excludes the `.jsonl` one.
        let mut manifest = manifest_with_file(root, "keep.rs", "fn keep() {}");
        track_file(&mut manifest, root, "events.jsonl", "{\"event\":\"x\"}\n");

        let options = crate::walk::WalkOptions {
            exclude_extensions: vec!["jsonl".to_string()],
            ..crate::walk::WalkOptions::default()
        };
        let diff = compute_diff_with_options(root, &manifest, &options).unwrap();

        assert!(diff.dirty.is_empty());
        assert_eq!(diff.deleted, ["events.jsonl"]);
        assert_eq!(diff.unchanged, 1);
    }
}