1use crate::manifest::{Diff, DirEntry, FileEntry, Full, Manifest};
6use std::collections::HashMap;
7
8#[derive(Default)]
10pub struct DiffOptions {
11 pub parent_manifest_hash: Option<String>,
15
16 pub ignore_hashes: bool,
20
21 pub preserve_runnable: bool,
35}
36
37pub fn entries_differ(
57 parent: &FileEntry,
58 current: &FileEntry,
59 ignore_hashes: bool,
60 ignore_runnable: bool,
61) -> bool {
62 let parent_is_symlink = parent.symlink_target.is_some();
63 let current_is_symlink = current.symlink_target.is_some();
64
65 if parent_is_symlink != current_is_symlink {
67 return true;
68 }
69
70 if current_is_symlink {
72 return parent.symlink_target != current.symlink_target;
73 }
74
75 if parent.size != current.size || parent.mtime != current.mtime {
77 return true;
78 }
79 if !ignore_hashes
80 && (parent.hash != current.hash || parent.chunk_hashes != current.chunk_hashes)
81 {
82 return true;
83 }
84 if !ignore_runnable && parent.runnable != current.runnable {
85 return true;
86 }
87 false
88}
89
90fn has_hashed_files<P, K>(manifest: &Manifest<P, K>) -> Option<bool> {
92 let regular_files: Vec<_> = manifest
93 .files
94 .iter()
95 .filter(|f| f.symlink_target.is_none() && !f.deleted)
96 .collect();
97 if regular_files.is_empty() {
98 return None;
99 }
100 Some(
101 regular_files
102 .iter()
103 .any(|f| f.hash.is_some() || f.chunk_hashes.is_some()),
104 )
105}
106
107pub fn diff_snapshots<P: Clone>(
113 parent: &Manifest<P, Full>,
114 current: &Manifest<P, Full>,
115 options: &DiffOptions,
116) -> crate::Result<Manifest<P, Diff>> {
117 if !options.ignore_hashes {
118 let parent_hashed = has_hashed_files(parent);
119 let current_hashed = has_hashed_files(current);
120 if let (Some(ph), Some(ch)) = (parent_hashed, current_hashed) {
121 if ph && !ch {
122 return Err(crate::SnapshotError::Validation(
123 "cannot diff hashed parent manifest against unhashed current manifest when ignore_hashes=false".into(),
124 ));
125 }
126 if !ph && ch {
127 return Err(crate::SnapshotError::Validation(
128 "cannot diff unhashed parent manifest against hashed current manifest when ignore_hashes=false".into(),
129 ));
130 }
131 }
132 }
133
134 let parent_files: HashMap<&str, &FileEntry> =
135 parent.files.iter().map(|f| (f.path.as_str(), f)).collect();
136 let parent_dirs: HashMap<&str, &DirEntry> =
137 parent.dirs.iter().map(|d| (d.path.as_str(), d)).collect();
138 let current_files: HashMap<&str, &FileEntry> =
139 current.files.iter().map(|f| (f.path.as_str(), f)).collect();
140 let current_dirs: HashMap<&str, &DirEntry> =
141 current.dirs.iter().map(|d| (d.path.as_str(), d)).collect();
142
143 let mut files = Vec::new();
144
145 for cf in ¤t.files {
147 match parent_files.get(cf.path.as_str()) {
148 None => files.push(cf.clone()),
149 Some(pf) => {
150 if entries_differ(
156 pf,
157 cf,
158 options.ignore_hashes,
159 options.preserve_runnable,
160 ) {
161 let mut entry = cf.clone();
162 if options.preserve_runnable && cf.symlink_target.is_none() {
163 entry.runnable = pf.runnable;
164 }
165 files.push(entry);
166 }
167 }
168 }
169 }
170
171 let mut deleted_file_paths: std::collections::HashSet<&str> = std::collections::HashSet::new();
173 for pf in &parent.files {
174 if !current_files.contains_key(pf.path.as_str()) {
175 deleted_file_paths.insert(&pf.path);
176 }
177 }
178
179 let mut dirs = Vec::new();
180
181 for cd in ¤t.dirs {
183 if !parent_dirs.contains_key(cd.path.as_str()) {
184 dirs.push(cd.clone());
185 }
186 }
187
188 let mut deleted_dir_paths: std::collections::HashSet<&str> = std::collections::HashSet::new();
190 for pd in &parent.dirs {
191 if !current_dirs.contains_key(pd.path.as_str()) {
192 deleted_dir_paths.insert(&pd.path);
193 }
194 }
195
196 for deleted_dir in deleted_dir_paths.iter().copied().collect::<Vec<_>>() {
198 let dir_prefix = format!("{}/", deleted_dir);
199 for pf in &parent.files {
200 if pf.path.starts_with(&dir_prefix) && !current_files.contains_key(pf.path.as_str()) {
201 deleted_file_paths.insert(&pf.path);
202 }
203 }
204 for pd in &parent.dirs {
205 if pd.path.starts_with(&dir_prefix) && !current_dirs.contains_key(pd.path.as_str()) {
206 deleted_dir_paths.insert(&pd.path);
207 }
208 }
209 }
210
211 for path in &deleted_file_paths {
213 files.push(FileEntry::deleted(*path));
214 }
215
216 let sorted_deleted_dirs: Vec<&str> = deleted_dir_paths.into_iter().collect();
218 for path in sorted_deleted_dirs {
219 dirs.push(DirEntry::deleted(path));
220 }
221
222 files.sort_by(|a, b| match (a.deleted, b.deleted) {
224 (false, true) => std::cmp::Ordering::Less,
225 (true, false) => std::cmp::Ordering::Greater,
226 _ => a.path.cmp(&b.path),
227 });
228
229 dirs.sort_by(|a, b| match (a.deleted, b.deleted) {
231 (false, true) => std::cmp::Ordering::Less,
232 (true, false) => std::cmp::Ordering::Greater,
233 (false, false) => a.path.cmp(&b.path),
234 (true, true) => b.path.len().cmp(&a.path.len()).then(a.path.cmp(&b.path)),
235 });
236
237 let mut result = Manifest::new(parent.hash_alg, parent.file_chunk_size_bytes);
238 result.files = files;
239 result.dirs = dirs;
240 result.parent_manifest_hash = options.parent_manifest_hash.clone();
241 result.recompute_total_size();
242 Ok(result)
243}
244
245#[cfg(test)]
246mod tests {
247 use super::*;
248 use crate::hash::HashAlgorithm;
249 use crate::manifest::{Full, Rel};
250 use crate::{DirEntry, FileEntry, Manifest, DEFAULT_FILE_CHUNK_SIZE};
251
252 type RelSnapshot = Manifest<Rel, Full>;
253
254 fn make(files: Vec<FileEntry>, dirs: Vec<DirEntry>) -> RelSnapshot {
255 Manifest::new(HashAlgorithm::Xxh128, DEFAULT_FILE_CHUNK_SIZE)
256 .with_files(files)
257 .with_dirs(dirs)
258 }
259
260 fn default_opts() -> DiffOptions {
261 DiffOptions::default()
262 }
263
264 #[test]
265 fn no_changes_empty_diff() {
266 let m = make(vec![FileEntry::file("a.txt", 100, 1000)], vec![]);
267 let diff = diff_snapshots(&m, &m, &default_opts()).unwrap();
268 assert!(diff.files.is_empty());
269 assert!(diff.dirs.is_empty());
270 }
271
272 #[test]
273 fn new_file_detected() {
274 let parent = make(vec![], vec![]);
275 let current = make(vec![FileEntry::file("new.txt", 50, 1)], vec![]);
276 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
277 assert_eq!(diff.files.len(), 1);
278 assert_eq!(diff.files[0].path, "new.txt");
279 assert!(!diff.files[0].deleted);
280 }
281
282 #[test]
283 fn modified_file_by_mtime() {
284 let parent = make(vec![FileEntry::file("a.txt", 100, 1000)], vec![]);
285 let current = make(vec![FileEntry::file("a.txt", 100, 2000)], vec![]);
286 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
287 assert_eq!(diff.files.len(), 1);
288 assert_eq!(diff.files[0].mtime, Some(2000));
289 }
290
291 #[test]
292 fn modified_file_by_hash() {
293 let mut pf = FileEntry::file("a.txt", 100, 1000);
294 pf.hash = Some("aaa".into());
295 let mut cf = FileEntry::file("a.txt", 100, 1000);
296 cf.hash = Some("bbb".into());
297 let parent = make(vec![pf], vec![]);
298 let current = make(vec![cf], vec![]);
299 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
300 assert_eq!(diff.files.len(), 1);
301 assert_eq!(diff.files[0].hash.as_deref(), Some("bbb"));
302 }
303
304 #[test]
305 fn deleted_file_marker() {
306 let parent = make(vec![FileEntry::file("gone.txt", 100, 1)], vec![]);
307 let current = make(vec![], vec![]);
308 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
309 assert_eq!(diff.files.len(), 1);
310 assert!(diff.files[0].deleted);
311 assert_eq!(diff.files[0].path, "gone.txt");
312 }
313
314 #[test]
315 fn ignore_hashes_mode() {
316 let mut pf = FileEntry::file("a.txt", 100, 1000);
317 pf.hash = Some("aaa".into());
318 let mut cf = FileEntry::file("a.txt", 100, 1000);
319 cf.hash = Some("bbb".into());
320 let parent = make(vec![pf], vec![]);
321 let current = make(vec![cf], vec![]);
322 let opts = DiffOptions {
323 ignore_hashes: true,
324 ..default_opts()
325 };
326 let diff = diff_snapshots(&parent, ¤t, &opts).unwrap();
327 assert!(diff.files.is_empty(), "hash-only change should be ignored");
328 }
329
330 #[test]
331 fn preserve_runnable_copies_from_parent() {
332 let mut pf = FileEntry::file("script.sh", 100, 1000);
333 pf.runnable = true;
334 let cf = FileEntry::file("script.sh", 100, 2000);
336 let parent = make(vec![pf], vec![]);
337 let current = make(vec![cf], vec![]);
338 let opts = DiffOptions {
339 preserve_runnable: true,
340 ..default_opts()
341 };
342 let diff = diff_snapshots(&parent, ¤t, &opts).unwrap();
343 assert_eq!(diff.files.len(), 1);
344 assert!(
345 diff.files[0].runnable,
346 "runnable should be copied from parent"
347 );
348 }
349
350 #[test]
351 fn symlink_change_detected() {
352 let parent = make(vec![FileEntry::symlink("link", "target_a")], vec![]);
353 let current = make(vec![FileEntry::symlink("link", "target_b")], vec![]);
354 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
355 assert_eq!(diff.files.len(), 1);
356 assert_eq!(diff.files[0].symlink_target.as_deref(), Some("target_b"));
357 }
358
359 #[test]
360 fn dir_additions_and_deletions() {
361 let parent = make(vec![], vec![DirEntry::new("old_dir")]);
362 let current = make(vec![], vec![DirEntry::new("new_dir")]);
363 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
364 assert_eq!(diff.dirs.len(), 2);
365 let new = diff.dirs.iter().find(|d| d.path == "new_dir").unwrap();
366 assert!(!new.deleted);
367 let old = diff.dirs.iter().find(|d| d.path == "old_dir").unwrap();
368 assert!(old.deleted);
369 }
370
371 #[test]
372 fn hash_state_mismatch_hashed_parent_unhashed_current() {
373 let mut pf = FileEntry::file("a.txt", 100, 1000);
374 pf.hash = Some("aaa".into());
375 let parent = make(vec![pf], vec![]);
376 let current = make(vec![FileEntry::file("a.txt", 100, 1000)], vec![]);
377 let result = diff_snapshots(&parent, ¤t, &default_opts());
378 assert!(result.is_err());
379 let msg = result.unwrap_err().to_string();
380 assert!(msg.contains("hashed parent"));
381 }
382
383 #[test]
384 fn hash_state_mismatch_unhashed_parent_hashed_current() {
385 let parent = make(vec![FileEntry::file("a.txt", 100, 1000)], vec![]);
386 let mut cf = FileEntry::file("a.txt", 100, 1000);
387 cf.hash = Some("bbb".into());
388 let current = make(vec![cf], vec![]);
389 let result = diff_snapshots(&parent, ¤t, &default_opts());
390 assert!(result.is_err());
391 let msg = result.unwrap_err().to_string();
392 assert!(msg.contains("unhashed parent"));
393 }
394
395 #[test]
396 fn hash_state_mismatch_allowed_with_ignore_hashes() {
397 let mut pf = FileEntry::file("a.txt", 100, 1000);
398 pf.hash = Some("aaa".into());
399 let parent = make(vec![pf], vec![]);
400 let current = make(vec![FileEntry::file("a.txt", 100, 1000)], vec![]);
401 let opts = DiffOptions {
402 ignore_hashes: true,
403 ..default_opts()
404 };
405 assert!(diff_snapshots(&parent, ¤t, &opts).is_ok());
406 }
407
408 #[test]
409 fn hash_state_empty_manifests_compatible() {
410 let parent = make(vec![], vec![]);
411 let mut cf = FileEntry::file("a.txt", 100, 1000);
412 cf.hash = Some("aaa".into());
413 let current = make(vec![cf], vec![]);
414 assert!(diff_snapshots(&parent, ¤t, &default_opts()).is_ok());
416 }
417
418 #[test]
419 fn hash_state_symlink_only_compatible() {
420 let parent = make(vec![FileEntry::symlink("link", "target")], vec![]);
421 let mut cf = FileEntry::file("a.txt", 100, 1000);
422 cf.hash = Some("aaa".into());
423 let current = make(vec![cf], vec![]);
424 assert!(diff_snapshots(&parent, ¤t, &default_opts()).is_ok());
426 }
427
428 #[test]
429 fn deleted_dir_cascades_to_contents() {
430 let parent = make(
431 vec![
432 FileEntry::file("dir/a.txt", 10, 1),
433 FileEntry::file("dir/sub/b.txt", 20, 2),
434 ],
435 vec![DirEntry::new("dir"), DirEntry::new("dir/sub")],
436 );
437 let current = make(vec![], vec![]);
438 let diff = diff_snapshots(&parent, ¤t, &default_opts()).unwrap();
439 let deleted_files: Vec<&str> = diff
441 .files
442 .iter()
443 .filter(|f| f.deleted)
444 .map(|f| f.path.as_str())
445 .collect();
446 assert!(deleted_files.contains(&"dir/a.txt"));
447 assert!(deleted_files.contains(&"dir/sub/b.txt"));
448 let deleted_dirs: Vec<&str> = diff
450 .dirs
451 .iter()
452 .filter(|d| d.deleted)
453 .map(|d| d.path.as_str())
454 .collect();
455 assert!(deleted_dirs.contains(&"dir"));
456 assert!(deleted_dirs.contains(&"dir/sub"));
457 }
458
459 #[test]
460 fn parent_manifest_hash_set() {
461 let m = make(vec![], vec![]);
462 let opts = DiffOptions {
463 parent_manifest_hash: Some("hash123".into()),
464 ..default_opts()
465 };
466 let diff = diff_snapshots(&m, &m, &opts).unwrap();
467 assert_eq!(diff.parent_manifest_hash.as_deref(), Some("hash123"));
468 }
469}