1use std::collections::{HashMap, HashSet};
3use std::fs;
4use std::path::Path;
5
6use crate::utils::io_error_with_path;
7
8use super::build::collect_files;
9use super::serial::parse_snapshot;
10use super::{BuildOptions, Result, SnapshotFile};
11
/// One difference between a left ("old") and a right ("new") set of snapshot files.
///
/// Every path is snapshot-relative and stored as an owned `String`. The enum is
/// `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DiffEntry {
    /// `path` exists on the right side and was not paired with a removal as a rename.
    ///
    /// Also emitted (together with [`DiffEntry::Removed`]) when a path switches
    /// between regular file and symlink.
    Added {
        path: String,
    },
    /// `path` exists on the left side and was not paired with an addition as a rename.
    Removed {
        path: String,
    },
    /// A regular file present on both sides whose SHA-256 differs.
    ///
    /// A mode change that accompanies the content change is not reported separately.
    Modified {
        path: String,
        old_sha256: String,
        new_sha256: String,
    },
    /// A removed path and an added path that carry identical content
    /// (same hash, or same symlink target); mode is not compared.
    Renamed {
        old_path: String,
        new_path: String,
    },
    /// Content is unchanged at `path` but the recorded mode string differs.
    ModeChanged {
        path: String,
        old_mode: String,
        new_mode: String,
    },
    /// `path` is a symlink on both sides and its target differs.
    SymlinkTargetChanged {
        path: String,
        old_target: String,
        new_target: String,
    },
}
44
45#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct DiffResult {
48 pub entries: Vec<DiffEntry>,
51 pub identical: bool,
53}
54
55pub fn diff_snapshots(left: &Path, right: &Path) -> Result<DiffResult> {
74 let left_text = fs::read_to_string(left).map_err(|err| io_error_with_path(err, left))?;
75 let right_text = fs::read_to_string(right).map_err(|err| io_error_with_path(err, right))?;
76
77 let (_, left_files) = parse_snapshot(&left_text)?;
78 let (_, right_files) = parse_snapshot(&right_text)?;
79
80 Ok(compute_diff(&left_files, &right_files))
81}
82
83pub fn diff_snapshot_to_source(
88 snapshot: &Path,
89 source: &Path,
90 options: &BuildOptions,
91) -> Result<DiffResult> {
92 let snapshot_text =
93 fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
94 let (_header, left_files) = parse_snapshot(&snapshot_text)?;
95
96 let source = fs::canonicalize(source).map_err(|err| io_error_with_path(err, source))?;
97 if !source.is_dir() {
98 return Err(crate::error::GitClosureError::Parse(format!(
99 "source is not a directory: {}",
100 source.display()
101 )));
102 }
103
104 let mut right_files = collect_files(&source, options)?;
105 right_files.sort_by(|a, b| a.path.cmp(&b.path));
106 Ok(compute_diff(&left_files, &right_files))
107}
108
109fn compute_diff(left: &[SnapshotFile], right: &[SnapshotFile]) -> DiffResult {
112 fn content_key(f: &SnapshotFile) -> String {
114 if let Some(target) = &f.symlink_target {
115 format!("symlink:{target}")
116 } else {
117 f.sha256.clone()
118 }
119 }
120
121 let left_map: HashMap<&str, &SnapshotFile> =
122 left.iter().map(|f| (f.path.as_str(), f)).collect();
123 let right_map: HashMap<&str, &SnapshotFile> =
124 right.iter().map(|f| (f.path.as_str(), f)).collect();
125
126 let mut candidates_removed: Vec<&SnapshotFile> = Vec::new();
129 let mut candidates_added: Vec<&SnapshotFile> = Vec::new();
130 let mut mode_changed: Vec<DiffEntry> = Vec::new();
131 let mut modified: Vec<DiffEntry> = Vec::new();
132 let mut forced_added_paths: HashSet<&str> = HashSet::new();
133
134 for lf in left {
135 match right_map.get(lf.path.as_str()) {
136 None => candidates_removed.push(lf),
137 Some(&rf) => {
138 let left_is_symlink = lf.symlink_target.is_some();
139 let right_is_symlink = rf.symlink_target.is_some();
140
141 if left_is_symlink != right_is_symlink {
145 candidates_removed.push(lf);
146 forced_added_paths.insert(rf.path.as_str());
147 continue;
148 }
149
150 if content_key(lf) != content_key(rf) {
151 if let (Some(old_target), Some(new_target)) =
152 (&lf.symlink_target, &rf.symlink_target)
153 {
154 modified.push(DiffEntry::SymlinkTargetChanged {
155 path: lf.path.clone(),
156 old_target: old_target.clone(),
157 new_target: new_target.clone(),
158 });
159 } else {
160 modified.push(DiffEntry::Modified {
161 path: lf.path.clone(),
162 old_sha256: lf.sha256.clone(),
163 new_sha256: rf.sha256.clone(),
164 });
165 }
166 } else if lf.mode != rf.mode {
167 mode_changed.push(DiffEntry::ModeChanged {
168 path: lf.path.clone(),
169 old_mode: lf.mode.clone(),
170 new_mode: rf.mode.clone(),
171 });
172 }
173 }
175 }
176 }
177
178 for rf in right {
179 if !left_map.contains_key(rf.path.as_str()) || forced_added_paths.contains(rf.path.as_str())
180 {
181 candidates_added.push(rf);
182 }
183 }
184
185 let mut added_by_key: HashMap<String, Vec<&str>> = HashMap::new();
187 for rf in &candidates_added {
188 added_by_key
189 .entry(content_key(rf))
190 .or_default()
191 .push(&rf.path);
192 }
193 for v in added_by_key.values_mut() {
195 v.sort_unstable();
196 }
197
198 let mut renames: Vec<DiffEntry> = Vec::new();
199 let mut renamed_old_paths: std::collections::HashSet<String> = Default::default();
200 let mut renamed_new_paths: std::collections::HashSet<String> = Default::default();
201
202 let mut consumed: std::collections::HashSet<String> = Default::default();
205
206 let mut candidates_removed_sorted = candidates_removed.to_vec();
208 candidates_removed_sorted.sort_by(|a, b| a.path.cmp(&b.path));
209
210 for lf in &candidates_removed_sorted {
211 let key = content_key(lf);
212 if let Some(new_paths) = added_by_key.get(&key) {
213 if let Some(&new_path) = new_paths.iter().find(|&&p| !consumed.contains(p)) {
214 consumed.insert(new_path.to_string());
215 renamed_old_paths.insert(lf.path.clone());
216 renamed_new_paths.insert(new_path.to_string());
217 renames.push(DiffEntry::Renamed {
218 old_path: lf.path.clone(),
219 new_path: new_path.to_string(),
220 });
221 }
222 }
223 }
224
225 renames.sort_by(|a, b| {
226 let ap = if let DiffEntry::Renamed { new_path, .. } = a {
227 new_path
228 } else {
229 unreachable!()
230 };
231 let bp = if let DiffEntry::Renamed { new_path, .. } = b {
232 new_path
233 } else {
234 unreachable!()
235 };
236 ap.cmp(bp)
237 });
238
239 let mut removed: Vec<DiffEntry> = candidates_removed_sorted
240 .iter()
241 .filter(|f| !renamed_old_paths.contains(&f.path))
242 .map(|f| DiffEntry::Removed {
243 path: f.path.clone(),
244 })
245 .collect();
246 removed.sort_by(|a, b| {
247 let ap = if let DiffEntry::Removed { path } = a {
248 path
249 } else {
250 unreachable!()
251 };
252 let bp = if let DiffEntry::Removed { path } = b {
253 path
254 } else {
255 unreachable!()
256 };
257 ap.cmp(bp)
258 });
259
260 let mut added: Vec<DiffEntry> = candidates_added
261 .iter()
262 .filter(|f| !renamed_new_paths.contains(&f.path))
263 .map(|f| DiffEntry::Added {
264 path: f.path.clone(),
265 })
266 .collect();
267 added.sort_by(|a, b| {
268 let ap = if let DiffEntry::Added { path } = a {
269 path
270 } else {
271 unreachable!()
272 };
273 let bp = if let DiffEntry::Added { path } = b {
274 path
275 } else {
276 unreachable!()
277 };
278 ap.cmp(bp)
279 });
280
281 modified.sort_by(|a, b| {
282 let ap = match a {
283 DiffEntry::Modified { path, .. } => path,
284 DiffEntry::SymlinkTargetChanged { path, .. } => path,
285 _ => unreachable!(),
286 };
287 let bp = match b {
288 DiffEntry::Modified { path, .. } => path,
289 DiffEntry::SymlinkTargetChanged { path, .. } => path,
290 _ => unreachable!(),
291 };
292 ap.cmp(bp)
293 });
294 mode_changed.sort_by(|a, b| {
295 let ap = if let DiffEntry::ModeChanged { path, .. } = a {
296 path
297 } else {
298 unreachable!()
299 };
300 let bp = if let DiffEntry::ModeChanged { path, .. } = b {
301 path
302 } else {
303 unreachable!()
304 };
305 ap.cmp(bp)
306 });
307 let mut entries = Vec::new();
308 entries.extend(renames);
309 entries.extend(removed);
310 entries.extend(added);
311 entries.extend(mode_changed);
312 entries.extend(modified);
313
314 let identical = entries.is_empty();
315 DiffResult { entries, identical }
316}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::snapshot::hash::{compute_snapshot_hash, sha256_hex};
    use crate::snapshot::serial::serialize_snapshot;
    use crate::snapshot::SnapshotFile;
    use std::fs;
    use tempfile::TempDir;

    // ---------------------------------------------------------------
    // Fixture helpers
    // ---------------------------------------------------------------

    /// Regular-file fixture with mode `644`.
    fn text_file(path: &str, content: &str) -> SnapshotFile {
        text_file_mode(path, content, "644")
    }

    /// Regular-file fixture with an explicit mode string.
    fn text_file_mode(path: &str, content: &str, mode: &str) -> SnapshotFile {
        let content = content.as_bytes().to_vec();
        let sha256 = sha256_hex(&content);
        let size = content.len() as u64;
        SnapshotFile {
            path: path.to_string(),
            sha256,
            mode: mode.to_string(),
            size,
            encoding: None,
            symlink_target: None,
            content,
        }
    }

    /// Symlink fixture: empty hash and content, mode `120000`.
    fn symlink_file(path: &str, target: &str) -> SnapshotFile {
        SnapshotFile {
            path: path.to_string(),
            sha256: String::new(),
            mode: "120000".to_string(),
            size: 0,
            encoding: None,
            symlink_target: Some(target.to_string()),
            content: Vec::new(),
        }
    }

    /// Serializes `files` (sorted by path) into `dir/name` and returns that path.
    fn write_snap(dir: &TempDir, name: &str, files: &[SnapshotFile]) -> std::path::PathBuf {
        use crate::snapshot::SnapshotHeader;

        let mut ordered = files.to_vec();
        ordered.sort_by(|a, b| a.path.cmp(&b.path));

        let header = SnapshotHeader {
            snapshot_hash: compute_snapshot_hash(&ordered),
            file_count: ordered.len(),
            git_rev: None,
            git_branch: None,
            extra_headers: Vec::new(),
        };

        let destination = dir.path().join(name);
        let text = serialize_snapshot(&ordered, &header);
        fs::write(&destination, text.as_bytes()).unwrap();
        destination
    }

    /// Writes both file lists as `left.gcl` / `right.gcl` in a fresh temp
    /// directory and returns the result of `diff_snapshots` on them.
    fn diff_files(left_files: &[SnapshotFile], right_files: &[SnapshotFile]) -> DiffResult {
        let dir = TempDir::new().unwrap();
        let left = write_snap(&dir, "left.gcl", left_files);
        let right = write_snap(&dir, "right.gcl", right_files);
        diff_snapshots(&left, &right).unwrap()
    }

    /// True when `result` holds a `Modified` entry for `wanted`.
    fn reports_modified(result: &DiffResult, wanted: &str) -> bool {
        result
            .entries
            .iter()
            .any(|entry| matches!(entry, DiffEntry::Modified { path, .. } if path == wanted))
    }

    /// Creates a source tree containing only `a.txt` ("alpha\n") and a snapshot
    /// built from it. Both temp directories are returned so they stay alive.
    fn snapshotted_tree() -> (TempDir, TempDir, std::path::PathBuf) {
        let source = TempDir::new().unwrap();
        let snapshots = TempDir::new().unwrap();
        fs::write(source.path().join("a.txt"), b"alpha\n").unwrap();

        let snapshot = snapshots.path().join("snap.gcl");
        crate::snapshot::build::build_snapshot(source.path(), &snapshot).unwrap();

        (source, snapshots, snapshot)
    }

    /// Diffs `snapshot` against the live tree in `source` with default build options.
    fn diff_against_tree(snapshot: &std::path::Path, source: &TempDir) -> DiffResult {
        diff_snapshot_to_source(
            snapshot,
            source.path(),
            &crate::snapshot::BuildOptions::default(),
        )
        .unwrap()
    }

    // ---------------------------------------------------------------
    // Snapshot vs snapshot
    // ---------------------------------------------------------------

    #[test]
    fn diff_identical_snapshots_is_empty() {
        let files = [text_file("a.txt", "hello"), text_file("b.txt", "world")];
        let result = diff_files(&files, &files);
        assert!(result.identical);
        assert!(result.entries.is_empty());
    }

    #[test]
    fn diff_detects_added_file() {
        let before = [text_file("a.txt", "a")];
        let after = [text_file("a.txt", "a"), text_file("b.txt", "b")];
        let result = diff_files(&before, &after);
        assert!(!result.identical);
        assert!(result.entries.contains(&DiffEntry::Added {
            path: "b.txt".to_string()
        }));
    }

    #[test]
    fn diff_detects_removed_file() {
        let before = [text_file("a.txt", "a"), text_file("b.txt", "b")];
        let after = [text_file("a.txt", "a")];
        let result = diff_files(&before, &after);
        assert!(result.entries.contains(&DiffEntry::Removed {
            path: "b.txt".to_string()
        }));
    }

    #[test]
    fn diff_detects_modified_file() {
        let before = [text_file("a.txt", "old content")];
        let after = [text_file("a.txt", "new content")];
        let result = diff_files(&before, &after);

        let found = result.entries.iter().any(|entry| match entry {
            DiffEntry::Modified {
                path,
                old_sha256,
                new_sha256,
            } => path == "a.txt" && old_sha256 != new_sha256,
            _ => false,
        });
        assert!(found);
    }

    #[test]
    fn diff_detects_rename() {
        let before = [text_file("old/name.txt", "content")];
        // Same bytes under a different path.
        let after = [text_file("new/name.txt", "content")];
        let result = diff_files(&before, &after);

        let expected = DiffEntry::Renamed {
            old_path: "old/name.txt".to_string(),
            new_path: "new/name.txt".to_string(),
        };
        assert!(
            result.entries.contains(&expected),
            "expected Renamed, got {:?}",
            result.entries
        );
        // A detected rename must not also surface as an add/remove pair.
        assert!(!result.entries.contains(&DiffEntry::Added {
            path: "new/name.txt".to_string()
        }));
        assert!(!result.entries.contains(&DiffEntry::Removed {
            path: "old/name.txt".to_string()
        }));
    }

    #[test]
    fn diff_symlink_target_change_uses_dedicated_variant() {
        let before = [symlink_file("link", "target_a.txt")];
        let after = [symlink_file("link", "target_b.txt")];
        let result = diff_files(&before, &after);

        assert!(result.entries.contains(&DiffEntry::SymlinkTargetChanged {
            path: "link".to_string(),
            old_target: "target_a.txt".to_string(),
            new_target: "target_b.txt".to_string(),
        }));
        assert!(
            !reports_modified(&result, "link"),
            "symlink-vs-symlink changes must not emit Modified"
        );
    }

    #[test]
    fn diff_output_ordering_renames_first() {
        let before = [
            text_file("old.txt", "renamed content"),
            text_file("removed.txt", "gone"),
        ];
        let after = [
            text_file("new.txt", "renamed content"),
            text_file("added.txt", "new"),
        ];
        let result = diff_files(&before, &after);

        assert_eq!(result.entries.len(), 3);
        assert!(
            matches!(result.entries[0], DiffEntry::Renamed { .. }),
            "first entry must be Renamed, got {:?}",
            result.entries[0]
        );
    }

    #[test]
    fn diff_detects_mode_change_without_modified() {
        let before = [text_file_mode("bin/tool.sh", "echo hi\n", "644")];
        let after = [text_file_mode("bin/tool.sh", "echo hi\n", "755")];
        let result = diff_files(&before, &after);

        let expected = DiffEntry::ModeChanged {
            path: "bin/tool.sh".to_string(),
            old_mode: "644".to_string(),
            new_mode: "755".to_string(),
        };
        assert!(
            result.entries.contains(&expected),
            "expected ModeChanged entry, got {:?}",
            result.entries
        );
        assert!(
            !reports_modified(&result, "bin/tool.sh"),
            "mode-only change must not be reported as Modified"
        );
    }

    #[test]
    fn diff_rename_with_mode_change_stays_single_rename() {
        let before = [text_file_mode("old.sh", "echo hi\n", "644")];
        let after = [text_file_mode("new.sh", "echo hi\n", "755")];
        let result = diff_files(&before, &after);

        assert!(
            result.entries.contains(&DiffEntry::Renamed {
                old_path: "old.sh".to_string(),
                new_path: "new.sh".to_string(),
            }),
            "rename+mode-change should still report a rename"
        );
        let any_mode_change = result
            .entries
            .iter()
            .any(|entry| matches!(entry, DiffEntry::ModeChanged { .. }));
        assert!(
            !any_mode_change,
            "rename+mode-change should not emit an extra ModeChanged entry"
        );
    }

    #[test]
    fn diff_regular_to_symlink_is_reported_as_removed_plus_added() {
        let before = [text_file("path", "payload")];
        let after = [symlink_file("path", "target.txt")];
        let result = diff_files(&before, &after);

        assert!(
            result.entries.contains(&DiffEntry::Removed {
                path: "path".to_string()
            }),
            "type change should include Removed"
        );
        assert!(
            result.entries.contains(&DiffEntry::Added {
                path: "path".to_string()
            }),
            "type change should include Added"
        );
    }

    // ---------------------------------------------------------------
    // Snapshot vs live source tree
    // ---------------------------------------------------------------

    #[test]
    fn diff_snapshot_to_source_identical_tree_is_identical() {
        let (source, _snapshots, snapshot) = snapshotted_tree();

        let result = diff_against_tree(&snapshot, &source);
        assert!(result.identical);
        assert!(result.entries.is_empty());
    }

    #[test]
    fn diff_snapshot_to_source_detects_modified_file() {
        let (source, _snapshots, snapshot) = snapshotted_tree();

        // Change the file after the snapshot was taken.
        fs::write(source.path().join("a.txt"), b"beta\n").unwrap();

        let result = diff_against_tree(&snapshot, &source);
        assert!(reports_modified(&result, "a.txt"));
    }

    #[test]
    fn diff_snapshot_to_source_detects_added_file() {
        let (source, _snapshots, snapshot) = snapshotted_tree();

        // Add a file the snapshot has never seen.
        fs::write(source.path().join("b.txt"), b"new\n").unwrap();

        let result = diff_against_tree(&snapshot, &source);
        assert!(result.entries.contains(&DiffEntry::Added {
            path: "b.txt".to_string(),
        }));
    }
}