Skip to main content

git_closure/snapshot/
diff.rs

//! Snapshot diffing: compare two `.gcl` files and report changes.
2use std::collections::{HashMap, HashSet};
3use std::fs;
4use std::path::Path;
5
6use crate::utils::io_error_with_path;
7
8use super::build::collect_files;
9use super::serial::parse_snapshot;
10use super::{BuildOptions, Result, SnapshotFile};
11
// ── Public types ──────────────────────────────────────────────────────────────
13
/// A single change entry produced by [`diff_snapshots`].
///
/// Every path is an owned copy of the `path` string of the compared snapshot
/// entry.  The enum is `#[non_exhaustive]`, so code outside this crate must
/// include a wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DiffEntry {
    /// Entry exists in the right snapshot but not the left.
    Added { path: String },
    /// Entry exists in the left snapshot but not the right.
    Removed { path: String },
    /// Regular file exists in both snapshots but its SHA-256 differs.
    Modified {
        path: String,
        old_sha256: String,
        new_sha256: String,
    },
    /// Entry disappeared from `old_path` and an entry with the same content
    /// identity appeared at `new_path`.  Content identity is the SHA-256 for
    /// regular files and the link target for symlinks.  This is a heuristic:
    /// a true rename has identical content.
    Renamed { old_path: String, new_path: String },
    /// Content is unchanged but the recorded Unix mode string changed.
    ModeChanged {
        path: String,
        old_mode: String,
        new_mode: String,
    },
    /// Symlink exists in both snapshots but points to a different target.
    SymlinkTargetChanged {
        path: String,
        old_target: String,
        new_target: String,
    },
}
44
45/// Result of comparing two snapshots.
46#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct DiffResult {
48    /// Ordered list of changes.  Entries are sorted: renames first (by new
49    /// path), then removed, added, modified — each group in path order.
50    pub entries: Vec<DiffEntry>,
51    /// Convenience flag: `true` when `entries` is empty.
52    pub identical: bool,
53}
54
// ── Public API ────────────────────────────────────────────────────────────────
56
57/// Compares two `.gcl` snapshot files and returns the set of differences.
58///
59/// The returned [`DiffResult`] is in a deterministic, sorted order suitable
60/// for human display and golden-file tests.
61///
62/// # Rename detection
63///
64/// A file is reported as `Renamed` when a path disappears from the left
65/// snapshot and a path with the **same SHA-256** appears in the right
66/// snapshot.  When there are multiple candidates (duplicate content), the
67/// lexicographically smallest new path is chosen.  This is O(n log n) via a
68/// reverse-index over the right snapshot's sha256 values.
69///
70/// Symlinks are compared by target string, not sha256 (which is empty for
71/// symlinks).  Two symlinks with the same target pointing to the same path
72/// are considered identical; different targets are `SymlinkTargetChanged`.
73pub fn diff_snapshots(left: &Path, right: &Path) -> Result<DiffResult> {
74    let left_text = fs::read_to_string(left).map_err(|err| io_error_with_path(err, left))?;
75    let right_text = fs::read_to_string(right).map_err(|err| io_error_with_path(err, right))?;
76
77    let (_, left_files) = parse_snapshot(&left_text)?;
78    let (_, right_files) = parse_snapshot(&right_text)?;
79
80    Ok(compute_diff(&left_files, &right_files))
81}
82
83/// Compares a snapshot file against a live source directory.
84///
85/// This parses the left `.gcl` snapshot and collects the right-hand entries
86/// directly from `source` using the same build-time file selection rules.
87pub fn diff_snapshot_to_source(
88    snapshot: &Path,
89    source: &Path,
90    options: &BuildOptions,
91) -> Result<DiffResult> {
92    let snapshot_text =
93        fs::read_to_string(snapshot).map_err(|err| io_error_with_path(err, snapshot))?;
94    let (_header, left_files) = parse_snapshot(&snapshot_text)?;
95
96    let source = fs::canonicalize(source).map_err(|err| io_error_with_path(err, source))?;
97    if !source.is_dir() {
98        return Err(crate::error::GitClosureError::Parse(format!(
99            "source is not a directory: {}",
100            source.display()
101        )));
102    }
103
104    let mut right_files = collect_files(&source, options)?;
105    right_files.sort_by(|a, b| a.path.cmp(&b.path));
106    Ok(compute_diff(&left_files, &right_files))
107}
108
// ── Core algorithm ────────────────────────────────────────────────────────────
110
111fn compute_diff(left: &[SnapshotFile], right: &[SnapshotFile]) -> DiffResult {
112    // Key for content-equality: sha256 for regular files, target for symlinks.
113    fn content_key(f: &SnapshotFile) -> String {
114        if let Some(target) = &f.symlink_target {
115            format!("symlink:{target}")
116        } else {
117            f.sha256.clone()
118        }
119    }
120
121    let left_map: HashMap<&str, &SnapshotFile> =
122        left.iter().map(|f| (f.path.as_str(), f)).collect();
123    let right_map: HashMap<&str, &SnapshotFile> =
124        right.iter().map(|f| (f.path.as_str(), f)).collect();
125
126    // Build a reverse index: content_key → sorted list of new paths (right-only).
127    // We build this incrementally after we know which right paths are "added".
128    let mut candidates_removed: Vec<&SnapshotFile> = Vec::new();
129    let mut candidates_added: Vec<&SnapshotFile> = Vec::new();
130    let mut mode_changed: Vec<DiffEntry> = Vec::new();
131    let mut modified: Vec<DiffEntry> = Vec::new();
132    let mut forced_added_paths: HashSet<&str> = HashSet::new();
133
134    for lf in left {
135        match right_map.get(lf.path.as_str()) {
136            None => candidates_removed.push(lf),
137            Some(&rf) => {
138                let left_is_symlink = lf.symlink_target.is_some();
139                let right_is_symlink = rf.symlink_target.is_some();
140
141                // Explicit design decision: regular<->symlink transitions are
142                // represented as Removed + Added (type replacement), not
143                // Modified/ModeChanged.
144                if left_is_symlink != right_is_symlink {
145                    candidates_removed.push(lf);
146                    forced_added_paths.insert(rf.path.as_str());
147                    continue;
148                }
149
150                if content_key(lf) != content_key(rf) {
151                    if let (Some(old_target), Some(new_target)) =
152                        (&lf.symlink_target, &rf.symlink_target)
153                    {
154                        modified.push(DiffEntry::SymlinkTargetChanged {
155                            path: lf.path.clone(),
156                            old_target: old_target.clone(),
157                            new_target: new_target.clone(),
158                        });
159                    } else {
160                        modified.push(DiffEntry::Modified {
161                            path: lf.path.clone(),
162                            old_sha256: lf.sha256.clone(),
163                            new_sha256: rf.sha256.clone(),
164                        });
165                    }
166                } else if lf.mode != rf.mode {
167                    mode_changed.push(DiffEntry::ModeChanged {
168                        path: lf.path.clone(),
169                        old_mode: lf.mode.clone(),
170                        new_mode: rf.mode.clone(),
171                    });
172                }
173                // identical — skip
174            }
175        }
176    }
177
178    for rf in right {
179        if !left_map.contains_key(rf.path.as_str()) || forced_added_paths.contains(rf.path.as_str())
180        {
181            candidates_added.push(rf);
182        }
183    }
184
185    // Build reverse index for rename detection.
186    let mut added_by_key: HashMap<String, Vec<&str>> = HashMap::new();
187    for rf in &candidates_added {
188        added_by_key
189            .entry(content_key(rf))
190            .or_default()
191            .push(&rf.path);
192    }
193    // Sort each bucket so we pick the lexicographically smallest new path.
194    for v in added_by_key.values_mut() {
195        v.sort_unstable();
196    }
197
198    let mut renames: Vec<DiffEntry> = Vec::new();
199    let mut renamed_old_paths: std::collections::HashSet<String> = Default::default();
200    let mut renamed_new_paths: std::collections::HashSet<String> = Default::default();
201
202    // Match removals to additions by content key.  Each addition can only be
203    // consumed once, so we track which new paths have already been claimed.
204    let mut consumed: std::collections::HashSet<String> = Default::default();
205
206    // Sort candidates_removed by path for determinism.
207    let mut candidates_removed_sorted = candidates_removed.to_vec();
208    candidates_removed_sorted.sort_by(|a, b| a.path.cmp(&b.path));
209
210    for lf in &candidates_removed_sorted {
211        let key = content_key(lf);
212        if let Some(new_paths) = added_by_key.get(&key) {
213            if let Some(&new_path) = new_paths.iter().find(|&&p| !consumed.contains(p)) {
214                consumed.insert(new_path.to_string());
215                renamed_old_paths.insert(lf.path.clone());
216                renamed_new_paths.insert(new_path.to_string());
217                renames.push(DiffEntry::Renamed {
218                    old_path: lf.path.clone(),
219                    new_path: new_path.to_string(),
220                });
221            }
222        }
223    }
224
225    renames.sort_by(|a, b| {
226        let ap = if let DiffEntry::Renamed { new_path, .. } = a {
227            new_path
228        } else {
229            unreachable!()
230        };
231        let bp = if let DiffEntry::Renamed { new_path, .. } = b {
232            new_path
233        } else {
234            unreachable!()
235        };
236        ap.cmp(bp)
237    });
238
239    let mut removed: Vec<DiffEntry> = candidates_removed_sorted
240        .iter()
241        .filter(|f| !renamed_old_paths.contains(&f.path))
242        .map(|f| DiffEntry::Removed {
243            path: f.path.clone(),
244        })
245        .collect();
246    removed.sort_by(|a, b| {
247        let ap = if let DiffEntry::Removed { path } = a {
248            path
249        } else {
250            unreachable!()
251        };
252        let bp = if let DiffEntry::Removed { path } = b {
253            path
254        } else {
255            unreachable!()
256        };
257        ap.cmp(bp)
258    });
259
260    let mut added: Vec<DiffEntry> = candidates_added
261        .iter()
262        .filter(|f| !renamed_new_paths.contains(&f.path))
263        .map(|f| DiffEntry::Added {
264            path: f.path.clone(),
265        })
266        .collect();
267    added.sort_by(|a, b| {
268        let ap = if let DiffEntry::Added { path } = a {
269            path
270        } else {
271            unreachable!()
272        };
273        let bp = if let DiffEntry::Added { path } = b {
274            path
275        } else {
276            unreachable!()
277        };
278        ap.cmp(bp)
279    });
280
281    modified.sort_by(|a, b| {
282        let ap = match a {
283            DiffEntry::Modified { path, .. } => path,
284            DiffEntry::SymlinkTargetChanged { path, .. } => path,
285            _ => unreachable!(),
286        };
287        let bp = match b {
288            DiffEntry::Modified { path, .. } => path,
289            DiffEntry::SymlinkTargetChanged { path, .. } => path,
290            _ => unreachable!(),
291        };
292        ap.cmp(bp)
293    });
294    mode_changed.sort_by(|a, b| {
295        let ap = if let DiffEntry::ModeChanged { path, .. } = a {
296            path
297        } else {
298            unreachable!()
299        };
300        let bp = if let DiffEntry::ModeChanged { path, .. } = b {
301            path
302        } else {
303            unreachable!()
304        };
305        ap.cmp(bp)
306    });
307    let mut entries = Vec::new();
308    entries.extend(renames);
309    entries.extend(removed);
310    entries.extend(added);
311    entries.extend(mode_changed);
312    entries.extend(modified);
313
314    let identical = entries.is_empty();
315    DiffResult { entries, identical }
316}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::snapshot::hash::{compute_snapshot_hash, sha256_hex};
    use crate::snapshot::serial::serialize_snapshot;
    use crate::snapshot::SnapshotFile;
    use std::fs;
    use tempfile::TempDir;

    // ── Fixtures ──────────────────────────────────────────────────────────

    /// Regular file entry with mode `644`.
    fn text_file(path: &str, content: &str) -> SnapshotFile {
        text_file_mode(path, content, "644")
    }

    /// Regular file entry with an explicit mode string.
    fn text_file_mode(path: &str, content: &str, mode: &str) -> SnapshotFile {
        let bytes = content.as_bytes().to_vec();
        SnapshotFile {
            path: path.to_string(),
            sha256: sha256_hex(&bytes),
            mode: mode.to_string(),
            size: bytes.len() as u64,
            encoding: None,
            symlink_target: None,
            content: bytes,
        }
    }

    /// Symlink entry: empty sha256 and content, mode `120000`.
    fn symlink_file(path: &str, target: &str) -> SnapshotFile {
        SnapshotFile {
            path: path.to_string(),
            sha256: String::new(),
            mode: "120000".to_string(),
            size: 0,
            encoding: None,
            symlink_target: Some(target.to_string()),
            content: Vec::new(),
        }
    }

    /// Serializes `files` (sorted by path) into `dir/name` and returns the
    /// path of the written snapshot.
    fn write_snap(dir: &TempDir, name: &str, files: &[SnapshotFile]) -> std::path::PathBuf {
        use crate::snapshot::SnapshotHeader;
        let mut sorted = files.to_vec();
        sorted.sort_by(|a, b| a.path.cmp(&b.path));
        let snapshot_hash = compute_snapshot_hash(&sorted);
        let header = SnapshotHeader {
            snapshot_hash,
            file_count: sorted.len(),
            git_rev: None,
            git_branch: None,
            extra_headers: Vec::new(),
        };
        let text = serialize_snapshot(&sorted, &header);
        let path = dir.path().join(name);
        fs::write(&path, text.as_bytes()).unwrap();
        path
    }

    /// Writes both file lists as snapshots into a fresh temp dir and diffs them.
    fn diff_files(left_files: &[SnapshotFile], right_files: &[SnapshotFile]) -> DiffResult {
        let dir = TempDir::new().unwrap();
        let left = write_snap(&dir, "left.gcl", left_files);
        let right = write_snap(&dir, "right.gcl", right_files);
        diff_snapshots(&left, &right).unwrap()
    }

    /// Creates a source tree containing `a.txt` and a snapshot built from it.
    ///
    /// Both temp dirs are returned so they stay alive for the whole test.
    fn source_with_snapshot() -> (TempDir, TempDir, std::path::PathBuf) {
        let source = TempDir::new().unwrap();
        let snapshots = TempDir::new().unwrap();
        fs::write(source.path().join("a.txt"), b"alpha\n").unwrap();

        let snapshot = snapshots.path().join("snap.gcl");
        crate::snapshot::build::build_snapshot(source.path(), &snapshot).unwrap();
        (source, snapshots, snapshot)
    }

    /// Diffs `snapshot` against the live `source` tree with default options.
    fn diff_against_source(snapshot: &std::path::Path, source: &TempDir) -> DiffResult {
        diff_snapshot_to_source(
            snapshot,
            source.path(),
            &crate::snapshot::BuildOptions::default(),
        )
        .unwrap()
    }

    // ── Snapshot vs snapshot ──────────────────────────────────────────────

    #[test]
    fn diff_identical_snapshots_is_empty() {
        let files = vec![text_file("a.txt", "hello"), text_file("b.txt", "world")];
        let result = diff_files(&files, &files);
        assert!(result.identical);
        assert!(result.entries.is_empty());
    }

    #[test]
    fn diff_detects_added_file() {
        let result = diff_files(
            &[text_file("a.txt", "a")],
            &[text_file("a.txt", "a"), text_file("b.txt", "b")],
        );
        assert!(!result.identical);
        assert!(result.entries.contains(&DiffEntry::Added {
            path: "b.txt".to_string()
        }));
    }

    #[test]
    fn diff_detects_removed_file() {
        let result = diff_files(
            &[text_file("a.txt", "a"), text_file("b.txt", "b")],
            &[text_file("a.txt", "a")],
        );
        assert!(result.entries.contains(&DiffEntry::Removed {
            path: "b.txt".to_string()
        }));
    }

    #[test]
    fn diff_detects_modified_file() {
        let result = diff_files(
            &[text_file("a.txt", "old content")],
            &[text_file("a.txt", "new content")],
        );
        assert!(result.entries.iter().any(|entry| {
            matches!(
                entry,
                DiffEntry::Modified {
                    path,
                    old_sha256,
                    new_sha256
                } if path == "a.txt" && old_sha256 != new_sha256
            )
        }));
    }

    #[test]
    fn diff_detects_rename() {
        // Same content, different path: rename.
        let result = diff_files(
            &[text_file("old/name.txt", "content")],
            &[text_file("new/name.txt", "content")],
        );
        assert!(
            result.entries.contains(&DiffEntry::Renamed {
                old_path: "old/name.txt".to_string(),
                new_path: "new/name.txt".to_string(),
            }),
            "expected Renamed, got {:?}",
            result.entries
        );
        // Must NOT also appear as Added/Removed.
        assert!(!result.entries.contains(&DiffEntry::Added {
            path: "new/name.txt".to_string()
        }));
        assert!(!result.entries.contains(&DiffEntry::Removed {
            path: "old/name.txt".to_string()
        }));
    }

    #[test]
    fn diff_rename_prefers_smallest_new_path_for_duplicate_content() {
        // Two new paths share the removed file's content: the smaller path
        // becomes the rename target and the other is a plain addition.
        let result = diff_files(
            &[text_file("a.txt", "dup")],
            &[text_file("y.txt", "dup"), text_file("x.txt", "dup")],
        );
        assert_eq!(
            result.entries,
            vec![
                DiffEntry::Renamed {
                    old_path: "a.txt".to_string(),
                    new_path: "x.txt".to_string(),
                },
                DiffEntry::Added {
                    path: "y.txt".to_string(),
                },
            ]
        );
    }

    #[test]
    fn diff_symlink_target_change_uses_dedicated_variant() {
        let result = diff_files(
            &[symlink_file("link", "target_a.txt")],
            &[symlink_file("link", "target_b.txt")],
        );
        assert!(result.entries.contains(&DiffEntry::SymlinkTargetChanged {
            path: "link".to_string(),
            old_target: "target_a.txt".to_string(),
            new_target: "target_b.txt".to_string(),
        }));
        assert!(
            !result
                .entries
                .iter()
                .any(|entry| matches!(entry, DiffEntry::Modified { path, .. } if path == "link")),
            "symlink-vs-symlink changes must not emit Modified"
        );
    }

    #[test]
    fn diff_output_ordering_renames_first() {
        // One rename, one addition, one removal.
        let result = diff_files(
            &[
                text_file("old.txt", "renamed content"),
                text_file("removed.txt", "gone"),
            ],
            &[
                text_file("new.txt", "renamed content"),
                text_file("added.txt", "new"),
            ],
        );
        assert_eq!(result.entries.len(), 3);
        assert!(
            matches!(result.entries[0], DiffEntry::Renamed { .. }),
            "first entry must be Renamed, got {:?}",
            result.entries[0]
        );
        // Pin the whole sequence: renames, then removed, then added.
        assert_eq!(
            result.entries,
            vec![
                DiffEntry::Renamed {
                    old_path: "old.txt".to_string(),
                    new_path: "new.txt".to_string(),
                },
                DiffEntry::Removed {
                    path: "removed.txt".to_string(),
                },
                DiffEntry::Added {
                    path: "added.txt".to_string(),
                },
            ]
        );
    }

    #[test]
    fn diff_detects_mode_change_without_modified() {
        let result = diff_files(
            &[text_file_mode("bin/tool.sh", "echo hi\n", "644")],
            &[text_file_mode("bin/tool.sh", "echo hi\n", "755")],
        );
        assert!(
            result.entries.contains(&DiffEntry::ModeChanged {
                path: "bin/tool.sh".to_string(),
                old_mode: "644".to_string(),
                new_mode: "755".to_string(),
            }),
            "expected ModeChanged entry, got {:?}",
            result.entries
        );
        assert!(
            !result.entries.iter().any(
                |entry| matches!(entry, DiffEntry::Modified { path, .. } if path == "bin/tool.sh")
            ),
            "mode-only change must not be reported as Modified"
        );
    }

    #[test]
    fn diff_rename_with_mode_change_stays_single_rename() {
        let result = diff_files(
            &[text_file_mode("old.sh", "echo hi\n", "644")],
            &[text_file_mode("new.sh", "echo hi\n", "755")],
        );
        assert!(
            result.entries.contains(&DiffEntry::Renamed {
                old_path: "old.sh".to_string(),
                new_path: "new.sh".to_string(),
            }),
            "rename+mode-change should still report a rename"
        );
        assert!(
            !result
                .entries
                .iter()
                .any(|entry| matches!(entry, DiffEntry::ModeChanged { .. })),
            "rename+mode-change should not emit an extra ModeChanged entry"
        );
    }

    #[test]
    fn diff_regular_to_symlink_is_reported_as_removed_plus_added() {
        let result = diff_files(
            &[text_file("path", "payload")],
            &[symlink_file("path", "target.txt")],
        );
        assert!(
            result.entries.contains(&DiffEntry::Removed {
                path: "path".to_string()
            }),
            "type change should include Removed"
        );
        assert!(
            result.entries.contains(&DiffEntry::Added {
                path: "path".to_string()
            }),
            "type change should include Added"
        );
    }

    // ── Snapshot vs live source tree ──────────────────────────────────────

    #[test]
    fn diff_snapshot_to_source_identical_tree_is_identical() {
        let (source, _snapshots, snapshot) = source_with_snapshot();

        let result = diff_against_source(&snapshot, &source);
        assert!(result.identical);
        assert!(result.entries.is_empty());
    }

    #[test]
    fn diff_snapshot_to_source_detects_modified_file() {
        let (source, _snapshots, snapshot) = source_with_snapshot();

        fs::write(source.path().join("a.txt"), b"beta\n").unwrap();

        let result = diff_against_source(&snapshot, &source);
        assert!(result
            .entries
            .iter()
            .any(|entry| matches!(entry, DiffEntry::Modified { path, .. } if path == "a.txt")));
    }

    #[test]
    fn diff_snapshot_to_source_detects_added_file() {
        let (source, _snapshots, snapshot) = source_with_snapshot();

        fs::write(source.path().join("b.txt"), b"new\n").unwrap();

        let result = diff_against_source(&snapshot, &source);
        assert!(result.entries.contains(&DiffEntry::Added {
            path: "b.txt".to_string(),
        }));
    }
}