Skip to main content

suture_core/engine/
diff.rs

1//! Diff computation — compare two FileTrees to find changes.
2//!
3//! Given two FileTree snapshots (typically from different commits),
4//! this module computes the set of changes between them.
5
6use crate::engine::tree::FileTree;
7use std::fmt;
8use suture_common::Hash;
9
10/// The type of change detected between two trees.
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub enum DiffType {
13    /// A file was added (exists in new tree but not old).
14    Added,
15    /// A file was modified (exists in both, different blob hash).
16    Modified,
17    /// A file was deleted (exists in old tree but not new).
18    Deleted,
19    /// A file was renamed (removed from old path, added at new path with same hash).
20    Renamed { old_path: String, new_path: String },
21}
22
23impl fmt::Display for DiffType {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        match self {
26            DiffType::Added => write!(f, "added"),
27            DiffType::Modified => write!(f, "modified"),
28            DiffType::Deleted => write!(f, "deleted"),
29            DiffType::Renamed { old_path, new_path } => {
30                write!(f, "renamed {} → {}", old_path, new_path)
31            }
32        }
33    }
34}
35
36/// A single diff entry representing a change between two trees.
37#[derive(Clone, Debug, PartialEq, Eq)]
38pub struct DiffEntry {
39    /// The path of the changed file (new path for renames).
40    pub path: String,
41    /// The type of change.
42    pub diff_type: DiffType,
43    /// The blob hash in the old tree (None for additions).
44    pub old_hash: Option<Hash>,
45    /// The blob hash in the new tree (None for deletions).
46    pub new_hash: Option<Hash>,
47}
48
49impl fmt::Display for DiffEntry {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        match &self.diff_type {
52            DiffType::Added => {
53                write!(
54                    f,
55                    "  + {} ({})",
56                    self.path,
57                    self.new_hash.unwrap_or(Hash::ZERO)
58                )
59            }
60            DiffType::Modified => {
61                write!(
62                    f,
63                    "  M {} ({})",
64                    self.path,
65                    self.new_hash.unwrap_or(Hash::ZERO)
66                )
67            }
68            DiffType::Deleted => {
69                write!(f, "  - {}", self.path)
70            }
71            DiffType::Renamed { old_path, new_path } => {
72                write!(f, "  R {} → {}", old_path, new_path)
73            }
74        }
75    }
76}
77
78/// Compute the diff between two FileTrees.
79///
80/// Returns a list of DiffEntry describing all changes. The entries are
81/// sorted by path for deterministic output.
82///
83/// # Algorithm
84///
85/// 1. Files only in `old_tree` → Deleted
86/// 2. Files only in `new_tree` → Added
87/// 3. Files in both with same hash → Unchanged (omitted)
88/// 4. Files in both with different hash → Modified
89/// 5. Heuristic rename detection: a deleted file whose hash matches an
90///    added file is reported as a rename.
91pub fn diff_trees(old_tree: &FileTree, new_tree: &FileTree) -> Vec<DiffEntry> {
92    let mut diffs = Vec::new();
93
94    // Files only in old tree → deleted (or renamed)
95    let mut deleted_paths: Vec<String> = Vec::new();
96    for (path, old_hash) in old_tree.iter() {
97        match new_tree.get(path) {
98            Some(new_hash) if old_hash == new_hash => {
99                // Unchanged — skip
100            }
101            Some(_new_hash) => {
102                // Modified
103                diffs.push(DiffEntry {
104                    path: path.clone(),
105                    diff_type: DiffType::Modified,
106                    old_hash: Some(*old_hash),
107                    new_hash: new_tree.get(path).copied(),
108                });
109            }
110            None => {
111                deleted_paths.push(path.clone());
112            }
113        }
114    }
115
116    // Files only in new tree → added (or renamed target)
117    let mut added_paths: Vec<(String, Hash)> = Vec::new();
118    for (path, new_hash) in new_tree.iter() {
119        if !old_tree.contains(path) {
120            added_paths.push((path.clone(), *new_hash));
121        }
122    }
123
124    // Rename detection: match deleted files with added files by hash
125    let mut matched_deletes: std::collections::HashSet<String> = std::collections::HashSet::new();
126    let mut matched_adds: std::collections::HashSet<String> = std::collections::HashSet::new();
127
128    for (del_path, del_hash) in old_tree.iter() {
129        if !new_tree.contains(del_path) {
130            // Look for a matching added file
131            for (add_path, add_hash) in &added_paths {
132                if del_hash == add_hash
133                    && !matched_adds.contains(add_path)
134                    && !matched_deletes.contains(del_path)
135                {
136                    diffs.push(DiffEntry {
137                        path: add_path.clone(),
138                        diff_type: DiffType::Renamed {
139                            old_path: del_path.clone(),
140                            new_path: add_path.clone(),
141                        },
142                        old_hash: Some(*del_hash),
143                        new_hash: Some(*add_hash),
144                    });
145                    matched_deletes.insert(del_path.clone());
146                    matched_adds.insert(add_path.clone());
147                    break;
148                }
149            }
150        }
151    }
152
153    // Remaining deletes
154    for del_path in &deleted_paths {
155        if !matched_deletes.contains(del_path) {
156            diffs.push(DiffEntry {
157                path: del_path.clone(),
158                diff_type: DiffType::Deleted,
159                old_hash: old_tree.get(del_path).copied(),
160                new_hash: None,
161            });
162        }
163    }
164
165    // Remaining adds
166    for (add_path, add_hash) in &added_paths {
167        if !matched_adds.contains(add_path) {
168            diffs.push(DiffEntry {
169                path: add_path.clone(),
170                diff_type: DiffType::Added,
171                old_hash: None,
172                new_hash: Some(*add_hash),
173            });
174        }
175    }
176
177    // Sort by path for deterministic output
178    diffs.sort_by(|a, b| a.path.cmp(&b.path));
179    diffs
180}
181
182/// Check if two trees are identical.
183pub fn trees_equal(a: &FileTree, b: &FileTree) -> bool {
184    if a.len() != b.len() {
185        return false;
186    }
187    for (path, hash) in a.iter() {
188        if b.get(path) != Some(hash) {
189            return false;
190        }
191    }
192    true
193}
194
195#[cfg(test)]
196mod tests {
197    use super::*;
198
199    fn h(data: &[u8]) -> Hash {
200        Hash::from_data(data)
201    }
202
203    #[test]
204    fn test_diff_identical() {
205        let mut tree = FileTree::empty();
206        tree.insert("a.txt".to_string(), h(b"hello"));
207        tree.insert("b.txt".to_string(), h(b"world"));
208        let diffs = diff_trees(&tree, &tree);
209        assert!(diffs.is_empty());
210    }
211
212    #[test]
213    fn test_diff_empty_to_full() {
214        let empty = FileTree::empty();
215        let mut full = FileTree::empty();
216        full.insert("a.txt".to_string(), h(b"a"));
217        full.insert("b.txt".to_string(), h(b"b"));
218
219        let diffs = diff_trees(&empty, &full);
220        assert_eq!(diffs.len(), 2);
221        assert!(diffs.iter().all(|d| d.diff_type == DiffType::Added));
222    }
223
224    #[test]
225    fn test_diff_addition() {
226        let mut old = FileTree::empty();
227        old.insert("a.txt".to_string(), h(b"a"));
228        let mut new = old.clone();
229        new.insert("b.txt".to_string(), h(b"b"));
230
231        let diffs = diff_trees(&old, &new);
232        assert_eq!(diffs.len(), 1);
233        assert_eq!(diffs[0].path, "b.txt");
234        assert_eq!(diffs[0].diff_type, DiffType::Added);
235    }
236
237    #[test]
238    fn test_diff_modification() {
239        let mut old = FileTree::empty();
240        old.insert("a.txt".to_string(), h(b"v1"));
241        let mut new = FileTree::empty();
242        new.insert("a.txt".to_string(), h(b"v2"));
243
244        let diffs = diff_trees(&old, &new);
245        assert_eq!(diffs.len(), 1);
246        assert_eq!(diffs[0].diff_type, DiffType::Modified);
247        assert_eq!(diffs[0].old_hash, Some(h(b"v1")));
248        assert_eq!(diffs[0].new_hash, Some(h(b"v2")));
249    }
250
251    #[test]
252    fn test_diff_deletion() {
253        let mut old = FileTree::empty();
254        old.insert("a.txt".to_string(), h(b"data"));
255        let new = FileTree::empty();
256
257        let diffs = diff_trees(&old, &new);
258        assert_eq!(diffs.len(), 1);
259        assert_eq!(diffs[0].diff_type, DiffType::Deleted);
260    }
261
262    #[test]
263    fn test_diff_rename() {
264        let data = b"same content";
265        let mut old = FileTree::empty();
266        old.insert("old.txt".to_string(), h(data));
267        let mut new = FileTree::empty();
268        new.insert("new.txt".to_string(), h(data));
269
270        let diffs = diff_trees(&old, &new);
271        assert_eq!(diffs.len(), 1);
272        assert!(matches!(
273            &diffs[0].diff_type,
274            DiffType::Renamed { old_path, new_path } if old_path == "old.txt" && new_path == "new.txt"
275        ));
276    }
277
278    #[test]
279    fn test_diff_complex() {
280        let mut old = FileTree::empty();
281        old.insert("keep.txt".to_string(), h(b"same"));
282        old.insert("modify.txt".to_string(), h(b"old"));
283        old.insert("delete.txt".to_string(), h(b"gone"));
284
285        let mut new = FileTree::empty();
286        new.insert("keep.txt".to_string(), h(b"same"));
287        new.insert("modify.txt".to_string(), h(b"new"));
288        new.insert("add.txt".to_string(), h(b"new file"));
289
290        let diffs = diff_trees(&old, &new);
291        assert_eq!(diffs.len(), 3);
292
293        let types: Vec<&DiffType> = diffs.iter().map(|d| &d.diff_type).collect();
294        assert!(types.contains(&&DiffType::Added));
295        assert!(types.contains(&&DiffType::Modified));
296        assert!(types.contains(&&DiffType::Deleted));
297    }
298
299    #[test]
300    fn test_trees_equal() {
301        let mut t1 = FileTree::empty();
302        let mut t2 = FileTree::empty();
303        t1.insert("a.txt".to_string(), h(b"x"));
304        t2.insert("a.txt".to_string(), h(b"x"));
305        assert!(trees_equal(&t1, &t2));
306
307        t2.insert("a.txt".to_string(), h(b"y"));
308        assert!(!trees_equal(&t1, &t2));
309    }
310
311    #[test]
312    fn test_diff_display() {
313        let mut old = FileTree::empty();
314        old.insert("file.txt".to_string(), h(b"old"));
315        let mut new = FileTree::empty();
316        new.insert("file.txt".to_string(), h(b"new"));
317        let diffs = diff_trees(&old, &new);
318        let display = format!("{}", diffs[0]);
319        assert!(display.contains("M file.txt"));
320    }
321
322    mod proptests {
323        use super::*;
324        use proptest::prelude::*;
325        use suture_common::Hash;
326
327        fn valid_path() -> impl Strategy<Value = String> {
328            proptest::string::string_regex("[a-zA-Z0-9_/:-]{1,100}").unwrap()
329        }
330
331        fn hash_strategy() -> impl Strategy<Value = Hash> {
332            proptest::array::uniform32(proptest::num::u8::ANY).prop_map(Hash::from)
333        }
334
335        proptest! {
336            #[test]
337            fn diff_empty_vs_full(entries in proptest::collection::btree_map(valid_path(), hash_strategy(), 1..20)) {
338                prop_assume!(!entries.is_empty());
339                let empty = FileTree::empty();
340                let full = FileTree::from_map(entries);
341                let diffs = diff_trees(&empty, &full);
342                prop_assert!(!diffs.is_empty());
343                prop_assert!(diffs.iter().all(|d| d.diff_type == DiffType::Added),
344                    "all entries should be Added when diffing empty vs full");
345            }
346
347            #[test]
348            fn diff_full_vs_empty(entries in proptest::collection::btree_map(valid_path(), hash_strategy(), 1..20)) {
349                prop_assume!(!entries.is_empty());
350                let full = FileTree::from_map(entries);
351                let empty = FileTree::empty();
352                let diffs = diff_trees(&full, &empty);
353                prop_assert!(!diffs.is_empty());
354                for d in &diffs {
355                    prop_assert!(
356                        d.diff_type == DiffType::Deleted || matches!(d.diff_type, DiffType::Renamed { .. }),
357                        "expected Deleted or Renamed, got {:?}", d.diff_type
358                    );
359                }
360            }
361
362            #[test]
363            fn diff_identical(entries in proptest::collection::btree_map(valid_path(), hash_strategy(), 0..20)) {
364                let tree = FileTree::from_map(entries);
365                let diffs = diff_trees(&tree, &tree);
366                prop_assert!(diffs.is_empty(), "diff of identical trees should be empty");
367            }
368
369            #[test]
370            fn diff_symmetry_inverse(
371                entries_a in proptest::collection::btree_map(valid_path(), hash_strategy(), 1..15),
372                entries_b in proptest::collection::btree_map(valid_path(), hash_strategy(), 1..15)
373            ) {
374                prop_assume!(entries_a != entries_b);
375                let tree_a = FileTree::from_map(entries_a);
376                let tree_b = FileTree::from_map(entries_b);
377
378                let diffs_ab = diff_trees(&tree_a, &tree_b);
379                let diffs_ba = diff_trees(&tree_b, &tree_a);
380
381                for d_ab in &diffs_ab {
382                    let inverse = diffs_ba.iter().find(|d| d.path == d_ab.path);
383                    match &d_ab.diff_type {
384                        DiffType::Added => {
385                            if let Some(d_ba) = inverse {
386                                prop_assert!(
387                                    d_ba.diff_type == DiffType::Deleted || matches!(d_ba.diff_type, DiffType::Renamed { .. }),
388                                    "Added in A->B should be Deleted or Renamed in B->A, got {:?}", d_ba.diff_type
389                                );
390                            }
391                        }
392                        DiffType::Deleted => {
393                            if let Some(d_ba) = inverse {
394                                prop_assert!(
395                                    d_ba.diff_type == DiffType::Added || matches!(d_ba.diff_type, DiffType::Renamed { .. }),
396                                    "Deleted in A->B should be Added or Renamed in B->A, got {:?}", d_ba.diff_type
397                                );
398                            }
399                        }
400                        DiffType::Modified => {
401                            if let Some(d_ba) = inverse {
402                                prop_assert_eq!(&d_ba.diff_type, &DiffType::Modified,
403                                    "Modified in A->B should stay Modified in B->A");
404                            }
405                        }
406                        DiffType::Renamed { old_path, new_path: _ } => {
407                            if let Some(d_ba) = inverse {
408                                prop_assert!(
409                                    matches!(d_ba.diff_type, DiffType::Renamed { .. }) || d_ba.diff_type == DiffType::Deleted,
410                                    "Renamed in A->B should be Renamed or Deleted in B->A, got {:?}", d_ba.diff_type
411                                );
412                            }
413                            let old_inv = diffs_ba.iter().find(|d| d.path == *old_path);
414                            if let Some(d_old) = old_inv {
415                                prop_assert!(
416                                    d_old.diff_type == DiffType::Added || matches!(d_old.diff_type, DiffType::Renamed { .. }),
417                                    "old_path of rename should be Added or Renamed in B->A, got {:?}", d_old.diff_type
418                                );
419                            }
420                        }
421                    }
422                }
423            }
424        }
425    }
426}