Skip to main content

swh_graph_stdlib/
diff.rs

1// Copyright (C) 2025-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Recursively lists differences between two directory nodes.
7
8use anyhow::{Result, bail, ensure};
9
10use swh_graph::{
11    NodeType,
12    graph::{NodeId, SwhGraphWithProperties, SwhLabeledForwardGraph},
13    labels::{EdgeLabel, LabelNameId, Permission},
14    properties,
15};
16
17/// An operation made between two revisions on a single directory entry
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum TreeDiffOperation<Path: AsRef<[LabelNameId]> = Box<[LabelNameId]>> {
20    /// Adds a single file or directory
21    Added {
22        /// Full path (from repo root) to the added file
23        path: Path,
24        new_file: NodeId,
25        new_perm: Option<Permission>,
26    },
27    /// Removes a single file or directory
28    Deleted {
29        /// Full path (from repo root) to the removed file
30        path: Path,
31        old_file: NodeId,
32        old_perm: Option<Permission>,
33    },
34    /// Edits a single file (it can't be a directory)
35    Modified {
36        /// Full path (from the repo root) to the removed file
37        path: Path,
38        new_file: NodeId,
39        old_file: NodeId,
40        new_perm: Option<Permission>,
41        old_perm: Option<Permission>,
42    },
43    /// Moves a file from one place to another.
44    /// The file may have been edited in the process,
45    /// in which case the two supplied node ids will differ.
46    Moved {
47        old_path: Path,
48        new_path: Path,
49        old_file: NodeId,
50        new_file: NodeId,
51        old_perm: Option<Permission>,
52        new_perm: Option<Permission>,
53    },
54}
55
56impl<Path: AsRef<[LabelNameId]>> TreeDiffOperation<Path> {
57    pub fn shallow_copy(&self) -> TreeDiffOperation<&[LabelNameId]> {
58        use TreeDiffOperation::*;
59
60        match self {
61            Added {
62                path,
63                new_file,
64                new_perm,
65            } => Added {
66                path: path.as_ref(),
67                new_file: *new_file,
68                new_perm: *new_perm,
69            },
70            Deleted {
71                path,
72                old_file,
73                old_perm,
74            } => Deleted {
75                path: path.as_ref(),
76                old_file: *old_file,
77                old_perm: *old_perm,
78            },
79            Modified {
80                path,
81                new_file,
82                old_file,
83                new_perm,
84                old_perm,
85            } => Modified {
86                path: path.as_ref(),
87                new_file: *new_file,
88                old_file: *old_file,
89                new_perm: *new_perm,
90                old_perm: *old_perm,
91            },
92            Moved {
93                old_path,
94                new_path,
95                new_file,
96                old_file,
97                new_perm,
98                old_perm,
99            } => Moved {
100                old_path: old_path.as_ref(),
101                new_path: new_path.as_ref(),
102                new_file: *new_file,
103                old_file: *old_file,
104                new_perm: *new_perm,
105                old_perm: *old_perm,
106            },
107        }
108    }
109}
110
111/// A diff between two trees, treating files as atomic units
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct TreeDiff {
114    operations: Vec<TreeDiffOperation>,
115}
116
117impl TreeDiff {
118    pub fn operations(&self) -> impl Iterator<Item = TreeDiffOperation<&[LabelNameId]>> + use<'_> {
119        self.operations.iter().map(TreeDiffOperation::shallow_copy)
120    }
121}
122
123/// Compute a tree diff between the old and new trees rooted in
124/// the nodes supplied, which are required to be directory nodes.
125///
126/// The limit parameter can optionally limit the number of diff
127/// elements to return (to avoid returning very big diffs from
128/// git bombs). If this limit is reached, an error is returned.
129///
130/// If no `old_tree` is supplied, then all the files present in
131/// `new_tree` are treated as being added.
132///
133/// WARNING: rename detection is currently not supported. The `Moved` variant is never returned.
134pub fn tree_diff_dirs<G>(
135    graph: &G,
136    old_tree: Option<NodeId>,
137    new_tree: NodeId,
138    limit: Option<usize>,
139) -> Result<TreeDiff>
140where
141    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
142{
143    let mut operations = Vec::new();
144    let Some(old_tree) = old_tree else {
145        for dir_entry in list_dir(graph, new_tree)? {
146            add_directory_recursively(graph, &dir_entry, &limit, &mut operations, &[])?;
147        }
148        return Ok(TreeDiff { operations });
149    };
150    let mut path_prefix = Vec::new();
151    let mut stack: Vec<StackItem> = Vec::new();
152    stack.push(StackItem::Visit {
153        old: old_tree,
154        new: new_tree,
155        path: None,
156    });
157    while let Some(stack_item) = stack.pop() {
158        match stack_item {
159            StackItem::Leave => {
160                path_prefix.pop();
161            }
162            StackItem::Visit { path, old, new } => {
163                if let Some(path) = path {
164                    path_prefix.push(path);
165                }
166                tree_diff_internal(
167                    graph,
168                    old,
169                    new,
170                    &limit,
171                    &mut operations,
172                    &mut path_prefix,
173                    &mut stack,
174                )?;
175            }
176        }
177    }
178    Ok(TreeDiff { operations })
179}
180
181/// Internal representation of our stack to recurse into directories,
182/// to avoid overflowing Rust's call stack.
183enum StackItem {
184    /// marker to update the path to the directory currently being visited
185    Leave,
186    // task to visit a pair of nodes
187    Visit {
188        path: Option<LabelNameId>,
189        old: NodeId,
190        new: NodeId,
191    },
192}
193
194fn tree_diff_internal<G>(
195    graph: &G,
196    old_tree: NodeId,
197    new_tree: NodeId,
198    limit: &Option<usize>,
199    result: &mut Vec<TreeDiffOperation>,
200    path_prefix: &mut Vec<LabelNameId>,
201    stack: &mut Vec<StackItem>,
202) -> Result<()>
203where
204    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
205{
206    if old_tree == new_tree {
207        // nothing to diff!
208        return Ok(());
209    }
210
211    let props = graph.properties();
212
213    // Iterate jointly on the listing of both directories.
214    // The listings are known to be sorted on their label name ids.
215    let mut old_dir_iter = list_dir(graph, old_tree)?.into_iter().peekable();
216    let mut new_dir_iter = list_dir(graph, new_tree)?.into_iter().peekable();
217    loop {
218        if limit.is_some_and(|limit| result.len() >= limit) {
219            // TODO convert to if let-chains when they become available to avoid the `unwrap`
220            bail!("Diff has more than {} changes", limit.unwrap())
221        }
222        // concatenates file names to obtain full relative path from the repo's root
223        let full_path = |file_name: &LabelNameId| {
224            [path_prefix.as_slice(), &[*file_name]]
225                .concat()
226                .into_boxed_slice()
227        };
228        match (old_dir_iter.peek(), new_dir_iter.peek()) {
229            (None, None) => break,
230            (None, Some(new)) => {
231                add_directory_recursively(graph, new, limit, result, path_prefix)?;
232                new_dir_iter.next();
233            }
234            (Some(old), None) => {
235                delete_directory_recursively(graph, old, limit, result, path_prefix)?;
236                old_dir_iter.next();
237            }
238            (Some(old), Some(new)) => {
239                match old.path.0.cmp(&new.path.0) {
240                    std::cmp::Ordering::Less => {
241                        delete_directory_recursively(graph, old, limit, result, path_prefix)?;
242                        old_dir_iter.next();
243                    }
244                    std::cmp::Ordering::Greater => {
245                        add_directory_recursively(graph, new, limit, result, path_prefix)?;
246                        new_dir_iter.next();
247                    }
248                    std::cmp::Ordering::Equal => {
249                        // If the two ids are equal, we know the contents are recursively equal,
250                        // in which case there is nothing to compare. Otherwise:
251                        if old.id != new.id || old.perm != new.perm {
252                            let old_type = props.node_type(old.id);
253                            let new_type = props.node_type(new.id);
254                            let content_types = [NodeType::Content, NodeType::Revision];
255
256                            if content_types.contains(&old_type)
257                                && content_types.contains(&new_type)
258                            {
259                                // Both entries point to file-like things, so we register the difference as an edit on them
260                                result.push(TreeDiffOperation::Modified {
261                                    path: full_path(&new.path),
262                                    new_file: new.id,
263                                    old_file: old.id,
264                                    new_perm: new.perm,
265                                    old_perm: old.perm,
266                                });
267                            } else if content_types.contains(&old_type)
268                                && new_type == NodeType::Directory
269                            {
270                                // A file got changed into a directory.
271                                result.push(TreeDiffOperation::Deleted {
272                                    path: full_path(&old.path),
273                                    old_file: old.id,
274                                    old_perm: old.perm,
275                                });
276                                add_directory_recursively(graph, new, limit, result, path_prefix)?;
277                            } else if old_type == NodeType::Directory
278                                && content_types.contains(&new_type)
279                            {
280                                // A directory changed into a file
281                                delete_directory_recursively(
282                                    graph,
283                                    old,
284                                    limit,
285                                    result,
286                                    path_prefix,
287                                )?;
288                                result.push(TreeDiffOperation::Added {
289                                    path: full_path(&new.path),
290                                    new_file: new.id,
291                                    new_perm: new.perm,
292                                });
293                            } else if old_type == NodeType::Directory
294                                && new_type == NodeType::Directory
295                            {
296                                // Two directories with different contents: let's remember
297                                // to recurse into this directory later, by pushing the task
298                                // to visit it on our stack.
299                                // We also add a `Leave` marker to update the current path once
300                                // we're done visiting that subdirectory. In order for it to be
301                                // processed after the visit, we add it on the stack before the
302                                // visit (since it's Last-In-First-Out)
303                                stack.push(StackItem::Leave);
304                                stack.push(StackItem::Visit {
305                                    path: Some(new.path),
306                                    old: old.id,
307                                    new: new.id,
308                                });
309                            }
310                        };
311                        old_dir_iter.next();
312                        new_dir_iter.next();
313                    }
314                }
315            }
316        }
317    }
318    Ok(())
319}
320
321/// Recursively browse a part of a file system and mark all of its contents as added
322fn add_directory_recursively<G>(
323    graph: &G,
324    dir_entry: &DirEntry,
325    limit: &Option<usize>,
326    result: &mut Vec<TreeDiffOperation>,
327    path_prefix: &[LabelNameId],
328) -> Result<()>
329where
330    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
331{
332    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, true)
333}
334
335/// Recursively browse a part of a file system and mark all of its contents as deleted
336fn delete_directory_recursively<G>(
337    graph: &G,
338    dir_entry: &DirEntry,
339    limit: &Option<usize>,
340    result: &mut Vec<TreeDiffOperation>,
341    path_prefix: &[LabelNameId],
342) -> Result<()>
343where
344    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
345{
346    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, false)
347}
348
349enum TraverseStackItem {
350    Leave,
351    Visit {
352        node_id: NodeId,
353        path: LabelNameId,
354        perm: Option<Permission>,
355    },
356}
357
358/// Recursively browse a part of a file system and mark all of its contents as added
359/// or deleted (as indicated by the last parameter).
360fn add_or_delete_directory_recursively<G>(
361    graph: &G,
362    dir_entry: &DirEntry,
363    limit: &Option<usize>,
364    result: &mut Vec<TreeDiffOperation>,
365    path_prefix: &[LabelNameId],
366    add: bool,
367) -> Result<()>
368where
369    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
370{
371    let props = graph.properties();
372    let mut stack = Vec::new();
373    let mut path_prefix = path_prefix.to_vec();
374    stack.push(TraverseStackItem::Visit {
375        node_id: dir_entry.id,
376        path: dir_entry.path,
377        perm: dir_entry.perm,
378    });
379    while let Some(task) = stack.pop() {
380        if limit.is_some_and(|limit| result.len() >= limit) {
381            bail!("Diff has more than {} changes", limit.unwrap());
382        }
383        match task {
384            TraverseStackItem::Leave => {
385                path_prefix.pop();
386            }
387            TraverseStackItem::Visit {
388                node_id,
389                path,
390                perm,
391            } => {
392                path_prefix.push(path);
393                match props.node_type(node_id) {
394                    NodeType::Content | NodeType::Revision => {
395                        let diff_item = if add {
396                            TreeDiffOperation::Added {
397                                path: path_prefix.to_vec().into_boxed_slice(),
398                                new_file: node_id,
399                                new_perm: perm,
400                            }
401                        } else {
402                            TreeDiffOperation::Deleted {
403                                path: path_prefix.to_vec().into_boxed_slice(),
404                                old_file: node_id,
405                                old_perm: perm,
406                            }
407                        };
408                        result.push(diff_item);
409                    }
410                    NodeType::Directory => {
411                        let dir_listing = list_dir(graph, node_id)?;
412                        for entry in &dir_listing {
413                            stack.push(TraverseStackItem::Leave);
414                            stack.push(TraverseStackItem::Visit {
415                                node_id: entry.id,
416                                path: entry.path,
417                                perm: entry.perm,
418                            });
419                        }
420                    }
421                    x => {
422                        bail!("unexpected node type {x} when browsing a directory");
423                    }
424                }
425            }
426        }
427    }
428    Ok(())
429}
430
431/// Internal struct to represent an entry in the list of contents of a directory
432#[derive(Debug, Clone, Copy, PartialEq, Eq)]
433struct DirEntry {
434    path: LabelNameId,
435    perm: Option<Permission>,
436    id: NodeId,
437}
438
439/// Lists the contents of a directory, sorting the results by label name id.
440fn list_dir<G>(graph: &G, dir_node: NodeId) -> Result<Vec<DirEntry>>
441where
442    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
443{
444    let props = graph.properties();
445    let node_type = props.node_type(dir_node);
446    ensure!(
447        node_type == NodeType::Directory,
448        "Type of {dir_node} should be dir, but is {node_type} instead"
449    );
450    let mut listing = Vec::new();
451    for (succ, labels) in graph.labeled_successors(dir_node) {
452        let node_type = props.node_type(succ);
453        if node_type == NodeType::Content
454            || node_type == NodeType::Directory
455            || node_type == NodeType::Revision
456        {
457            for label in labels {
458                if let EdgeLabel::DirEntry(dentry) = label {
459                    let perm = dentry.permission();
460                    listing.push(DirEntry {
461                        path: dentry.label_name_id(),
462                        perm,
463                        id: succ,
464                    });
465                }
466            }
467        }
468    }
469    listing.sort_unstable_by_key(|dir_entry| dir_entry.path.0);
470    Ok(listing)
471}
472
473#[cfg(test)]
474mod tests {
475
476    use swh_graph::{
477        NodeType, SWHID,
478        graph::*,
479        graph_builder::{BuiltGraph, GraphBuilder},
480    };
481
482    use super::*;
483
484    /// Convenience type to define test cases.
485    /// The `u32` fields are used to generate SWHIDs
486    /// when converting the trees to graphs.
487    enum Tree {
488        File(u32),
489        Executable(u32),
490        Dir(u32, Vec<(&'static str, Tree)>),
491        Revision(u32),
492    }
493
494    #[rustfmt::skip] // to keep the tree representation more compact and readable
495    fn base_tree() -> Tree {
496        Tree::Dir(1, vec![
497            ("books", Tree::Dir(2, vec![
498                ("disparition.txt", Tree::File(3)),
499                ("revision_link", Tree::Revision(9)),
500                ("war_and_peace.doc", Tree::File(4)),
501            ])),
502            ("music", Tree::Dir(5, vec![
503                ("lemon_tree.mp3", Tree::File(6)),
504                ("on_reflection.wav", Tree::File(7)),
505            ])),
506            ("readme.txt", Tree::File(8)),
507        ])
508    }
509
510    fn parse_path(graph: &BuiltGraph, path: &str) -> Box<[LabelNameId]> {
511        path.split('/')
512            .map(|part| {
513                graph
514                    .properties()
515                    .label_name_id(part.as_bytes())
516                    .expect("label name does not exist in the graph")
517            })
518            .collect()
519    }
520
521    #[test]
522    fn test_list_dir() -> Result<()> {
523        let tree = base_tree();
524        let mut builder = GraphBuilder::default();
525        let root_id = build_graph_recursively(&tree, &mut builder)?.0;
526        let graph = builder.done()?;
527        let props = graph.properties();
528        let books_subdir_id =
529            props.node_id("swh:1:dir:0000000000000000000000000000000000000002")?;
530        let music_subdir_id =
531            props.node_id("swh:1:dir:0000000000000000000000000000000000000005")?;
532        let readme_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000008")?;
533        let lemon_tree_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000006")?;
534        let on_reflection_id =
535            props.node_id("swh:1:cnt:0000000000000000000000000000000000000007")?;
536
537        assert_eq!(
538            list_dir(&graph, root_id)?,
539            vec![
540                DirEntry {
541                    path: parse_path(&graph, "books")[0],
542                    perm: Some(Permission::Directory),
543                    id: books_subdir_id,
544                },
545                DirEntry {
546                    path: parse_path(&graph, "music")[0],
547                    perm: Some(Permission::Directory),
548                    id: music_subdir_id,
549                },
550                DirEntry {
551                    path: parse_path(&graph, "readme.txt")[0],
552                    perm: Some(Permission::Content),
553                    id: readme_id,
554                }
555            ]
556        );
557        assert_eq!(
558            list_dir(&graph, music_subdir_id)?,
559            vec![
560                DirEntry {
561                    path: parse_path(&graph, "lemon_tree.mp3")[0],
562                    perm: Some(Permission::Content),
563                    id: lemon_tree_id,
564                },
565                DirEntry {
566                    path: parse_path(&graph, "on_reflection.wav")[0],
567                    perm: Some(Permission::Content),
568                    id: on_reflection_id,
569                }
570            ]
571        );
572        Ok(())
573    }
574
575    #[test]
576    fn test_diff_root_dir() -> Result<()> {
577        let base_tree = base_tree();
578
579        let mut builder = GraphBuilder::default();
580        let new_root = build_graph_recursively(&base_tree, &mut builder)?.0;
581        let graph = builder.done()?;
582        let diff = tree_diff_dirs(&graph, None, new_root, None)?;
583
584        let mut actual = diff.operations().collect::<Vec<_>>();
585        actual.sort_by_key(|op| {
586            let TreeDiffOperation::Added { new_file, .. } = op else {
587                panic!("expected only 'added' diff elements when diffing a root revision");
588            };
589            *new_file
590        });
591
592        assert_eq!(
593            actual,
594            vec![
595                TreeDiffOperation::Added {
596                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
597                    new_file: graph
598                        .properties()
599                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
600                    new_perm: Some(Permission::Content),
601                },
602                TreeDiffOperation::Added {
603                    path: parse_path(&graph, "books/revision_link").as_ref(),
604                    new_file: graph
605                        .properties()
606                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
607                    new_perm: Some(Permission::Revision),
608                },
609                TreeDiffOperation::Added {
610                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
611                    new_file: graph
612                        .properties()
613                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
614                    new_perm: Some(Permission::Content),
615                },
616                TreeDiffOperation::Added {
617                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
618                    new_file: graph
619                        .properties()
620                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
621                    new_perm: Some(Permission::Content),
622                },
623                TreeDiffOperation::Added {
624                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
625                    new_file: graph
626                        .properties()
627                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
628                    new_perm: Some(Permission::Content),
629                },
630                TreeDiffOperation::Added {
631                    path: parse_path(&graph, "readme.txt").as_ref(),
632                    new_file: graph
633                        .properties()
634                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
635                    new_perm: Some(Permission::Content),
636                }
637            ],
638        );
639
640        Ok(())
641    }
642
643    #[test]
644    fn test_edit_file() -> Result<()> {
645        let base_tree = base_tree();
646
647        #[rustfmt::skip]
648        let tree_with_edit =
649            Tree::Dir(15, vec![
650                ("books", Tree::Dir(14, vec![
651                    ("disparition.txt", Tree::File(13)),
652                    ("revision_link", Tree::Revision(9)),
653                    ("war_and_peace.doc", Tree::File(4)),
654                ])),
655                ("music", Tree::Dir(10, vec![
656                    ("lemon_tree.mp3", Tree::File(6)),
657                    ("on_reflection.wav", Tree::File(7)),
658                ])),
659                ("readme.txt", Tree::File(8)),
660        ]);
661
662        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
663
664        assert_eq!(
665            diff.operations,
666            vec![TreeDiffOperation::Modified {
667                path: parse_path(&graph, "books/disparition.txt"),
668                old_file: graph
669                    .properties()
670                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
671                new_file: graph
672                    .properties()
673                    .node_id("swh:1:cnt:000000000000000000000000000000000000000d")?,
674                old_perm: Some(Permission::Content),
675                new_perm: Some(Permission::Content),
676            }]
677        );
678        Ok(())
679    }
680
681    #[test]
682    fn test_change_permissions() -> Result<()> {
683        let base_tree = base_tree();
684
685        #[rustfmt::skip]
686        let tree_with_edit =
687            Tree::Dir(15, vec![
688                ("books", Tree::Dir(14, vec![
689                    ("disparition.txt", Tree::Executable(3)),
690                    ("revision_link", Tree::Revision(9)),
691                    ("war_and_peace.doc", Tree::File(4)),
692                ])),
693                ("music", Tree::Dir(10, vec![
694                    ("lemon_tree.mp3", Tree::File(6)),
695                    ("on_reflection.wav", Tree::File(7)),
696                ])),
697                ("readme.txt", Tree::File(8)),
698        ]);
699
700        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
701
702        assert_eq!(
703            diff.operations,
704            vec![TreeDiffOperation::Modified {
705                path: parse_path(&graph, "books/disparition.txt"),
706                old_file: graph
707                    .properties()
708                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
709                new_file: graph
710                    .properties()
711                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
712                old_perm: Some(Permission::Content),
713                new_perm: Some(Permission::ExecutableContent),
714            }]
715        );
716        Ok(())
717    }
718
719    #[test]
720    fn test_edit_revision() -> Result<()> {
721        let base_tree = base_tree();
722
723        #[rustfmt::skip]
724        let tree_with_edit =
725            Tree::Dir(15, vec![
726                ("books", Tree::Dir(14, vec![
727                    ("disparition.txt", Tree::File(3)),
728                    ("revision_link", Tree::Revision(16)),
729                    ("war_and_peace.doc", Tree::File(4)),
730                ])),
731                ("music", Tree::Dir(10, vec![
732                    ("lemon_tree.mp3", Tree::File(6)),
733                    ("on_reflection.wav", Tree::File(7)),
734                ])),
735                ("readme.txt", Tree::File(8)),
736        ]);
737
738        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
739
740        assert_eq!(
741            diff.operations,
742            vec![TreeDiffOperation::Modified {
743                path: parse_path(&graph, "books/revision_link"),
744                old_file: graph
745                    .properties()
746                    .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
747                new_file: graph
748                    .properties()
749                    .node_id("swh:1:rev:0000000000000000000000000000000000000010")?,
750                old_perm: Some(Permission::Revision),
751                new_perm: Some(Permission::Revision),
752            }]
753        );
754        Ok(())
755    }
756
757    #[test]
758    fn test_additions_and_deletions() -> Result<()> {
759        let base_tree = base_tree();
760
761        #[rustfmt::skip]
762        let tree_with_addition =
763            Tree::Dir(11, vec![
764                ("books", Tree::Dir(2, vec![
765                    ("disparition.txt", Tree::File(3)),
766                    ("revision_link", Tree::Revision(9)),
767                    ("war_and_peace.doc", Tree::File(4)),
768                ])),
769                ("music", Tree::Dir(10, vec![
770                    ("lemon_tree.mp3", Tree::File(6)),
771                    ("gavotte_aven.wma", Tree::File(12)),
772                    ("on_reflection.wav", Tree::File(7)),
773                ])),
774                ("readme.txt", Tree::File(8)),
775        ]);
776
777        #[rustfmt::skip]
778        let tree_with_directory_deletion =
779            Tree::Dir(13, vec![
780                ("music", Tree::Dir(5, vec![
781                    ("lemon_tree.mp3", Tree::File(6)),
782                    ("on_reflection.wav", Tree::File(7)),
783                ])),
784                ("readme.txt", Tree::File(8)),
785        ]);
786
787        let (graph, diff) = diff_trees(&base_tree, &tree_with_addition, None)?;
788
789        assert_eq!(
790            diff.operations().collect::<Vec<_>>(),
791            vec![TreeDiffOperation::Added {
792                path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
793                new_file: graph
794                    .properties()
795                    .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
796                new_perm: Some(Permission::Content),
797            }]
798        );
799
800        let (graph, diff) = diff_trees(&base_tree, &tree_with_directory_deletion, None)?;
801
802        assert_eq!(
803            diff.operations().collect::<Vec<_>>(),
804            vec![
805                TreeDiffOperation::Deleted {
806                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
807                    old_file: graph
808                        .properties()
809                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
810                    old_perm: Some(Permission::Content),
811                },
812                TreeDiffOperation::Deleted {
813                    path: parse_path(&graph, "books/revision_link").as_ref(),
814                    old_file: graph
815                        .properties()
816                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
817                    old_perm: Some(Permission::Revision),
818                },
819                TreeDiffOperation::Deleted {
820                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
821                    old_file: graph
822                        .properties()
823                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
824                    old_perm: Some(Permission::Content),
825                },
826            ]
827        );
828
829        let (graph, diff) = diff_trees(&tree_with_addition, &tree_with_directory_deletion, None)?;
830
831        assert_eq!(
832            diff.operations().collect::<Vec<_>>(),
833            vec![
834                TreeDiffOperation::Deleted {
835                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
836                    old_file: graph
837                        .properties()
838                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
839                    old_perm: Some(Permission::Content),
840                },
841                TreeDiffOperation::Deleted {
842                    path: parse_path(&graph, "books/revision_link").as_ref(),
843                    old_file: graph
844                        .properties()
845                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
846                    old_perm: Some(Permission::Revision),
847                },
848                TreeDiffOperation::Deleted {
849                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
850                    old_file: graph
851                        .properties()
852                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
853                    old_perm: Some(Permission::Content),
854                },
855                TreeDiffOperation::Deleted {
856                    path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
857                    old_file: graph
858                        .properties()
859                        .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
860                    old_perm: Some(Permission::Content),
861                }
862            ]
863        );
864        Ok(())
865    }
866
867    #[test]
868    fn test_move_file() -> Result<()> {
869        let base_tree = base_tree();
870
871        #[rustfmt::skip]
872        let tree_with_edit =
873            Tree::Dir(17, vec![
874                ("books", Tree::Dir(16, vec![
875                    ("disparition.txt", Tree::File(3)),
876                    ("revision_link", Tree::Revision(9)),
877                    ("war_and_peace.doc", Tree::File(4)),
878                    ("readme.txt", Tree::File(8)),
879                ])),
880                ("music", Tree::Dir(10, vec![
881                    ("lemon_tree.mp3", Tree::File(6)),
882                    ("on_reflection.wav", Tree::File(7)),
883                ])),
884        ]);
885
886        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
887
888        // TODO: in the future we want to detect renames,
889        // instead of registering this as an addition and deletion
890        assert_eq!(
891            diff.operations().collect::<Vec<_>>(),
892            vec![
893                TreeDiffOperation::Deleted {
894                    path: parse_path(&graph, "readme.txt").as_ref(),
895                    old_file: graph
896                        .properties()
897                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
898                    old_perm: Some(Permission::Content),
899                },
900                TreeDiffOperation::Added {
901                    path: parse_path(&graph, "books/readme.txt").as_ref(),
902                    new_file: graph
903                        .properties()
904                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
905                    new_perm: Some(Permission::Content),
906                },
907            ]
908        );
909        Ok(())
910    }
911
912    #[test]
913    fn test_change_dir_to_file() -> Result<()> {
914        let base_tree = base_tree();
915
916        #[rustfmt::skip]
917        let modified_tree =
918            Tree::Dir(11, vec![
919                ("books", Tree::Dir(2, vec![
920                    ("disparition.txt", Tree::File(3)),
921                    ("revision_link", Tree::Revision(9)),
922                    ("war_and_peace.doc", Tree::File(4)),
923                ])),
924                ("music", Tree::Revision(10)),
925                ("readme.txt", Tree::File(8)),
926        ]);
927
928        let (graph, diff) = diff_trees(&base_tree, &modified_tree, None)?;
929
930        assert_eq!(
931            diff.operations().collect::<Vec<_>>(),
932            vec![
933                TreeDiffOperation::Deleted {
934                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
935                    old_file: graph
936                        .properties()
937                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
938                    old_perm: Some(Permission::Content),
939                },
940                TreeDiffOperation::Deleted {
941                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
942                    old_file: graph
943                        .properties()
944                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
945                    old_perm: Some(Permission::Content),
946                },
947                TreeDiffOperation::Added {
948                    path: parse_path(&graph, "music").as_ref(),
949                    new_file: graph
950                        .properties()
951                        .node_id("swh:1:rev:000000000000000000000000000000000000000a")?,
952                    new_perm: Some(Permission::Revision),
953                },
954            ]
955        );
956        Ok(())
957    }
958
959    #[test]
960    fn test_change_file_to_dir() -> Result<()> {
961        let base_tree = base_tree();
962
963        #[rustfmt::skip]
964        let tree_with_edit =
965            Tree::Dir(11, vec![
966                ("books", Tree::Dir(12, vec![
967                    ("disparition.txt", Tree::File(3)),
968                    ("revision_link", Tree::Dir(10, vec![
969                        ("readme.txt", Tree::File(8)),
970                    ])),
971                    ("war_and_peace.doc", Tree::File(4)),
972                ])),
973                ("music", Tree::Dir(5, vec![
974                    ("lemon_tree.mp3", Tree::File(6)),
975                    ("on_reflection.wav", Tree::File(7)),
976                ])),
977                ("readme.txt", Tree::File(8)),
978        ]);
979
980        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
981
982        assert_eq!(
983            diff.operations().collect::<Vec<_>>(),
984            vec![
985                TreeDiffOperation::Deleted {
986                    path: parse_path(&graph, "books/revision_link").as_ref(),
987                    old_file: graph
988                        .properties()
989                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
990                    old_perm: Some(Permission::Revision),
991                },
992                TreeDiffOperation::Added {
993                    path: parse_path(&graph, "books/revision_link/readme.txt").as_ref(),
994                    new_file: graph
995                        .properties()
996                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
997                    new_perm: Some(Permission::Content),
998                }
999            ]
1000        );
1001        Ok(())
1002    }
1003
1004    #[test]
1005    fn test_limit() -> Result<()> {
1006        let base_tree = base_tree();
1007
1008        #[rustfmt::skip]
1009        let tree_with_edit =
1010            Tree::Dir(15, vec![
1011                ("books", Tree::Dir(14, vec![
1012                    ("disparition.txt", Tree::File(3)),
1013                    ("revision_link", Tree::Revision(16)),
1014                    ("war_and_peace.doc", Tree::File(4)),
1015                ])),
1016        ]);
1017
1018        assert!(
1019            diff_trees(&base_tree, &tree_with_edit, Some(2)).is_err(),
1020            "Expected the diff to error out because there are too many changes"
1021        );
1022        Ok(())
1023    }
1024
1025    /// Helper function to call the [`diff_tree`] function more easily on graphs
1026    /// generated from synthetic trees
1027    fn diff_trees(
1028        tree_1: &Tree,
1029        tree_2: &Tree,
1030        limit: Option<usize>,
1031    ) -> Result<(BuiltGraph, TreeDiff)> {
1032        let mut builder = GraphBuilder::default();
1033        let old_root = build_graph_recursively(tree_1, &mut builder)?.0;
1034        let new_root = build_graph_recursively(tree_2, &mut builder)?.0;
1035        let graph = builder.done()?;
1036        let diff = tree_diff_dirs(&graph, Some(old_root), new_root, limit)?;
1037        Ok((graph, diff))
1038    }
1039
1040    fn make_node(id: &u32, node_type: NodeType, builder: &mut GraphBuilder) -> Result<NodeId> {
1041        let mut hash = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
1042        hash[16..].copy_from_slice(&id.to_be_bytes());
1043        let swhid = SWHID {
1044            namespace_version: 1,
1045            node_type,
1046            hash,
1047        };
1048        if let Some(node_id) = builder.node_id(swhid) {
1049            Ok(node_id)
1050        } else {
1051            Ok(builder.node(swhid)?.done())
1052        }
1053    }
1054
1055    fn build_graph_recursively(
1056        tree: &Tree,
1057        builder: &mut GraphBuilder,
1058    ) -> Result<(NodeId, Permission)> {
1059        match tree {
1060            Tree::File(node_id) => Ok((
1061                make_node(node_id, NodeType::Content, builder)?,
1062                Permission::Content,
1063            )),
1064            Tree::Executable(node_id) => Ok((
1065                make_node(node_id, NodeType::Content, builder)?,
1066                Permission::ExecutableContent,
1067            )),
1068            Tree::Dir(node_id, items) => {
1069                let dir = make_node(node_id, NodeType::Directory, builder)?;
1070                for (name, child) in items {
1071                    let (child_node, permission) = build_graph_recursively(child, builder)?;
1072                    builder.dir_arc(dir, child_node, permission, name.as_bytes());
1073                }
1074                Ok((dir, Permission::Directory))
1075            }
1076            Tree::Revision(node_id) => Ok((
1077                make_node(node_id, NodeType::Revision, builder)?,
1078                Permission::Revision,
1079            )),
1080        }
1081    }
1082}