swh_graph_stdlib/
diff.rs

1// Copyright (C) 2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Recursively lists differences between two directory nodes.
7
8use anyhow::{bail, ensure, Result};
9
10use swh_graph::{
11    graph::{NodeId, SwhGraphWithProperties, SwhLabeledForwardGraph},
12    labels::{EdgeLabel, LabelNameId, Permission},
13    properties, NodeType,
14};
15
16/// An operation made between two revisions on a single directory entry
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum TreeDiffOperation<Path: AsRef<[LabelNameId]> = Box<[LabelNameId]>> {
19    /// Adds a single file or directory
20    Added {
21        /// Full path (from repo root) to the added file
22        path: Path,
23        new_file: NodeId,
24        new_perm: Option<Permission>,
25    },
26    /// Removes a single file or directory
27    Deleted {
28        /// Full path (from repo root) to the removed file
29        path: Path,
30        old_file: NodeId,
31        old_perm: Option<Permission>,
32    },
33    /// Edits a single file (it can't be a directory)
34    Modified {
35        /// Full path (from the repo root) to the removed file
36        path: Path,
37        new_file: NodeId,
38        old_file: NodeId,
39        new_perm: Option<Permission>,
40        old_perm: Option<Permission>,
41    },
42    /// Moves a file from one place to another.
43    /// The file may have been edited in the process,
44    /// in which case the two supplied node ids will differ.
45    Moved {
46        old_path: Path,
47        new_path: Path,
48        old_file: NodeId,
49        new_file: NodeId,
50        old_perm: Option<Permission>,
51        new_perm: Option<Permission>,
52    },
53}
54
55impl<Path: AsRef<[LabelNameId]>> TreeDiffOperation<Path> {
56    pub fn shallow_copy(&self) -> TreeDiffOperation<&[LabelNameId]> {
57        use TreeDiffOperation::*;
58
59        match self {
60            Added {
61                path,
62                new_file,
63                new_perm,
64            } => Added {
65                path: path.as_ref(),
66                new_file: *new_file,
67                new_perm: *new_perm,
68            },
69            Deleted {
70                path,
71                old_file,
72                old_perm,
73            } => Deleted {
74                path: path.as_ref(),
75                old_file: *old_file,
76                old_perm: *old_perm,
77            },
78            Modified {
79                path,
80                new_file,
81                old_file,
82                new_perm,
83                old_perm,
84            } => Modified {
85                path: path.as_ref(),
86                new_file: *new_file,
87                old_file: *old_file,
88                new_perm: *new_perm,
89                old_perm: *old_perm,
90            },
91            Moved {
92                old_path,
93                new_path,
94                new_file,
95                old_file,
96                new_perm,
97                old_perm,
98            } => Moved {
99                old_path: old_path.as_ref(),
100                new_path: new_path.as_ref(),
101                new_file: *new_file,
102                old_file: *old_file,
103                new_perm: *new_perm,
104                old_perm: *old_perm,
105            },
106        }
107    }
108}
109
110/// A diff between two trees, treating files as atomic units
111#[derive(Debug, Clone, PartialEq, Eq)]
112pub struct TreeDiff {
113    operations: Vec<TreeDiffOperation>,
114}
115
116impl TreeDiff {
117    pub fn operations(&self) -> impl Iterator<Item = TreeDiffOperation<&[LabelNameId]>> + use<'_> {
118        self.operations.iter().map(TreeDiffOperation::shallow_copy)
119    }
120}
121
122/// Compute a tree diff between the old and new trees rooted in
123/// the nodes supplied, which are required to be directory nodes.
124///
125/// The limit parameter can optionally limit the number of diff
126/// elements to return (to avoid returning very big diffs from
127/// git bombs). If this limit is reached, an error is returned.
128///
129/// WARNING: rename detection is currently not supported. The `Moved` variant is never returned.
130pub fn tree_diff_dirs<G>(
131    graph: &G,
132    old_tree: NodeId,
133    new_tree: NodeId,
134    limit: Option<usize>,
135) -> Result<TreeDiff>
136where
137    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
138{
139    let mut operations = Vec::new();
140    let mut path_prefix = Vec::new();
141    let mut stack: Vec<StackItem> = Vec::new();
142    stack.push(StackItem::Visit {
143        old: old_tree,
144        new: new_tree,
145        path: None,
146    });
147    while let Some(stack_item) = stack.pop() {
148        match stack_item {
149            StackItem::Leave => {
150                path_prefix.pop();
151            }
152            StackItem::Visit { path, old, new } => {
153                if let Some(path) = path {
154                    path_prefix.push(path);
155                }
156                tree_diff_internal(
157                    graph,
158                    old,
159                    new,
160                    &limit,
161                    &mut operations,
162                    &mut path_prefix,
163                    &mut stack,
164                )?;
165            }
166        }
167    }
168    Ok(TreeDiff { operations })
169}
170
171/// Internal representation of our stack to recurse into directories,
172/// to avoid overflowing Rust's call stack.
173enum StackItem {
174    /// marker to update the path to the directory currently being visited
175    Leave,
176    // task to visit a pair of nodes
177    Visit {
178        path: Option<LabelNameId>,
179        old: NodeId,
180        new: NodeId,
181    },
182}
183
184fn tree_diff_internal<G>(
185    graph: &G,
186    old_tree: NodeId,
187    new_tree: NodeId,
188    limit: &Option<usize>,
189    result: &mut Vec<TreeDiffOperation>,
190    path_prefix: &mut Vec<LabelNameId>,
191    stack: &mut Vec<StackItem>,
192) -> Result<()>
193where
194    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
195{
196    if old_tree == new_tree {
197        // nothing to diff!
198        return Ok(());
199    }
200
201    let props = graph.properties();
202
203    // Iterate jointly on the listing of both directories.
204    // The listings are known to be sorted on their label name ids.
205    let mut old_dir_iter = list_dir(graph, old_tree)?.into_iter().peekable();
206    let mut new_dir_iter = list_dir(graph, new_tree)?.into_iter().peekable();
207    loop {
208        if limit.is_some_and(|limit| result.len() >= limit) {
209            // TODO convert to if let-chains when they become available to avoid the `unwrap`
210            bail!("Diff has more than {} changes", limit.unwrap())
211        }
212        // concatenates file names to obtain full relative path from the repo's root
213        let full_path = |file_name: &LabelNameId| {
214            [path_prefix.as_slice(), &[*file_name]]
215                .concat()
216                .into_boxed_slice()
217        };
218        match (old_dir_iter.peek(), new_dir_iter.peek()) {
219            (None, None) => break,
220            (None, Some(new)) => {
221                add_directory_recursively(graph, new, limit, result, path_prefix)?;
222                new_dir_iter.next();
223            }
224            (Some(old), None) => {
225                delete_directory_recursively(graph, old, limit, result, path_prefix)?;
226                old_dir_iter.next();
227            }
228            (Some(old), Some(new)) => {
229                match old.path.0.cmp(&new.path.0) {
230                    std::cmp::Ordering::Less => {
231                        delete_directory_recursively(graph, old, limit, result, path_prefix)?;
232                        old_dir_iter.next();
233                    }
234                    std::cmp::Ordering::Greater => {
235                        add_directory_recursively(graph, new, limit, result, path_prefix)?;
236                        new_dir_iter.next();
237                    }
238                    std::cmp::Ordering::Equal => {
239                        // If the two ids are equal, we know the contents are recursively equal,
240                        // in which case there is nothing to compare. Otherwise:
241                        if old.id != new.id || old.perm != new.perm {
242                            let old_type = props.node_type(old.id);
243                            let new_type = props.node_type(new.id);
244                            let content_types = [NodeType::Content, NodeType::Revision];
245
246                            if content_types.contains(&old_type)
247                                && content_types.contains(&new_type)
248                            {
249                                // Both entries point to file-like things, so we register the difference as an edit on them
250                                result.push(TreeDiffOperation::Modified {
251                                    path: full_path(&new.path),
252                                    new_file: new.id,
253                                    old_file: old.id,
254                                    new_perm: new.perm,
255                                    old_perm: old.perm,
256                                });
257                            } else if content_types.contains(&old_type)
258                                && new_type == NodeType::Directory
259                            {
260                                // A file got changed into a directory.
261                                result.push(TreeDiffOperation::Deleted {
262                                    path: full_path(&old.path),
263                                    old_file: old.id,
264                                    old_perm: old.perm,
265                                });
266                                add_directory_recursively(graph, new, limit, result, path_prefix)?;
267                            } else if old_type == NodeType::Directory
268                                && content_types.contains(&new_type)
269                            {
270                                // A directory changed into a file
271                                delete_directory_recursively(
272                                    graph,
273                                    old,
274                                    limit,
275                                    result,
276                                    path_prefix,
277                                )?;
278                                result.push(TreeDiffOperation::Added {
279                                    path: full_path(&new.path),
280                                    new_file: new.id,
281                                    new_perm: new.perm,
282                                });
283                            } else if old_type == NodeType::Directory
284                                && new_type == NodeType::Directory
285                            {
286                                // Two directories with different contents: let's remember
287                                // to recurse into this directory later, by pushing the task
288                                // to visit it on our stack.
289                                // We also add a `Leave` marker to update the current path once
290                                // we're done visiting that subdirectory. In order for it to be
291                                // processed after the visit, we add it on the stack before the
292                                // visit (since it's Last-In-First-Out)
293                                stack.push(StackItem::Leave);
294                                stack.push(StackItem::Visit {
295                                    path: Some(new.path),
296                                    old: old.id,
297                                    new: new.id,
298                                });
299                            }
300                        };
301                        old_dir_iter.next();
302                        new_dir_iter.next();
303                    }
304                }
305            }
306        }
307    }
308    Ok(())
309}
310
311/// Recursively browse a part of a file system and mark all of its contents as added
312fn add_directory_recursively<G>(
313    graph: &G,
314    dir_entry: &DirEntry,
315    limit: &Option<usize>,
316    result: &mut Vec<TreeDiffOperation>,
317    path_prefix: &[LabelNameId],
318) -> Result<()>
319where
320    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
321{
322    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, true)
323}
324
325/// Recursively browse a part of a file system and mark all of its contents as deleted
326fn delete_directory_recursively<G>(
327    graph: &G,
328    dir_entry: &DirEntry,
329    limit: &Option<usize>,
330    result: &mut Vec<TreeDiffOperation>,
331    path_prefix: &[LabelNameId],
332) -> Result<()>
333where
334    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
335{
336    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, false)
337}
338
339enum TraverseStackItem {
340    Leave,
341    Visit {
342        node_id: NodeId,
343        path: LabelNameId,
344        perm: Option<Permission>,
345    },
346}
347
348/// Recursively browse a part of a file system and mark all of its contents as added
349/// or deleted (as indicated by the last parameter).
350fn add_or_delete_directory_recursively<G>(
351    graph: &G,
352    dir_entry: &DirEntry,
353    limit: &Option<usize>,
354    result: &mut Vec<TreeDiffOperation>,
355    path_prefix: &[LabelNameId],
356    add: bool,
357) -> Result<()>
358where
359    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
360{
361    let props = graph.properties();
362    let mut stack = Vec::new();
363    let mut path_prefix = path_prefix.to_vec();
364    stack.push(TraverseStackItem::Visit {
365        node_id: dir_entry.id,
366        path: dir_entry.path,
367        perm: dir_entry.perm,
368    });
369    while let Some(task) = stack.pop() {
370        if limit.is_some_and(|limit| result.len() >= limit) {
371            bail!("Diff has more than {} changes", limit.unwrap());
372        }
373        match task {
374            TraverseStackItem::Leave => {
375                path_prefix.pop();
376            }
377            TraverseStackItem::Visit {
378                node_id,
379                path,
380                perm,
381            } => {
382                path_prefix.push(path);
383                match props.node_type(node_id) {
384                    NodeType::Content | NodeType::Revision => {
385                        let diff_item = if add {
386                            TreeDiffOperation::Added {
387                                path: path_prefix.to_vec().into_boxed_slice(),
388                                new_file: node_id,
389                                new_perm: perm,
390                            }
391                        } else {
392                            TreeDiffOperation::Deleted {
393                                path: path_prefix.to_vec().into_boxed_slice(),
394                                old_file: node_id,
395                                old_perm: perm,
396                            }
397                        };
398                        result.push(diff_item);
399                    }
400                    NodeType::Directory => {
401                        let dir_listing = list_dir(graph, node_id)?;
402                        for entry in &dir_listing {
403                            stack.push(TraverseStackItem::Leave);
404                            stack.push(TraverseStackItem::Visit {
405                                node_id: entry.id,
406                                path: entry.path,
407                                perm: entry.perm,
408                            });
409                        }
410                    }
411                    x => {
412                        bail!("unexpected node type {x} when browsing a directory");
413                    }
414                }
415            }
416        }
417    }
418    Ok(())
419}
420
421/// Internal struct to represent an entry in the list of contents of a directory
422#[derive(Debug, Clone, Copy, PartialEq, Eq)]
423struct DirEntry {
424    path: LabelNameId,
425    perm: Option<Permission>,
426    id: NodeId,
427}
428
429/// Lists the contents of a directory, sorting the results by label name id.
430fn list_dir<G>(graph: &G, dir_node: NodeId) -> Result<Vec<DirEntry>>
431where
432    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
433{
434    let props = graph.properties();
435    let node_type = props.node_type(dir_node);
436    ensure!(
437        node_type == NodeType::Directory,
438        "Type of {dir_node} should be dir, but is {node_type} instead"
439    );
440    let mut listing = Vec::new();
441    for (succ, labels) in graph.labeled_successors(dir_node) {
442        let node_type = props.node_type(succ);
443        if node_type == NodeType::Content
444            || node_type == NodeType::Directory
445            || node_type == NodeType::Revision
446        {
447            for label in labels {
448                if let EdgeLabel::DirEntry(dentry) = label {
449                    let perm = dentry.permission();
450                    listing.push(DirEntry {
451                        path: dentry.label_name_id(),
452                        perm,
453                        id: succ,
454                    });
455                }
456            }
457        }
458    }
459    listing.sort_unstable_by_key(|dir_entry| dir_entry.path.0);
460    Ok(listing)
461}
462
463#[cfg(test)]
464mod tests {
465
466    use swh_graph::{
467        graph::*,
468        graph_builder::{BuiltGraph, GraphBuilder},
469        NodeType, SWHID,
470    };
471
472    use super::*;
473
474    /// Convenience type to define test cases.
475    /// The `u32` fields are used to generate SWHIDs
476    /// when converting the trees to graphs.
477    enum Tree {
478        File(u32),
479        Executable(u32),
480        Dir(u32, Vec<(&'static str, Tree)>),
481        Revision(u32),
482    }
483
484    #[rustfmt::skip] // to keep the tree representation more compact and readable
485    fn base_tree() -> Tree {
486        Tree::Dir(1, vec![
487            ("books", Tree::Dir(2, vec![
488                ("disparition.txt", Tree::File(3)),
489                ("revision_link", Tree::Revision(9)),
490                ("war_and_peace.doc", Tree::File(4)),
491            ])),
492            ("music", Tree::Dir(5, vec![
493                ("lemon_tree.mp3", Tree::File(6)),
494                ("on_reflection.wav", Tree::File(7)),
495            ])),
496            ("readme.txt", Tree::File(8)),
497        ])
498    }
499
500    fn parse_path(graph: &BuiltGraph, path: &str) -> Box<[LabelNameId]> {
501        path.split('/')
502            .map(|part| {
503                graph
504                    .properties()
505                    .label_name_id(part.as_bytes())
506                    .expect("label name does not exist in the graph")
507            })
508            .collect()
509    }
510
511    #[test]
512    fn test_list_dir() -> Result<()> {
513        let tree = base_tree();
514        let mut builder = GraphBuilder::default();
515        let root_id = build_graph_recursively(&tree, &mut builder)?.0;
516        let graph = builder.done()?;
517        let props = graph.properties();
518        let books_subdir_id =
519            props.node_id("swh:1:dir:0000000000000000000000000000000000000002")?;
520        let music_subdir_id =
521            props.node_id("swh:1:dir:0000000000000000000000000000000000000005")?;
522        let readme_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000008")?;
523        let lemon_tree_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000006")?;
524        let on_reflection_id =
525            props.node_id("swh:1:cnt:0000000000000000000000000000000000000007")?;
526
527        assert_eq!(
528            list_dir(&graph, root_id)?,
529            vec![
530                DirEntry {
531                    path: parse_path(&graph, "books")[0],
532                    perm: Some(Permission::Directory),
533                    id: books_subdir_id,
534                },
535                DirEntry {
536                    path: parse_path(&graph, "music")[0],
537                    perm: Some(Permission::Directory),
538                    id: music_subdir_id,
539                },
540                DirEntry {
541                    path: parse_path(&graph, "readme.txt")[0],
542                    perm: Some(Permission::Content),
543                    id: readme_id,
544                }
545            ]
546        );
547        assert_eq!(
548            list_dir(&graph, music_subdir_id)?,
549            vec![
550                DirEntry {
551                    path: parse_path(&graph, "lemon_tree.mp3")[0],
552                    perm: Some(Permission::Content),
553                    id: lemon_tree_id,
554                },
555                DirEntry {
556                    path: parse_path(&graph, "on_reflection.wav")[0],
557                    perm: Some(Permission::Content),
558                    id: on_reflection_id,
559                }
560            ]
561        );
562        Ok(())
563    }
564
565    #[test]
566    fn test_edit_file() -> Result<()> {
567        let base_tree = base_tree();
568
569        #[rustfmt::skip]
570        let tree_with_edit =
571            Tree::Dir(15, vec![
572                ("books", Tree::Dir(14, vec![
573                    ("disparition.txt", Tree::File(13)),
574                    ("revision_link", Tree::Revision(9)),
575                    ("war_and_peace.doc", Tree::File(4)),
576                ])),
577                ("music", Tree::Dir(10, vec![
578                    ("lemon_tree.mp3", Tree::File(6)),
579                    ("on_reflection.wav", Tree::File(7)),
580                ])),
581                ("readme.txt", Tree::File(8)),
582        ]);
583
584        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
585
586        assert_eq!(
587            diff.operations,
588            vec![TreeDiffOperation::Modified {
589                path: parse_path(&graph, "books/disparition.txt"),
590                old_file: graph
591                    .properties()
592                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
593                new_file: graph
594                    .properties()
595                    .node_id("swh:1:cnt:000000000000000000000000000000000000000d")?,
596                old_perm: Some(Permission::Content),
597                new_perm: Some(Permission::Content),
598            }]
599        );
600        Ok(())
601    }
602
603    #[test]
604    fn test_change_permissions() -> Result<()> {
605        let base_tree = base_tree();
606
607        #[rustfmt::skip]
608        let tree_with_edit =
609            Tree::Dir(15, vec![
610                ("books", Tree::Dir(14, vec![
611                    ("disparition.txt", Tree::Executable(3)),
612                    ("revision_link", Tree::Revision(9)),
613                    ("war_and_peace.doc", Tree::File(4)),
614                ])),
615                ("music", Tree::Dir(10, vec![
616                    ("lemon_tree.mp3", Tree::File(6)),
617                    ("on_reflection.wav", Tree::File(7)),
618                ])),
619                ("readme.txt", Tree::File(8)),
620        ]);
621
622        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
623
624        assert_eq!(
625            diff.operations,
626            vec![TreeDiffOperation::Modified {
627                path: parse_path(&graph, "books/disparition.txt"),
628                old_file: graph
629                    .properties()
630                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
631                new_file: graph
632                    .properties()
633                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
634                old_perm: Some(Permission::Content),
635                new_perm: Some(Permission::ExecutableContent),
636            }]
637        );
638        Ok(())
639    }
640
641    #[test]
642    fn test_edit_revision() -> Result<()> {
643        let base_tree = base_tree();
644
645        #[rustfmt::skip]
646        let tree_with_edit =
647            Tree::Dir(15, vec![
648                ("books", Tree::Dir(14, vec![
649                    ("disparition.txt", Tree::File(3)),
650                    ("revision_link", Tree::Revision(16)),
651                    ("war_and_peace.doc", Tree::File(4)),
652                ])),
653                ("music", Tree::Dir(10, vec![
654                    ("lemon_tree.mp3", Tree::File(6)),
655                    ("on_reflection.wav", Tree::File(7)),
656                ])),
657                ("readme.txt", Tree::File(8)),
658        ]);
659
660        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
661
662        assert_eq!(
663            diff.operations,
664            vec![TreeDiffOperation::Modified {
665                path: parse_path(&graph, "books/revision_link"),
666                old_file: graph
667                    .properties()
668                    .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
669                new_file: graph
670                    .properties()
671                    .node_id("swh:1:rev:0000000000000000000000000000000000000010")?,
672                old_perm: Some(Permission::Revision),
673                new_perm: Some(Permission::Revision),
674            }]
675        );
676        Ok(())
677    }
678
679    #[test]
680    fn test_additions_and_deletions() -> Result<()> {
681        let base_tree = base_tree();
682
683        #[rustfmt::skip]
684        let tree_with_addition =
685            Tree::Dir(11, vec![
686                ("books", Tree::Dir(2, vec![
687                    ("disparition.txt", Tree::File(3)),
688                    ("revision_link", Tree::Revision(9)),
689                    ("war_and_peace.doc", Tree::File(4)),
690                ])),
691                ("music", Tree::Dir(10, vec![
692                    ("lemon_tree.mp3", Tree::File(6)),
693                    ("gavotte_aven.wma", Tree::File(12)),
694                    ("on_reflection.wav", Tree::File(7)),
695                ])),
696                ("readme.txt", Tree::File(8)),
697        ]);
698
699        #[rustfmt::skip]
700        let tree_with_directory_deletion =
701            Tree::Dir(13, vec![
702                ("music", Tree::Dir(5, vec![
703                    ("lemon_tree.mp3", Tree::File(6)),
704                    ("on_reflection.wav", Tree::File(7)),
705                ])),
706                ("readme.txt", Tree::File(8)),
707        ]);
708
709        let (graph, diff) = diff_trees(&base_tree, &tree_with_addition, None)?;
710
711        assert_eq!(
712            diff.operations().collect::<Vec<_>>(),
713            vec![TreeDiffOperation::Added {
714                path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
715                new_file: graph
716                    .properties()
717                    .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
718                new_perm: Some(Permission::Content),
719            }]
720        );
721
722        let (graph, diff) = diff_trees(&base_tree, &tree_with_directory_deletion, None)?;
723
724        assert_eq!(
725            diff.operations().collect::<Vec<_>>(),
726            vec![
727                TreeDiffOperation::Deleted {
728                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
729                    old_file: graph
730                        .properties()
731                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
732                    old_perm: Some(Permission::Content),
733                },
734                TreeDiffOperation::Deleted {
735                    path: parse_path(&graph, "books/revision_link").as_ref(),
736                    old_file: graph
737                        .properties()
738                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
739                    old_perm: Some(Permission::Revision),
740                },
741                TreeDiffOperation::Deleted {
742                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
743                    old_file: graph
744                        .properties()
745                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
746                    old_perm: Some(Permission::Content),
747                },
748            ]
749        );
750
751        let (graph, diff) = diff_trees(&tree_with_addition, &tree_with_directory_deletion, None)?;
752
753        assert_eq!(
754            diff.operations().collect::<Vec<_>>(),
755            vec![
756                TreeDiffOperation::Deleted {
757                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
758                    old_file: graph
759                        .properties()
760                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
761                    old_perm: Some(Permission::Content),
762                },
763                TreeDiffOperation::Deleted {
764                    path: parse_path(&graph, "books/revision_link").as_ref(),
765                    old_file: graph
766                        .properties()
767                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
768                    old_perm: Some(Permission::Revision),
769                },
770                TreeDiffOperation::Deleted {
771                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
772                    old_file: graph
773                        .properties()
774                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
775                    old_perm: Some(Permission::Content),
776                },
777                TreeDiffOperation::Deleted {
778                    path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
779                    old_file: graph
780                        .properties()
781                        .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
782                    old_perm: Some(Permission::Content),
783                }
784            ]
785        );
786        Ok(())
787    }
788
789    #[test]
790    fn test_move_file() -> Result<()> {
791        let base_tree = base_tree();
792
793        #[rustfmt::skip]
794        let tree_with_edit =
795            Tree::Dir(17, vec![
796                ("books", Tree::Dir(16, vec![
797                    ("disparition.txt", Tree::File(3)),
798                    ("revision_link", Tree::Revision(9)),
799                    ("war_and_peace.doc", Tree::File(4)),
800                    ("readme.txt", Tree::File(8)),
801                ])),
802                ("music", Tree::Dir(10, vec![
803                    ("lemon_tree.mp3", Tree::File(6)),
804                    ("on_reflection.wav", Tree::File(7)),
805                ])),
806        ]);
807
808        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
809
810        // TODO: in the future we want to detect renames,
811        // instead of registering this as an addition and deletion
812        assert_eq!(
813            diff.operations().collect::<Vec<_>>(),
814            vec![
815                TreeDiffOperation::Deleted {
816                    path: parse_path(&graph, "readme.txt").as_ref(),
817                    old_file: graph
818                        .properties()
819                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
820                    old_perm: Some(Permission::Content),
821                },
822                TreeDiffOperation::Added {
823                    path: parse_path(&graph, "books/readme.txt").as_ref(),
824                    new_file: graph
825                        .properties()
826                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
827                    new_perm: Some(Permission::Content),
828                },
829            ]
830        );
831        Ok(())
832    }
833
834    #[test]
835    fn test_change_dir_to_file() -> Result<()> {
836        let base_tree = base_tree();
837
838        #[rustfmt::skip]
839        let modified_tree =
840            Tree::Dir(11, vec![
841                ("books", Tree::Dir(2, vec![
842                    ("disparition.txt", Tree::File(3)),
843                    ("revision_link", Tree::Revision(9)),
844                    ("war_and_peace.doc", Tree::File(4)),
845                ])),
846                ("music", Tree::Revision(10)),
847                ("readme.txt", Tree::File(8)),
848        ]);
849
850        let (graph, diff) = diff_trees(&base_tree, &modified_tree, None)?;
851
852        assert_eq!(
853            diff.operations().collect::<Vec<_>>(),
854            vec![
855                TreeDiffOperation::Deleted {
856                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
857                    old_file: graph
858                        .properties()
859                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
860                    old_perm: Some(Permission::Content),
861                },
862                TreeDiffOperation::Deleted {
863                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
864                    old_file: graph
865                        .properties()
866                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
867                    old_perm: Some(Permission::Content),
868                },
869                TreeDiffOperation::Added {
870                    path: parse_path(&graph, "music").as_ref(),
871                    new_file: graph
872                        .properties()
873                        .node_id("swh:1:rev:000000000000000000000000000000000000000a")?,
874                    new_perm: Some(Permission::Revision),
875                },
876            ]
877        );
878        Ok(())
879    }
880
881    #[test]
882    fn test_change_file_to_dir() -> Result<()> {
883        let base_tree = base_tree();
884
885        #[rustfmt::skip]
886        let tree_with_edit =
887            Tree::Dir(11, vec![
888                ("books", Tree::Dir(12, vec![
889                    ("disparition.txt", Tree::File(3)),
890                    ("revision_link", Tree::Dir(10, vec![
891                        ("readme.txt", Tree::File(8)),
892                    ])),
893                    ("war_and_peace.doc", Tree::File(4)),
894                ])),
895                ("music", Tree::Dir(5, vec![
896                    ("lemon_tree.mp3", Tree::File(6)),
897                    ("on_reflection.wav", Tree::File(7)),
898                ])),
899                ("readme.txt", Tree::File(8)),
900        ]);
901
902        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
903
904        assert_eq!(
905            diff.operations().collect::<Vec<_>>(),
906            vec![
907                TreeDiffOperation::Deleted {
908                    path: parse_path(&graph, "books/revision_link").as_ref(),
909                    old_file: graph
910                        .properties()
911                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
912                    old_perm: Some(Permission::Revision),
913                },
914                TreeDiffOperation::Added {
915                    path: parse_path(&graph, "books/revision_link/readme.txt").as_ref(),
916                    new_file: graph
917                        .properties()
918                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
919                    new_perm: Some(Permission::Content),
920                }
921            ]
922        );
923        Ok(())
924    }
925
926    #[test]
927    fn test_limit() -> Result<()> {
928        let base_tree = base_tree();
929
930        #[rustfmt::skip]
931        let tree_with_edit =
932            Tree::Dir(15, vec![
933                ("books", Tree::Dir(14, vec![
934                    ("disparition.txt", Tree::File(3)),
935                    ("revision_link", Tree::Revision(16)),
936                    ("war_and_peace.doc", Tree::File(4)),
937                ])),
938        ]);
939
940        assert!(
941            diff_trees(&base_tree, &tree_with_edit, Some(2)).is_err(),
942            "Expected the diff to error out because there are too many changes"
943        );
944        Ok(())
945    }
946
947    /// Helper function to call the [`diff_tree`] function more easily on graphs
948    /// generated from synthetic trees
949    fn diff_trees(
950        tree_1: &Tree,
951        tree_2: &Tree,
952        limit: Option<usize>,
953    ) -> Result<(BuiltGraph, TreeDiff)> {
954        let mut builder = GraphBuilder::default();
955        let old_root = build_graph_recursively(tree_1, &mut builder)?.0;
956        let new_root = build_graph_recursively(tree_2, &mut builder)?.0;
957        let graph = builder.done()?;
958        let diff = tree_diff_dirs(&graph, old_root, new_root, limit)?;
959        Ok((graph, diff))
960    }
961
962    fn make_node(id: &u32, node_type: NodeType, builder: &mut GraphBuilder) -> Result<NodeId> {
963        let mut hash = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
964        hash[16..].copy_from_slice(&id.to_be_bytes());
965        let swhid = SWHID {
966            namespace_version: 1,
967            node_type,
968            hash,
969        };
970        if let Some(node_id) = builder.node_id(swhid) {
971            Ok(node_id)
972        } else {
973            Ok(builder.node(swhid)?.done())
974        }
975    }
976
977    fn build_graph_recursively(
978        tree: &Tree,
979        builder: &mut GraphBuilder,
980    ) -> Result<(NodeId, Permission)> {
981        match tree {
982            Tree::File(node_id) => Ok((
983                make_node(node_id, NodeType::Content, builder)?,
984                Permission::Content,
985            )),
986            Tree::Executable(node_id) => Ok((
987                make_node(node_id, NodeType::Content, builder)?,
988                Permission::ExecutableContent,
989            )),
990            Tree::Dir(node_id, items) => {
991                let dir = make_node(node_id, NodeType::Directory, builder)?;
992                for (name, child) in items {
993                    let (child_node, permission) = build_graph_recursively(child, builder)?;
994                    builder.dir_arc(dir, child_node, permission, name.as_bytes());
995                }
996                Ok((dir, Permission::Directory))
997            }
998            Tree::Revision(node_id) => Ok((
999                make_node(node_id, NodeType::Revision, builder)?,
1000                Permission::Revision,
1001            )),
1002        }
1003    }
1004}