swh_graph_stdlib/
diff.rs

1// Copyright (C) 2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Recursively lists differences between two directory nodes.
7
8use anyhow::{bail, ensure, Result};
9
10use swh_graph::{
11    graph::{NodeId, SwhGraphWithProperties, SwhLabeledForwardGraph},
12    labels::{EdgeLabel, LabelNameId, Permission},
13    properties, NodeType,
14};
15
16/// An operation made between two revisions on a single directory entry
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum TreeDiffOperation<Path: AsRef<[LabelNameId]> = Box<[LabelNameId]>> {
19    /// Adds a single file or directory
20    Added {
21        /// Full path (from repo root) to the added file
22        path: Path,
23        new_file: NodeId,
24        new_perm: Option<Permission>,
25    },
26    /// Removes a single file or directory
27    Deleted {
28        /// Full path (from repo root) to the removed file
29        path: Path,
30        old_file: NodeId,
31        old_perm: Option<Permission>,
32    },
33    /// Edits a single file (it can't be a directory)
34    Modified {
35        /// Full path (from the repo root) to the removed file
36        path: Path,
37        new_file: NodeId,
38        old_file: NodeId,
39        new_perm: Option<Permission>,
40        old_perm: Option<Permission>,
41    },
42    /// Moves a file from one place to another.
43    /// The file may have been edited in the process,
44    /// in which case the two supplied node ids will differ.
45    Moved {
46        old_path: Path,
47        new_path: Path,
48        old_file: NodeId,
49        new_file: NodeId,
50        old_perm: Option<Permission>,
51        new_perm: Option<Permission>,
52    },
53}
54
55impl<Path: AsRef<[LabelNameId]>> TreeDiffOperation<Path> {
56    pub fn shallow_copy(&self) -> TreeDiffOperation<&[LabelNameId]> {
57        use TreeDiffOperation::*;
58
59        match self {
60            Added {
61                path,
62                new_file,
63                new_perm,
64            } => Added {
65                path: path.as_ref(),
66                new_file: *new_file,
67                new_perm: *new_perm,
68            },
69            Deleted {
70                path,
71                old_file,
72                old_perm,
73            } => Deleted {
74                path: path.as_ref(),
75                old_file: *old_file,
76                old_perm: *old_perm,
77            },
78            Modified {
79                path,
80                new_file,
81                old_file,
82                new_perm,
83                old_perm,
84            } => Modified {
85                path: path.as_ref(),
86                new_file: *new_file,
87                old_file: *old_file,
88                new_perm: *new_perm,
89                old_perm: *old_perm,
90            },
91            Moved {
92                old_path,
93                new_path,
94                new_file,
95                old_file,
96                new_perm,
97                old_perm,
98            } => Moved {
99                old_path: old_path.as_ref(),
100                new_path: new_path.as_ref(),
101                new_file: *new_file,
102                old_file: *old_file,
103                new_perm: *new_perm,
104                old_perm: *old_perm,
105            },
106        }
107    }
108}
109
110/// A diff between two trees, treating files as atomic units
111#[derive(Debug, Clone, PartialEq, Eq)]
112pub struct TreeDiff {
113    operations: Vec<TreeDiffOperation>,
114}
115
116impl TreeDiff {
117    pub fn operations(&self) -> impl Iterator<Item = TreeDiffOperation<&[LabelNameId]>> + use<'_> {
118        self.operations.iter().map(TreeDiffOperation::shallow_copy)
119    }
120}
121
122/// Compute a tree diff between the old and new trees rooted in
123/// the nodes supplied, which are required to be directory nodes.
124///
125/// The limit parameter can optionally limit the number of diff
126/// elements to return (to avoid returning very big diffs from
127/// git bombs). If this limit is reached, an error is returned.
128///
129/// If no `old_tree` is supplied, then all the files present in
130/// `new_tree` are treated as being added.
131///
132/// WARNING: rename detection is currently not supported. The `Moved` variant is never returned.
133pub fn tree_diff_dirs<G>(
134    graph: &G,
135    old_tree: Option<NodeId>,
136    new_tree: NodeId,
137    limit: Option<usize>,
138) -> Result<TreeDiff>
139where
140    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
141{
142    let mut operations = Vec::new();
143    let Some(old_tree) = old_tree else {
144        for dir_entry in list_dir(graph, new_tree)? {
145            add_directory_recursively(graph, &dir_entry, &limit, &mut operations, &[])?;
146        }
147        return Ok(TreeDiff { operations });
148    };
149    let mut path_prefix = Vec::new();
150    let mut stack: Vec<StackItem> = Vec::new();
151    stack.push(StackItem::Visit {
152        old: old_tree,
153        new: new_tree,
154        path: None,
155    });
156    while let Some(stack_item) = stack.pop() {
157        match stack_item {
158            StackItem::Leave => {
159                path_prefix.pop();
160            }
161            StackItem::Visit { path, old, new } => {
162                if let Some(path) = path {
163                    path_prefix.push(path);
164                }
165                tree_diff_internal(
166                    graph,
167                    old,
168                    new,
169                    &limit,
170                    &mut operations,
171                    &mut path_prefix,
172                    &mut stack,
173                )?;
174            }
175        }
176    }
177    Ok(TreeDiff { operations })
178}
179
180/// Internal representation of our stack to recurse into directories,
181/// to avoid overflowing Rust's call stack.
182enum StackItem {
183    /// marker to update the path to the directory currently being visited
184    Leave,
185    // task to visit a pair of nodes
186    Visit {
187        path: Option<LabelNameId>,
188        old: NodeId,
189        new: NodeId,
190    },
191}
192
193fn tree_diff_internal<G>(
194    graph: &G,
195    old_tree: NodeId,
196    new_tree: NodeId,
197    limit: &Option<usize>,
198    result: &mut Vec<TreeDiffOperation>,
199    path_prefix: &mut Vec<LabelNameId>,
200    stack: &mut Vec<StackItem>,
201) -> Result<()>
202where
203    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
204{
205    if old_tree == new_tree {
206        // nothing to diff!
207        return Ok(());
208    }
209
210    let props = graph.properties();
211
212    // Iterate jointly on the listing of both directories.
213    // The listings are known to be sorted on their label name ids.
214    let mut old_dir_iter = list_dir(graph, old_tree)?.into_iter().peekable();
215    let mut new_dir_iter = list_dir(graph, new_tree)?.into_iter().peekable();
216    loop {
217        if limit.is_some_and(|limit| result.len() >= limit) {
218            // TODO convert to if let-chains when they become available to avoid the `unwrap`
219            bail!("Diff has more than {} changes", limit.unwrap())
220        }
221        // concatenates file names to obtain full relative path from the repo's root
222        let full_path = |file_name: &LabelNameId| {
223            [path_prefix.as_slice(), &[*file_name]]
224                .concat()
225                .into_boxed_slice()
226        };
227        match (old_dir_iter.peek(), new_dir_iter.peek()) {
228            (None, None) => break,
229            (None, Some(new)) => {
230                add_directory_recursively(graph, new, limit, result, path_prefix)?;
231                new_dir_iter.next();
232            }
233            (Some(old), None) => {
234                delete_directory_recursively(graph, old, limit, result, path_prefix)?;
235                old_dir_iter.next();
236            }
237            (Some(old), Some(new)) => {
238                match old.path.0.cmp(&new.path.0) {
239                    std::cmp::Ordering::Less => {
240                        delete_directory_recursively(graph, old, limit, result, path_prefix)?;
241                        old_dir_iter.next();
242                    }
243                    std::cmp::Ordering::Greater => {
244                        add_directory_recursively(graph, new, limit, result, path_prefix)?;
245                        new_dir_iter.next();
246                    }
247                    std::cmp::Ordering::Equal => {
248                        // If the two ids are equal, we know the contents are recursively equal,
249                        // in which case there is nothing to compare. Otherwise:
250                        if old.id != new.id || old.perm != new.perm {
251                            let old_type = props.node_type(old.id);
252                            let new_type = props.node_type(new.id);
253                            let content_types = [NodeType::Content, NodeType::Revision];
254
255                            if content_types.contains(&old_type)
256                                && content_types.contains(&new_type)
257                            {
258                                // Both entries point to file-like things, so we register the difference as an edit on them
259                                result.push(TreeDiffOperation::Modified {
260                                    path: full_path(&new.path),
261                                    new_file: new.id,
262                                    old_file: old.id,
263                                    new_perm: new.perm,
264                                    old_perm: old.perm,
265                                });
266                            } else if content_types.contains(&old_type)
267                                && new_type == NodeType::Directory
268                            {
269                                // A file got changed into a directory.
270                                result.push(TreeDiffOperation::Deleted {
271                                    path: full_path(&old.path),
272                                    old_file: old.id,
273                                    old_perm: old.perm,
274                                });
275                                add_directory_recursively(graph, new, limit, result, path_prefix)?;
276                            } else if old_type == NodeType::Directory
277                                && content_types.contains(&new_type)
278                            {
279                                // A directory changed into a file
280                                delete_directory_recursively(
281                                    graph,
282                                    old,
283                                    limit,
284                                    result,
285                                    path_prefix,
286                                )?;
287                                result.push(TreeDiffOperation::Added {
288                                    path: full_path(&new.path),
289                                    new_file: new.id,
290                                    new_perm: new.perm,
291                                });
292                            } else if old_type == NodeType::Directory
293                                && new_type == NodeType::Directory
294                            {
295                                // Two directories with different contents: let's remember
296                                // to recurse into this directory later, by pushing the task
297                                // to visit it on our stack.
298                                // We also add a `Leave` marker to update the current path once
299                                // we're done visiting that subdirectory. In order for it to be
300                                // processed after the visit, we add it on the stack before the
301                                // visit (since it's Last-In-First-Out)
302                                stack.push(StackItem::Leave);
303                                stack.push(StackItem::Visit {
304                                    path: Some(new.path),
305                                    old: old.id,
306                                    new: new.id,
307                                });
308                            }
309                        };
310                        old_dir_iter.next();
311                        new_dir_iter.next();
312                    }
313                }
314            }
315        }
316    }
317    Ok(())
318}
319
320/// Recursively browse a part of a file system and mark all of its contents as added
321fn add_directory_recursively<G>(
322    graph: &G,
323    dir_entry: &DirEntry,
324    limit: &Option<usize>,
325    result: &mut Vec<TreeDiffOperation>,
326    path_prefix: &[LabelNameId],
327) -> Result<()>
328where
329    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
330{
331    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, true)
332}
333
334/// Recursively browse a part of a file system and mark all of its contents as deleted
335fn delete_directory_recursively<G>(
336    graph: &G,
337    dir_entry: &DirEntry,
338    limit: &Option<usize>,
339    result: &mut Vec<TreeDiffOperation>,
340    path_prefix: &[LabelNameId],
341) -> Result<()>
342where
343    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
344{
345    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, false)
346}
347
348enum TraverseStackItem {
349    Leave,
350    Visit {
351        node_id: NodeId,
352        path: LabelNameId,
353        perm: Option<Permission>,
354    },
355}
356
357/// Recursively browse a part of a file system and mark all of its contents as added
358/// or deleted (as indicated by the last parameter).
359fn add_or_delete_directory_recursively<G>(
360    graph: &G,
361    dir_entry: &DirEntry,
362    limit: &Option<usize>,
363    result: &mut Vec<TreeDiffOperation>,
364    path_prefix: &[LabelNameId],
365    add: bool,
366) -> Result<()>
367where
368    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
369{
370    let props = graph.properties();
371    let mut stack = Vec::new();
372    let mut path_prefix = path_prefix.to_vec();
373    stack.push(TraverseStackItem::Visit {
374        node_id: dir_entry.id,
375        path: dir_entry.path,
376        perm: dir_entry.perm,
377    });
378    while let Some(task) = stack.pop() {
379        if limit.is_some_and(|limit| result.len() >= limit) {
380            bail!("Diff has more than {} changes", limit.unwrap());
381        }
382        match task {
383            TraverseStackItem::Leave => {
384                path_prefix.pop();
385            }
386            TraverseStackItem::Visit {
387                node_id,
388                path,
389                perm,
390            } => {
391                path_prefix.push(path);
392                match props.node_type(node_id) {
393                    NodeType::Content | NodeType::Revision => {
394                        let diff_item = if add {
395                            TreeDiffOperation::Added {
396                                path: path_prefix.to_vec().into_boxed_slice(),
397                                new_file: node_id,
398                                new_perm: perm,
399                            }
400                        } else {
401                            TreeDiffOperation::Deleted {
402                                path: path_prefix.to_vec().into_boxed_slice(),
403                                old_file: node_id,
404                                old_perm: perm,
405                            }
406                        };
407                        result.push(diff_item);
408                    }
409                    NodeType::Directory => {
410                        let dir_listing = list_dir(graph, node_id)?;
411                        for entry in &dir_listing {
412                            stack.push(TraverseStackItem::Leave);
413                            stack.push(TraverseStackItem::Visit {
414                                node_id: entry.id,
415                                path: entry.path,
416                                perm: entry.perm,
417                            });
418                        }
419                    }
420                    x => {
421                        bail!("unexpected node type {x} when browsing a directory");
422                    }
423                }
424            }
425        }
426    }
427    Ok(())
428}
429
430/// Internal struct to represent an entry in the list of contents of a directory
431#[derive(Debug, Clone, Copy, PartialEq, Eq)]
432struct DirEntry {
433    path: LabelNameId,
434    perm: Option<Permission>,
435    id: NodeId,
436}
437
438/// Lists the contents of a directory, sorting the results by label name id.
439fn list_dir<G>(graph: &G, dir_node: NodeId) -> Result<Vec<DirEntry>>
440where
441    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
442{
443    let props = graph.properties();
444    let node_type = props.node_type(dir_node);
445    ensure!(
446        node_type == NodeType::Directory,
447        "Type of {dir_node} should be dir, but is {node_type} instead"
448    );
449    let mut listing = Vec::new();
450    for (succ, labels) in graph.labeled_successors(dir_node) {
451        let node_type = props.node_type(succ);
452        if node_type == NodeType::Content
453            || node_type == NodeType::Directory
454            || node_type == NodeType::Revision
455        {
456            for label in labels {
457                if let EdgeLabel::DirEntry(dentry) = label {
458                    let perm = dentry.permission();
459                    listing.push(DirEntry {
460                        path: dentry.label_name_id(),
461                        perm,
462                        id: succ,
463                    });
464                }
465            }
466        }
467    }
468    listing.sort_unstable_by_key(|dir_entry| dir_entry.path.0);
469    Ok(listing)
470}
471
472#[cfg(test)]
473mod tests {
474
475    use swh_graph::{
476        graph::*,
477        graph_builder::{BuiltGraph, GraphBuilder},
478        NodeType, SWHID,
479    };
480
481    use super::*;
482
483    /// Convenience type to define test cases.
484    /// The `u32` fields are used to generate SWHIDs
485    /// when converting the trees to graphs.
486    enum Tree {
487        File(u32),
488        Executable(u32),
489        Dir(u32, Vec<(&'static str, Tree)>),
490        Revision(u32),
491    }
492
493    #[rustfmt::skip] // to keep the tree representation more compact and readable
494    fn base_tree() -> Tree {
495        Tree::Dir(1, vec![
496            ("books", Tree::Dir(2, vec![
497                ("disparition.txt", Tree::File(3)),
498                ("revision_link", Tree::Revision(9)),
499                ("war_and_peace.doc", Tree::File(4)),
500            ])),
501            ("music", Tree::Dir(5, vec![
502                ("lemon_tree.mp3", Tree::File(6)),
503                ("on_reflection.wav", Tree::File(7)),
504            ])),
505            ("readme.txt", Tree::File(8)),
506        ])
507    }
508
509    fn parse_path(graph: &BuiltGraph, path: &str) -> Box<[LabelNameId]> {
510        path.split('/')
511            .map(|part| {
512                graph
513                    .properties()
514                    .label_name_id(part.as_bytes())
515                    .expect("label name does not exist in the graph")
516            })
517            .collect()
518    }
519
520    #[test]
521    fn test_list_dir() -> Result<()> {
522        let tree = base_tree();
523        let mut builder = GraphBuilder::default();
524        let root_id = build_graph_recursively(&tree, &mut builder)?.0;
525        let graph = builder.done()?;
526        let props = graph.properties();
527        let books_subdir_id =
528            props.node_id("swh:1:dir:0000000000000000000000000000000000000002")?;
529        let music_subdir_id =
530            props.node_id("swh:1:dir:0000000000000000000000000000000000000005")?;
531        let readme_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000008")?;
532        let lemon_tree_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000006")?;
533        let on_reflection_id =
534            props.node_id("swh:1:cnt:0000000000000000000000000000000000000007")?;
535
536        assert_eq!(
537            list_dir(&graph, root_id)?,
538            vec![
539                DirEntry {
540                    path: parse_path(&graph, "books")[0],
541                    perm: Some(Permission::Directory),
542                    id: books_subdir_id,
543                },
544                DirEntry {
545                    path: parse_path(&graph, "music")[0],
546                    perm: Some(Permission::Directory),
547                    id: music_subdir_id,
548                },
549                DirEntry {
550                    path: parse_path(&graph, "readme.txt")[0],
551                    perm: Some(Permission::Content),
552                    id: readme_id,
553                }
554            ]
555        );
556        assert_eq!(
557            list_dir(&graph, music_subdir_id)?,
558            vec![
559                DirEntry {
560                    path: parse_path(&graph, "lemon_tree.mp3")[0],
561                    perm: Some(Permission::Content),
562                    id: lemon_tree_id,
563                },
564                DirEntry {
565                    path: parse_path(&graph, "on_reflection.wav")[0],
566                    perm: Some(Permission::Content),
567                    id: on_reflection_id,
568                }
569            ]
570        );
571        Ok(())
572    }
573
574    #[test]
575    fn test_diff_root_dir() -> Result<()> {
576        let base_tree = base_tree();
577
578        let mut builder = GraphBuilder::default();
579        let new_root = build_graph_recursively(&base_tree, &mut builder)?.0;
580        let graph = builder.done()?;
581        let diff = tree_diff_dirs(&graph, None, new_root, None)?;
582
583        let mut actual = diff.operations().collect::<Vec<_>>();
584        actual.sort_by_key(|op| {
585            let TreeDiffOperation::Added { new_file, .. } = op else {
586                panic!("expected only 'added' diff elements when diffing a root revision");
587            };
588            *new_file
589        });
590
591        assert_eq!(
592            actual,
593            vec![
594                TreeDiffOperation::Added {
595                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
596                    new_file: graph
597                        .properties()
598                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
599                    new_perm: Some(Permission::Content),
600                },
601                TreeDiffOperation::Added {
602                    path: parse_path(&graph, "books/revision_link").as_ref(),
603                    new_file: graph
604                        .properties()
605                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
606                    new_perm: Some(Permission::Revision),
607                },
608                TreeDiffOperation::Added {
609                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
610                    new_file: graph
611                        .properties()
612                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
613                    new_perm: Some(Permission::Content),
614                },
615                TreeDiffOperation::Added {
616                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
617                    new_file: graph
618                        .properties()
619                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
620                    new_perm: Some(Permission::Content),
621                },
622                TreeDiffOperation::Added {
623                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
624                    new_file: graph
625                        .properties()
626                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
627                    new_perm: Some(Permission::Content),
628                },
629                TreeDiffOperation::Added {
630                    path: parse_path(&graph, "readme.txt").as_ref(),
631                    new_file: graph
632                        .properties()
633                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
634                    new_perm: Some(Permission::Content),
635                }
636            ],
637        );
638
639        Ok(())
640    }
641
642    #[test]
643    fn test_edit_file() -> Result<()> {
644        let base_tree = base_tree();
645
646        #[rustfmt::skip]
647        let tree_with_edit =
648            Tree::Dir(15, vec![
649                ("books", Tree::Dir(14, vec![
650                    ("disparition.txt", Tree::File(13)),
651                    ("revision_link", Tree::Revision(9)),
652                    ("war_and_peace.doc", Tree::File(4)),
653                ])),
654                ("music", Tree::Dir(10, vec![
655                    ("lemon_tree.mp3", Tree::File(6)),
656                    ("on_reflection.wav", Tree::File(7)),
657                ])),
658                ("readme.txt", Tree::File(8)),
659        ]);
660
661        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
662
663        assert_eq!(
664            diff.operations,
665            vec![TreeDiffOperation::Modified {
666                path: parse_path(&graph, "books/disparition.txt"),
667                old_file: graph
668                    .properties()
669                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
670                new_file: graph
671                    .properties()
672                    .node_id("swh:1:cnt:000000000000000000000000000000000000000d")?,
673                old_perm: Some(Permission::Content),
674                new_perm: Some(Permission::Content),
675            }]
676        );
677        Ok(())
678    }
679
680    #[test]
681    fn test_change_permissions() -> Result<()> {
682        let base_tree = base_tree();
683
684        #[rustfmt::skip]
685        let tree_with_edit =
686            Tree::Dir(15, vec![
687                ("books", Tree::Dir(14, vec![
688                    ("disparition.txt", Tree::Executable(3)),
689                    ("revision_link", Tree::Revision(9)),
690                    ("war_and_peace.doc", Tree::File(4)),
691                ])),
692                ("music", Tree::Dir(10, vec![
693                    ("lemon_tree.mp3", Tree::File(6)),
694                    ("on_reflection.wav", Tree::File(7)),
695                ])),
696                ("readme.txt", Tree::File(8)),
697        ]);
698
699        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
700
701        assert_eq!(
702            diff.operations,
703            vec![TreeDiffOperation::Modified {
704                path: parse_path(&graph, "books/disparition.txt"),
705                old_file: graph
706                    .properties()
707                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
708                new_file: graph
709                    .properties()
710                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
711                old_perm: Some(Permission::Content),
712                new_perm: Some(Permission::ExecutableContent),
713            }]
714        );
715        Ok(())
716    }
717
718    #[test]
719    fn test_edit_revision() -> Result<()> {
720        let base_tree = base_tree();
721
722        #[rustfmt::skip]
723        let tree_with_edit =
724            Tree::Dir(15, vec![
725                ("books", Tree::Dir(14, vec![
726                    ("disparition.txt", Tree::File(3)),
727                    ("revision_link", Tree::Revision(16)),
728                    ("war_and_peace.doc", Tree::File(4)),
729                ])),
730                ("music", Tree::Dir(10, vec![
731                    ("lemon_tree.mp3", Tree::File(6)),
732                    ("on_reflection.wav", Tree::File(7)),
733                ])),
734                ("readme.txt", Tree::File(8)),
735        ]);
736
737        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
738
739        assert_eq!(
740            diff.operations,
741            vec![TreeDiffOperation::Modified {
742                path: parse_path(&graph, "books/revision_link"),
743                old_file: graph
744                    .properties()
745                    .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
746                new_file: graph
747                    .properties()
748                    .node_id("swh:1:rev:0000000000000000000000000000000000000010")?,
749                old_perm: Some(Permission::Revision),
750                new_perm: Some(Permission::Revision),
751            }]
752        );
753        Ok(())
754    }
755
756    #[test]
757    fn test_additions_and_deletions() -> Result<()> {
758        let base_tree = base_tree();
759
760        #[rustfmt::skip]
761        let tree_with_addition =
762            Tree::Dir(11, vec![
763                ("books", Tree::Dir(2, vec![
764                    ("disparition.txt", Tree::File(3)),
765                    ("revision_link", Tree::Revision(9)),
766                    ("war_and_peace.doc", Tree::File(4)),
767                ])),
768                ("music", Tree::Dir(10, vec![
769                    ("lemon_tree.mp3", Tree::File(6)),
770                    ("gavotte_aven.wma", Tree::File(12)),
771                    ("on_reflection.wav", Tree::File(7)),
772                ])),
773                ("readme.txt", Tree::File(8)),
774        ]);
775
776        #[rustfmt::skip]
777        let tree_with_directory_deletion =
778            Tree::Dir(13, vec![
779                ("music", Tree::Dir(5, vec![
780                    ("lemon_tree.mp3", Tree::File(6)),
781                    ("on_reflection.wav", Tree::File(7)),
782                ])),
783                ("readme.txt", Tree::File(8)),
784        ]);
785
786        let (graph, diff) = diff_trees(&base_tree, &tree_with_addition, None)?;
787
788        assert_eq!(
789            diff.operations().collect::<Vec<_>>(),
790            vec![TreeDiffOperation::Added {
791                path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
792                new_file: graph
793                    .properties()
794                    .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
795                new_perm: Some(Permission::Content),
796            }]
797        );
798
799        let (graph, diff) = diff_trees(&base_tree, &tree_with_directory_deletion, None)?;
800
801        assert_eq!(
802            diff.operations().collect::<Vec<_>>(),
803            vec![
804                TreeDiffOperation::Deleted {
805                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
806                    old_file: graph
807                        .properties()
808                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
809                    old_perm: Some(Permission::Content),
810                },
811                TreeDiffOperation::Deleted {
812                    path: parse_path(&graph, "books/revision_link").as_ref(),
813                    old_file: graph
814                        .properties()
815                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
816                    old_perm: Some(Permission::Revision),
817                },
818                TreeDiffOperation::Deleted {
819                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
820                    old_file: graph
821                        .properties()
822                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
823                    old_perm: Some(Permission::Content),
824                },
825            ]
826        );
827
828        let (graph, diff) = diff_trees(&tree_with_addition, &tree_with_directory_deletion, None)?;
829
830        assert_eq!(
831            diff.operations().collect::<Vec<_>>(),
832            vec![
833                TreeDiffOperation::Deleted {
834                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
835                    old_file: graph
836                        .properties()
837                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
838                    old_perm: Some(Permission::Content),
839                },
840                TreeDiffOperation::Deleted {
841                    path: parse_path(&graph, "books/revision_link").as_ref(),
842                    old_file: graph
843                        .properties()
844                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
845                    old_perm: Some(Permission::Revision),
846                },
847                TreeDiffOperation::Deleted {
848                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
849                    old_file: graph
850                        .properties()
851                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
852                    old_perm: Some(Permission::Content),
853                },
854                TreeDiffOperation::Deleted {
855                    path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
856                    old_file: graph
857                        .properties()
858                        .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
859                    old_perm: Some(Permission::Content),
860                }
861            ]
862        );
863        Ok(())
864    }
865
866    #[test]
867    fn test_move_file() -> Result<()> {
868        let base_tree = base_tree();
869
870        #[rustfmt::skip]
871        let tree_with_edit =
872            Tree::Dir(17, vec![
873                ("books", Tree::Dir(16, vec![
874                    ("disparition.txt", Tree::File(3)),
875                    ("revision_link", Tree::Revision(9)),
876                    ("war_and_peace.doc", Tree::File(4)),
877                    ("readme.txt", Tree::File(8)),
878                ])),
879                ("music", Tree::Dir(10, vec![
880                    ("lemon_tree.mp3", Tree::File(6)),
881                    ("on_reflection.wav", Tree::File(7)),
882                ])),
883        ]);
884
885        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
886
887        // TODO: in the future we want to detect renames,
888        // instead of registering this as an addition and deletion
889        assert_eq!(
890            diff.operations().collect::<Vec<_>>(),
891            vec![
892                TreeDiffOperation::Deleted {
893                    path: parse_path(&graph, "readme.txt").as_ref(),
894                    old_file: graph
895                        .properties()
896                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
897                    old_perm: Some(Permission::Content),
898                },
899                TreeDiffOperation::Added {
900                    path: parse_path(&graph, "books/readme.txt").as_ref(),
901                    new_file: graph
902                        .properties()
903                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
904                    new_perm: Some(Permission::Content),
905                },
906            ]
907        );
908        Ok(())
909    }
910
911    #[test]
912    fn test_change_dir_to_file() -> Result<()> {
913        let base_tree = base_tree();
914
915        #[rustfmt::skip]
916        let modified_tree =
917            Tree::Dir(11, vec![
918                ("books", Tree::Dir(2, vec![
919                    ("disparition.txt", Tree::File(3)),
920                    ("revision_link", Tree::Revision(9)),
921                    ("war_and_peace.doc", Tree::File(4)),
922                ])),
923                ("music", Tree::Revision(10)),
924                ("readme.txt", Tree::File(8)),
925        ]);
926
927        let (graph, diff) = diff_trees(&base_tree, &modified_tree, None)?;
928
929        assert_eq!(
930            diff.operations().collect::<Vec<_>>(),
931            vec![
932                TreeDiffOperation::Deleted {
933                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
934                    old_file: graph
935                        .properties()
936                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
937                    old_perm: Some(Permission::Content),
938                },
939                TreeDiffOperation::Deleted {
940                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
941                    old_file: graph
942                        .properties()
943                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
944                    old_perm: Some(Permission::Content),
945                },
946                TreeDiffOperation::Added {
947                    path: parse_path(&graph, "music").as_ref(),
948                    new_file: graph
949                        .properties()
950                        .node_id("swh:1:rev:000000000000000000000000000000000000000a")?,
951                    new_perm: Some(Permission::Revision),
952                },
953            ]
954        );
955        Ok(())
956    }
957
958    #[test]
959    fn test_change_file_to_dir() -> Result<()> {
960        let base_tree = base_tree();
961
962        #[rustfmt::skip]
963        let tree_with_edit =
964            Tree::Dir(11, vec![
965                ("books", Tree::Dir(12, vec![
966                    ("disparition.txt", Tree::File(3)),
967                    ("revision_link", Tree::Dir(10, vec![
968                        ("readme.txt", Tree::File(8)),
969                    ])),
970                    ("war_and_peace.doc", Tree::File(4)),
971                ])),
972                ("music", Tree::Dir(5, vec![
973                    ("lemon_tree.mp3", Tree::File(6)),
974                    ("on_reflection.wav", Tree::File(7)),
975                ])),
976                ("readme.txt", Tree::File(8)),
977        ]);
978
979        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
980
981        assert_eq!(
982            diff.operations().collect::<Vec<_>>(),
983            vec![
984                TreeDiffOperation::Deleted {
985                    path: parse_path(&graph, "books/revision_link").as_ref(),
986                    old_file: graph
987                        .properties()
988                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
989                    old_perm: Some(Permission::Revision),
990                },
991                TreeDiffOperation::Added {
992                    path: parse_path(&graph, "books/revision_link/readme.txt").as_ref(),
993                    new_file: graph
994                        .properties()
995                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
996                    new_perm: Some(Permission::Content),
997                }
998            ]
999        );
1000        Ok(())
1001    }
1002
1003    #[test]
1004    fn test_limit() -> Result<()> {
1005        let base_tree = base_tree();
1006
1007        #[rustfmt::skip]
1008        let tree_with_edit =
1009            Tree::Dir(15, vec![
1010                ("books", Tree::Dir(14, vec![
1011                    ("disparition.txt", Tree::File(3)),
1012                    ("revision_link", Tree::Revision(16)),
1013                    ("war_and_peace.doc", Tree::File(4)),
1014                ])),
1015        ]);
1016
1017        assert!(
1018            diff_trees(&base_tree, &tree_with_edit, Some(2)).is_err(),
1019            "Expected the diff to error out because there are too many changes"
1020        );
1021        Ok(())
1022    }
1023
1024    /// Helper function to call the [`diff_tree`] function more easily on graphs
1025    /// generated from synthetic trees
1026    fn diff_trees(
1027        tree_1: &Tree,
1028        tree_2: &Tree,
1029        limit: Option<usize>,
1030    ) -> Result<(BuiltGraph, TreeDiff)> {
1031        let mut builder = GraphBuilder::default();
1032        let old_root = build_graph_recursively(tree_1, &mut builder)?.0;
1033        let new_root = build_graph_recursively(tree_2, &mut builder)?.0;
1034        let graph = builder.done()?;
1035        let diff = tree_diff_dirs(&graph, Some(old_root), new_root, limit)?;
1036        Ok((graph, diff))
1037    }
1038
1039    fn make_node(id: &u32, node_type: NodeType, builder: &mut GraphBuilder) -> Result<NodeId> {
1040        let mut hash = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
1041        hash[16..].copy_from_slice(&id.to_be_bytes());
1042        let swhid = SWHID {
1043            namespace_version: 1,
1044            node_type,
1045            hash,
1046        };
1047        if let Some(node_id) = builder.node_id(swhid) {
1048            Ok(node_id)
1049        } else {
1050            Ok(builder.node(swhid)?.done())
1051        }
1052    }
1053
1054    fn build_graph_recursively(
1055        tree: &Tree,
1056        builder: &mut GraphBuilder,
1057    ) -> Result<(NodeId, Permission)> {
1058        match tree {
1059            Tree::File(node_id) => Ok((
1060                make_node(node_id, NodeType::Content, builder)?,
1061                Permission::Content,
1062            )),
1063            Tree::Executable(node_id) => Ok((
1064                make_node(node_id, NodeType::Content, builder)?,
1065                Permission::ExecutableContent,
1066            )),
1067            Tree::Dir(node_id, items) => {
1068                let dir = make_node(node_id, NodeType::Directory, builder)?;
1069                for (name, child) in items {
1070                    let (child_node, permission) = build_graph_recursively(child, builder)?;
1071                    builder.dir_arc(dir, child_node, permission, name.as_bytes());
1072                }
1073                Ok((dir, Permission::Directory))
1074            }
1075            Tree::Revision(node_id) => Ok((
1076                make_node(node_id, NodeType::Revision, builder)?,
1077                Permission::Revision,
1078            )),
1079        }
1080    }
1081}