Skip to main content

swh_graph_stdlib/
diff.rs

1// Copyright (C) 2025-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Recursively lists differences between two directory nodes.
7
8use anyhow::{Context, Result, bail, ensure};
9
10use swh_graph::{
11    NodeType,
12    graph::{NodeId, SwhGraphWithProperties, SwhLabeledForwardGraph},
13    labels::{EdgeLabel, LabelNameId, Permission},
14    properties,
15};
16
17/// An operation made between two revisions on a single directory entry
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum TreeDiffOperation<Path: AsRef<[LabelNameId]> = Box<[LabelNameId]>> {
20    /// Adds a single file or directory
21    Added {
22        /// Full path (from repo root) to the added file
23        path: Path,
24        new_file: NodeId,
25        new_perm: Option<Permission>,
26    },
27    /// Removes a single file or directory
28    Deleted {
29        /// Full path (from repo root) to the removed file
30        path: Path,
31        old_file: NodeId,
32        old_perm: Option<Permission>,
33    },
34    /// Edits a single file (it can't be a directory)
35    Modified {
36        /// Full path (from the repo root) to the removed file
37        path: Path,
38        new_file: NodeId,
39        old_file: NodeId,
40        new_perm: Option<Permission>,
41        old_perm: Option<Permission>,
42    },
43    /// Moves a file from one place to another.
44    /// The file may have been edited in the process,
45    /// in which case the two supplied node ids will differ.
46    Moved {
47        old_path: Path,
48        new_path: Path,
49        old_file: NodeId,
50        new_file: NodeId,
51        old_perm: Option<Permission>,
52        new_perm: Option<Permission>,
53    },
54}
55
56impl<Path: AsRef<[LabelNameId]>> TreeDiffOperation<Path> {
57    pub fn shallow_copy(&self) -> TreeDiffOperation<&[LabelNameId]> {
58        use TreeDiffOperation::*;
59
60        match self {
61            Added {
62                path,
63                new_file,
64                new_perm,
65            } => Added {
66                path: path.as_ref(),
67                new_file: *new_file,
68                new_perm: *new_perm,
69            },
70            Deleted {
71                path,
72                old_file,
73                old_perm,
74            } => Deleted {
75                path: path.as_ref(),
76                old_file: *old_file,
77                old_perm: *old_perm,
78            },
79            Modified {
80                path,
81                new_file,
82                old_file,
83                new_perm,
84                old_perm,
85            } => Modified {
86                path: path.as_ref(),
87                new_file: *new_file,
88                old_file: *old_file,
89                new_perm: *new_perm,
90                old_perm: *old_perm,
91            },
92            Moved {
93                old_path,
94                new_path,
95                new_file,
96                old_file,
97                new_perm,
98                old_perm,
99            } => Moved {
100                old_path: old_path.as_ref(),
101                new_path: new_path.as_ref(),
102                new_file: *new_file,
103                old_file: *old_file,
104                new_perm: *new_perm,
105                old_perm: *old_perm,
106            },
107        }
108    }
109}
110
111/// A diff between two trees, treating files as atomic units
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct TreeDiff {
114    operations: Vec<TreeDiffOperation>,
115}
116
117impl TreeDiff {
118    pub fn operations(&self) -> impl Iterator<Item = TreeDiffOperation<&[LabelNameId]>> + use<'_> {
119        self.operations.iter().map(TreeDiffOperation::shallow_copy)
120    }
121}
122
123/// Compute a tree diff between the old and new trees rooted in
124/// the nodes supplied, which are required to be directory nodes.
125///
126/// The limit parameter can optionally limit the number of diff
127/// elements to return (to avoid returning very big diffs from
128/// git bombs). If this limit is reached, an error is returned.
129///
130/// If no `old_tree` is supplied, then all the files present in
131/// `new_tree` are treated as being added.
132///
133/// WARNING: rename detection is currently not supported. The `Moved` variant is never returned.
134pub fn tree_diff_dirs<G>(
135    graph: &G,
136    old_tree: Option<NodeId>,
137    new_tree: NodeId,
138    limit: Option<usize>,
139) -> Result<TreeDiff>
140where
141    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
142{
143    let mut operations = Vec::new();
144    let Some(old_tree) = old_tree else {
145        for dir_entry in list_dir(graph, new_tree)? {
146            add_directory_recursively(graph, &dir_entry, &limit, &mut operations, &[])?;
147        }
148        return Ok(TreeDiff { operations });
149    };
150    let mut path_prefix = Vec::new();
151    let mut stack: Vec<StackItem> = Vec::new();
152    stack.push(StackItem::Visit {
153        old: old_tree,
154        new: new_tree,
155        path: None,
156    });
157    while let Some(stack_item) = stack.pop() {
158        match stack_item {
159            StackItem::Leave => {
160                path_prefix.pop();
161            }
162            StackItem::Visit { path, old, new } => {
163                if let Some(path) = path {
164                    path_prefix.push(path);
165                }
166                tree_diff_internal(
167                    graph,
168                    old,
169                    new,
170                    &limit,
171                    &mut operations,
172                    &mut path_prefix,
173                    &mut stack,
174                )
175                .with_context(|| {
176                    format!(
177                        "Cannot diff {} and {}, subtrees of {} and {}",
178                        graph.properties().swhid(old),
179                        graph.properties().swhid(new),
180                        graph.properties().swhid(old_tree),
181                        graph.properties().swhid(new_tree),
182                    )
183                })?;
184            }
185        }
186    }
187    Ok(TreeDiff { operations })
188}
189
190/// Internal representation of our stack to recurse into directories,
191/// to avoid overflowing Rust's call stack.
192enum StackItem {
193    /// marker to update the path to the directory currently being visited
194    Leave,
195    // task to visit a pair of nodes
196    Visit {
197        path: Option<LabelNameId>,
198        old: NodeId,
199        new: NodeId,
200    },
201}
202
203fn tree_diff_internal<G>(
204    graph: &G,
205    old_tree: NodeId,
206    new_tree: NodeId,
207    limit: &Option<usize>,
208    result: &mut Vec<TreeDiffOperation>,
209    path_prefix: &mut Vec<LabelNameId>,
210    stack: &mut Vec<StackItem>,
211) -> Result<()>
212where
213    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
214{
215    if old_tree == new_tree {
216        // nothing to diff!
217        return Ok(());
218    }
219
220    let props = graph.properties();
221
222    // Iterate jointly on the listing of both directories.
223    // The listings are known to be sorted on their label name ids.
224    let mut old_dir_iter = list_dir(graph, old_tree)?.into_iter().peekable();
225    let mut new_dir_iter = list_dir(graph, new_tree)?.into_iter().peekable();
226    loop {
227        if limit.is_some_and(|limit| result.len() >= limit) {
228            // TODO convert to if let-chains when they become available to avoid the `unwrap`
229            bail!("Diff has more than {} changes", limit.unwrap())
230        }
231        // concatenates file names to obtain full relative path from the repo's root
232        let full_path = |file_name: &LabelNameId| {
233            [path_prefix.as_slice(), &[*file_name]]
234                .concat()
235                .into_boxed_slice()
236        };
237        match (old_dir_iter.peek(), new_dir_iter.peek()) {
238            (None, None) => break,
239            (None, Some(new)) => {
240                add_directory_recursively(graph, new, limit, result, path_prefix)?;
241                new_dir_iter.next();
242            }
243            (Some(old), None) => {
244                delete_directory_recursively(graph, old, limit, result, path_prefix)?;
245                old_dir_iter.next();
246            }
247            (Some(old), Some(new)) => {
248                match old.path.0.cmp(&new.path.0) {
249                    std::cmp::Ordering::Less => {
250                        delete_directory_recursively(graph, old, limit, result, path_prefix)?;
251                        old_dir_iter.next();
252                    }
253                    std::cmp::Ordering::Greater => {
254                        add_directory_recursively(graph, new, limit, result, path_prefix)?;
255                        new_dir_iter.next();
256                    }
257                    std::cmp::Ordering::Equal => {
258                        // If the two ids are equal, we know the contents are recursively equal,
259                        // in which case there is nothing to compare. Otherwise:
260                        if old.id != new.id || old.perm != new.perm {
261                            let old_type = props.node_type(old.id);
262                            let new_type = props.node_type(new.id);
263                            let content_types = [NodeType::Content, NodeType::Revision];
264
265                            if content_types.contains(&old_type)
266                                && content_types.contains(&new_type)
267                            {
268                                // Both entries point to file-like things, so we register the difference as an edit on them
269                                result.push(TreeDiffOperation::Modified {
270                                    path: full_path(&new.path),
271                                    new_file: new.id,
272                                    old_file: old.id,
273                                    new_perm: new.perm,
274                                    old_perm: old.perm,
275                                });
276                            } else if content_types.contains(&old_type)
277                                && new_type == NodeType::Directory
278                            {
279                                // A file got changed into a directory.
280                                result.push(TreeDiffOperation::Deleted {
281                                    path: full_path(&old.path),
282                                    old_file: old.id,
283                                    old_perm: old.perm,
284                                });
285                                add_directory_recursively(graph, new, limit, result, path_prefix)?;
286                            } else if old_type == NodeType::Directory
287                                && content_types.contains(&new_type)
288                            {
289                                // A directory changed into a file
290                                delete_directory_recursively(
291                                    graph,
292                                    old,
293                                    limit,
294                                    result,
295                                    path_prefix,
296                                )?;
297                                result.push(TreeDiffOperation::Added {
298                                    path: full_path(&new.path),
299                                    new_file: new.id,
300                                    new_perm: new.perm,
301                                });
302                            } else if old_type == NodeType::Directory
303                                && new_type == NodeType::Directory
304                            {
305                                // Two directories with different contents: let's remember
306                                // to recurse into this directory later, by pushing the task
307                                // to visit it on our stack.
308                                // We also add a `Leave` marker to update the current path once
309                                // we're done visiting that subdirectory. In order for it to be
310                                // processed after the visit, we add it on the stack before the
311                                // visit (since it's Last-In-First-Out)
312                                stack.push(StackItem::Leave);
313                                stack.push(StackItem::Visit {
314                                    path: Some(new.path),
315                                    old: old.id,
316                                    new: new.id,
317                                });
318                            }
319                        };
320                        old_dir_iter.next();
321                        new_dir_iter.next();
322                    }
323                }
324            }
325        }
326    }
327    Ok(())
328}
329
330/// Recursively browse a part of a file system and mark all of its contents as added
331fn add_directory_recursively<G>(
332    graph: &G,
333    dir_entry: &DirEntry,
334    limit: &Option<usize>,
335    result: &mut Vec<TreeDiffOperation>,
336    path_prefix: &[LabelNameId],
337) -> Result<()>
338where
339    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
340{
341    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, true)
342}
343
344/// Recursively browse a part of a file system and mark all of its contents as deleted
345fn delete_directory_recursively<G>(
346    graph: &G,
347    dir_entry: &DirEntry,
348    limit: &Option<usize>,
349    result: &mut Vec<TreeDiffOperation>,
350    path_prefix: &[LabelNameId],
351) -> Result<()>
352where
353    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
354{
355    add_or_delete_directory_recursively(graph, dir_entry, limit, result, path_prefix, false)
356}
357
358enum TraverseStackItem {
359    Leave,
360    Visit {
361        node_id: NodeId,
362        path: LabelNameId,
363        perm: Option<Permission>,
364    },
365}
366
367/// Recursively browse a part of a file system and mark all of its contents as added
368/// or deleted (as indicated by the last parameter).
369fn add_or_delete_directory_recursively<G>(
370    graph: &G,
371    dir_entry: &DirEntry,
372    limit: &Option<usize>,
373    result: &mut Vec<TreeDiffOperation>,
374    path_prefix: &[LabelNameId],
375    add: bool,
376) -> Result<()>
377where
378    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
379{
380    let props = graph.properties();
381    let mut stack = Vec::new();
382    let mut path_prefix = path_prefix.to_vec();
383    stack.push(TraverseStackItem::Visit {
384        node_id: dir_entry.id,
385        path: dir_entry.path,
386        perm: dir_entry.perm,
387    });
388    while let Some(task) = stack.pop() {
389        if limit.is_some_and(|limit| result.len() >= limit) {
390            bail!("Diff has more than {} changes", limit.unwrap());
391        }
392        match task {
393            TraverseStackItem::Leave => {
394                path_prefix.pop();
395            }
396            TraverseStackItem::Visit {
397                node_id,
398                path,
399                perm,
400            } => {
401                path_prefix.push(path);
402                match props.node_type(node_id) {
403                    NodeType::Content | NodeType::Revision => {
404                        let diff_item = if add {
405                            TreeDiffOperation::Added {
406                                path: path_prefix.to_vec().into_boxed_slice(),
407                                new_file: node_id,
408                                new_perm: perm,
409                            }
410                        } else {
411                            TreeDiffOperation::Deleted {
412                                path: path_prefix.to_vec().into_boxed_slice(),
413                                old_file: node_id,
414                                old_perm: perm,
415                            }
416                        };
417                        result.push(diff_item);
418                    }
419                    NodeType::Directory => {
420                        let dir_listing = list_dir(graph, node_id)?;
421                        for entry in &dir_listing {
422                            stack.push(TraverseStackItem::Leave);
423                            stack.push(TraverseStackItem::Visit {
424                                node_id: entry.id,
425                                path: entry.path,
426                                perm: entry.perm,
427                            });
428                        }
429                    }
430                    x => {
431                        bail!("unexpected node type {x} when browsing a directory");
432                    }
433                }
434            }
435        }
436    }
437    Ok(())
438}
439
440/// Internal struct to represent an entry in the list of contents of a directory
441#[derive(Debug, Clone, Copy, PartialEq, Eq)]
442struct DirEntry {
443    path: LabelNameId,
444    perm: Option<Permission>,
445    id: NodeId,
446}
447
448/// Lists the contents of a directory, sorting the results by label name id.
449fn list_dir<G>(graph: &G, dir_node: NodeId) -> Result<Vec<DirEntry>>
450where
451    G: SwhLabeledForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>,
452{
453    let props = graph.properties();
454    let node_type = props.node_type(dir_node);
455    ensure!(
456        node_type == NodeType::Directory,
457        "Cannot list {}: not a directory",
458        props.swhid(dir_node),
459    );
460    let mut listing = Vec::new();
461    for (succ, labels) in graph.labeled_successors(dir_node) {
462        let node_type = props.node_type(succ);
463        if node_type == NodeType::Content
464            || node_type == NodeType::Directory
465            || node_type == NodeType::Revision
466        {
467            for label in labels {
468                if let EdgeLabel::DirEntry(dentry) = label {
469                    let perm = dentry.permission();
470                    listing.push(DirEntry {
471                        path: dentry.label_name_id(),
472                        perm,
473                        id: succ,
474                    });
475                }
476            }
477        }
478    }
479    listing.sort_unstable_by_key(|dir_entry| dir_entry.path.0);
480    Ok(listing)
481}
482
483#[cfg(test)]
484mod tests {
485
486    use swh_graph::{
487        NodeType, SWHID,
488        graph::*,
489        graph_builder::{BuiltGraph, GraphBuilder},
490    };
491
492    use super::*;
493
494    /// Convenience type to define test cases.
495    /// The `u32` fields are used to generate SWHIDs
496    /// when converting the trees to graphs.
497    enum Tree {
498        File(u32),
499        Executable(u32),
500        Dir(u32, Vec<(&'static str, Tree)>),
501        Revision(u32),
502    }
503
504    #[rustfmt::skip] // to keep the tree representation more compact and readable
505    fn base_tree() -> Tree {
506        Tree::Dir(1, vec![
507            ("books", Tree::Dir(2, vec![
508                ("disparition.txt", Tree::File(3)),
509                ("revision_link", Tree::Revision(9)),
510                ("war_and_peace.doc", Tree::File(4)),
511            ])),
512            ("music", Tree::Dir(5, vec![
513                ("lemon_tree.mp3", Tree::File(6)),
514                ("on_reflection.wav", Tree::File(7)),
515            ])),
516            ("readme.txt", Tree::File(8)),
517        ])
518    }
519
520    fn parse_path(graph: &BuiltGraph, path: &str) -> Box<[LabelNameId]> {
521        path.split('/')
522            .map(|part| {
523                graph
524                    .properties()
525                    .label_name_id(part.as_bytes())
526                    .expect("label name does not exist in the graph")
527            })
528            .collect()
529    }
530
531    #[test]
532    fn test_list_dir() -> Result<()> {
533        let tree = base_tree();
534        let mut builder = GraphBuilder::default();
535        let root_id = build_graph_recursively(&tree, &mut builder)?.0;
536        let graph = builder.done()?;
537        let props = graph.properties();
538        let books_subdir_id =
539            props.node_id("swh:1:dir:0000000000000000000000000000000000000002")?;
540        let music_subdir_id =
541            props.node_id("swh:1:dir:0000000000000000000000000000000000000005")?;
542        let readme_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000008")?;
543        let lemon_tree_id = props.node_id("swh:1:cnt:0000000000000000000000000000000000000006")?;
544        let on_reflection_id =
545            props.node_id("swh:1:cnt:0000000000000000000000000000000000000007")?;
546
547        assert_eq!(
548            list_dir(&graph, root_id)?,
549            vec![
550                DirEntry {
551                    path: parse_path(&graph, "books")[0],
552                    perm: Some(Permission::Directory),
553                    id: books_subdir_id,
554                },
555                DirEntry {
556                    path: parse_path(&graph, "music")[0],
557                    perm: Some(Permission::Directory),
558                    id: music_subdir_id,
559                },
560                DirEntry {
561                    path: parse_path(&graph, "readme.txt")[0],
562                    perm: Some(Permission::Content),
563                    id: readme_id,
564                }
565            ]
566        );
567        assert_eq!(
568            list_dir(&graph, music_subdir_id)?,
569            vec![
570                DirEntry {
571                    path: parse_path(&graph, "lemon_tree.mp3")[0],
572                    perm: Some(Permission::Content),
573                    id: lemon_tree_id,
574                },
575                DirEntry {
576                    path: parse_path(&graph, "on_reflection.wav")[0],
577                    perm: Some(Permission::Content),
578                    id: on_reflection_id,
579                }
580            ]
581        );
582        Ok(())
583    }
584
585    #[test]
586    fn test_diff_root_dir() -> Result<()> {
587        let base_tree = base_tree();
588
589        let mut builder = GraphBuilder::default();
590        let new_root = build_graph_recursively(&base_tree, &mut builder)?.0;
591        let graph = builder.done()?;
592        let diff = tree_diff_dirs(&graph, None, new_root, None)?;
593
594        let mut actual = diff.operations().collect::<Vec<_>>();
595        actual.sort_by_key(|op| {
596            let TreeDiffOperation::Added { new_file, .. } = op else {
597                panic!("expected only 'added' diff elements when diffing a root revision");
598            };
599            *new_file
600        });
601
602        assert_eq!(
603            actual,
604            vec![
605                TreeDiffOperation::Added {
606                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
607                    new_file: graph
608                        .properties()
609                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
610                    new_perm: Some(Permission::Content),
611                },
612                TreeDiffOperation::Added {
613                    path: parse_path(&graph, "books/revision_link").as_ref(),
614                    new_file: graph
615                        .properties()
616                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
617                    new_perm: Some(Permission::Revision),
618                },
619                TreeDiffOperation::Added {
620                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
621                    new_file: graph
622                        .properties()
623                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
624                    new_perm: Some(Permission::Content),
625                },
626                TreeDiffOperation::Added {
627                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
628                    new_file: graph
629                        .properties()
630                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
631                    new_perm: Some(Permission::Content),
632                },
633                TreeDiffOperation::Added {
634                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
635                    new_file: graph
636                        .properties()
637                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
638                    new_perm: Some(Permission::Content),
639                },
640                TreeDiffOperation::Added {
641                    path: parse_path(&graph, "readme.txt").as_ref(),
642                    new_file: graph
643                        .properties()
644                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
645                    new_perm: Some(Permission::Content),
646                }
647            ],
648        );
649
650        Ok(())
651    }
652
653    #[test]
654    fn test_edit_file() -> Result<()> {
655        let base_tree = base_tree();
656
657        #[rustfmt::skip]
658        let tree_with_edit =
659            Tree::Dir(15, vec![
660                ("books", Tree::Dir(14, vec![
661                    ("disparition.txt", Tree::File(13)),
662                    ("revision_link", Tree::Revision(9)),
663                    ("war_and_peace.doc", Tree::File(4)),
664                ])),
665                ("music", Tree::Dir(10, vec![
666                    ("lemon_tree.mp3", Tree::File(6)),
667                    ("on_reflection.wav", Tree::File(7)),
668                ])),
669                ("readme.txt", Tree::File(8)),
670        ]);
671
672        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
673
674        assert_eq!(
675            diff.operations,
676            vec![TreeDiffOperation::Modified {
677                path: parse_path(&graph, "books/disparition.txt"),
678                old_file: graph
679                    .properties()
680                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
681                new_file: graph
682                    .properties()
683                    .node_id("swh:1:cnt:000000000000000000000000000000000000000d")?,
684                old_perm: Some(Permission::Content),
685                new_perm: Some(Permission::Content),
686            }]
687        );
688        Ok(())
689    }
690
691    #[test]
692    fn test_change_permissions() -> Result<()> {
693        let base_tree = base_tree();
694
695        #[rustfmt::skip]
696        let tree_with_edit =
697            Tree::Dir(15, vec![
698                ("books", Tree::Dir(14, vec![
699                    ("disparition.txt", Tree::Executable(3)),
700                    ("revision_link", Tree::Revision(9)),
701                    ("war_and_peace.doc", Tree::File(4)),
702                ])),
703                ("music", Tree::Dir(10, vec![
704                    ("lemon_tree.mp3", Tree::File(6)),
705                    ("on_reflection.wav", Tree::File(7)),
706                ])),
707                ("readme.txt", Tree::File(8)),
708        ]);
709
710        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
711
712        assert_eq!(
713            diff.operations,
714            vec![TreeDiffOperation::Modified {
715                path: parse_path(&graph, "books/disparition.txt"),
716                old_file: graph
717                    .properties()
718                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
719                new_file: graph
720                    .properties()
721                    .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
722                old_perm: Some(Permission::Content),
723                new_perm: Some(Permission::ExecutableContent),
724            }]
725        );
726        Ok(())
727    }
728
729    #[test]
730    fn test_edit_revision() -> Result<()> {
731        let base_tree = base_tree();
732
733        #[rustfmt::skip]
734        let tree_with_edit =
735            Tree::Dir(15, vec![
736                ("books", Tree::Dir(14, vec![
737                    ("disparition.txt", Tree::File(3)),
738                    ("revision_link", Tree::Revision(16)),
739                    ("war_and_peace.doc", Tree::File(4)),
740                ])),
741                ("music", Tree::Dir(10, vec![
742                    ("lemon_tree.mp3", Tree::File(6)),
743                    ("on_reflection.wav", Tree::File(7)),
744                ])),
745                ("readme.txt", Tree::File(8)),
746        ]);
747
748        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
749
750        assert_eq!(
751            diff.operations,
752            vec![TreeDiffOperation::Modified {
753                path: parse_path(&graph, "books/revision_link"),
754                old_file: graph
755                    .properties()
756                    .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
757                new_file: graph
758                    .properties()
759                    .node_id("swh:1:rev:0000000000000000000000000000000000000010")?,
760                old_perm: Some(Permission::Revision),
761                new_perm: Some(Permission::Revision),
762            }]
763        );
764        Ok(())
765    }
766
767    #[test]
768    fn test_additions_and_deletions() -> Result<()> {
769        let base_tree = base_tree();
770
771        #[rustfmt::skip]
772        let tree_with_addition =
773            Tree::Dir(11, vec![
774                ("books", Tree::Dir(2, vec![
775                    ("disparition.txt", Tree::File(3)),
776                    ("revision_link", Tree::Revision(9)),
777                    ("war_and_peace.doc", Tree::File(4)),
778                ])),
779                ("music", Tree::Dir(10, vec![
780                    ("lemon_tree.mp3", Tree::File(6)),
781                    ("gavotte_aven.wma", Tree::File(12)),
782                    ("on_reflection.wav", Tree::File(7)),
783                ])),
784                ("readme.txt", Tree::File(8)),
785        ]);
786
787        #[rustfmt::skip]
788        let tree_with_directory_deletion =
789            Tree::Dir(13, vec![
790                ("music", Tree::Dir(5, vec![
791                    ("lemon_tree.mp3", Tree::File(6)),
792                    ("on_reflection.wav", Tree::File(7)),
793                ])),
794                ("readme.txt", Tree::File(8)),
795        ]);
796
797        let (graph, diff) = diff_trees(&base_tree, &tree_with_addition, None)?;
798
799        assert_eq!(
800            diff.operations().collect::<Vec<_>>(),
801            vec![TreeDiffOperation::Added {
802                path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
803                new_file: graph
804                    .properties()
805                    .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
806                new_perm: Some(Permission::Content),
807            }]
808        );
809
810        let (graph, diff) = diff_trees(&base_tree, &tree_with_directory_deletion, None)?;
811
812        assert_eq!(
813            diff.operations().collect::<Vec<_>>(),
814            vec![
815                TreeDiffOperation::Deleted {
816                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
817                    old_file: graph
818                        .properties()
819                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
820                    old_perm: Some(Permission::Content),
821                },
822                TreeDiffOperation::Deleted {
823                    path: parse_path(&graph, "books/revision_link").as_ref(),
824                    old_file: graph
825                        .properties()
826                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
827                    old_perm: Some(Permission::Revision),
828                },
829                TreeDiffOperation::Deleted {
830                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
831                    old_file: graph
832                        .properties()
833                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
834                    old_perm: Some(Permission::Content),
835                },
836            ]
837        );
838
839        let (graph, diff) = diff_trees(&tree_with_addition, &tree_with_directory_deletion, None)?;
840
841        assert_eq!(
842            diff.operations().collect::<Vec<_>>(),
843            vec![
844                TreeDiffOperation::Deleted {
845                    path: parse_path(&graph, "books/war_and_peace.doc").as_ref(),
846                    old_file: graph
847                        .properties()
848                        .node_id("swh:1:cnt:0000000000000000000000000000000000000004")?,
849                    old_perm: Some(Permission::Content),
850                },
851                TreeDiffOperation::Deleted {
852                    path: parse_path(&graph, "books/revision_link").as_ref(),
853                    old_file: graph
854                        .properties()
855                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
856                    old_perm: Some(Permission::Revision),
857                },
858                TreeDiffOperation::Deleted {
859                    path: parse_path(&graph, "books/disparition.txt").as_ref(),
860                    old_file: graph
861                        .properties()
862                        .node_id("swh:1:cnt:0000000000000000000000000000000000000003")?,
863                    old_perm: Some(Permission::Content),
864                },
865                TreeDiffOperation::Deleted {
866                    path: parse_path(&graph, "music/gavotte_aven.wma").as_ref(),
867                    old_file: graph
868                        .properties()
869                        .node_id("swh:1:cnt:000000000000000000000000000000000000000c")?,
870                    old_perm: Some(Permission::Content),
871                }
872            ]
873        );
874        Ok(())
875    }
876
877    #[test]
878    fn test_move_file() -> Result<()> {
879        let base_tree = base_tree();
880
881        #[rustfmt::skip]
882        let tree_with_edit =
883            Tree::Dir(17, vec![
884                ("books", Tree::Dir(16, vec![
885                    ("disparition.txt", Tree::File(3)),
886                    ("revision_link", Tree::Revision(9)),
887                    ("war_and_peace.doc", Tree::File(4)),
888                    ("readme.txt", Tree::File(8)),
889                ])),
890                ("music", Tree::Dir(10, vec![
891                    ("lemon_tree.mp3", Tree::File(6)),
892                    ("on_reflection.wav", Tree::File(7)),
893                ])),
894        ]);
895
896        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
897
898        // TODO: in the future we want to detect renames,
899        // instead of registering this as an addition and deletion
900        assert_eq!(
901            diff.operations().collect::<Vec<_>>(),
902            vec![
903                TreeDiffOperation::Deleted {
904                    path: parse_path(&graph, "readme.txt").as_ref(),
905                    old_file: graph
906                        .properties()
907                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
908                    old_perm: Some(Permission::Content),
909                },
910                TreeDiffOperation::Added {
911                    path: parse_path(&graph, "books/readme.txt").as_ref(),
912                    new_file: graph
913                        .properties()
914                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
915                    new_perm: Some(Permission::Content),
916                },
917            ]
918        );
919        Ok(())
920    }
921
922    #[test]
923    fn test_change_dir_to_file() -> Result<()> {
924        let base_tree = base_tree();
925
926        #[rustfmt::skip]
927        let modified_tree =
928            Tree::Dir(11, vec![
929                ("books", Tree::Dir(2, vec![
930                    ("disparition.txt", Tree::File(3)),
931                    ("revision_link", Tree::Revision(9)),
932                    ("war_and_peace.doc", Tree::File(4)),
933                ])),
934                ("music", Tree::Revision(10)),
935                ("readme.txt", Tree::File(8)),
936        ]);
937
938        let (graph, diff) = diff_trees(&base_tree, &modified_tree, None)?;
939
940        assert_eq!(
941            diff.operations().collect::<Vec<_>>(),
942            vec![
943                TreeDiffOperation::Deleted {
944                    path: parse_path(&graph, "music/on_reflection.wav").as_ref(),
945                    old_file: graph
946                        .properties()
947                        .node_id("swh:1:cnt:0000000000000000000000000000000000000007")?,
948                    old_perm: Some(Permission::Content),
949                },
950                TreeDiffOperation::Deleted {
951                    path: parse_path(&graph, "music/lemon_tree.mp3").as_ref(),
952                    old_file: graph
953                        .properties()
954                        .node_id("swh:1:cnt:0000000000000000000000000000000000000006")?,
955                    old_perm: Some(Permission::Content),
956                },
957                TreeDiffOperation::Added {
958                    path: parse_path(&graph, "music").as_ref(),
959                    new_file: graph
960                        .properties()
961                        .node_id("swh:1:rev:000000000000000000000000000000000000000a")?,
962                    new_perm: Some(Permission::Revision),
963                },
964            ]
965        );
966        Ok(())
967    }
968
969    #[test]
970    fn test_change_file_to_dir() -> Result<()> {
971        let base_tree = base_tree();
972
973        #[rustfmt::skip]
974        let tree_with_edit =
975            Tree::Dir(11, vec![
976                ("books", Tree::Dir(12, vec![
977                    ("disparition.txt", Tree::File(3)),
978                    ("revision_link", Tree::Dir(10, vec![
979                        ("readme.txt", Tree::File(8)),
980                    ])),
981                    ("war_and_peace.doc", Tree::File(4)),
982                ])),
983                ("music", Tree::Dir(5, vec![
984                    ("lemon_tree.mp3", Tree::File(6)),
985                    ("on_reflection.wav", Tree::File(7)),
986                ])),
987                ("readme.txt", Tree::File(8)),
988        ]);
989
990        let (graph, diff) = diff_trees(&base_tree, &tree_with_edit, None)?;
991
992        assert_eq!(
993            diff.operations().collect::<Vec<_>>(),
994            vec![
995                TreeDiffOperation::Deleted {
996                    path: parse_path(&graph, "books/revision_link").as_ref(),
997                    old_file: graph
998                        .properties()
999                        .node_id("swh:1:rev:0000000000000000000000000000000000000009")?,
1000                    old_perm: Some(Permission::Revision),
1001                },
1002                TreeDiffOperation::Added {
1003                    path: parse_path(&graph, "books/revision_link/readme.txt").as_ref(),
1004                    new_file: graph
1005                        .properties()
1006                        .node_id("swh:1:cnt:0000000000000000000000000000000000000008")?,
1007                    new_perm: Some(Permission::Content),
1008                }
1009            ]
1010        );
1011        Ok(())
1012    }
1013
1014    #[test]
1015    fn test_limit() -> Result<()> {
1016        let base_tree = base_tree();
1017
1018        #[rustfmt::skip]
1019        let tree_with_edit =
1020            Tree::Dir(15, vec![
1021                ("books", Tree::Dir(14, vec![
1022                    ("disparition.txt", Tree::File(3)),
1023                    ("revision_link", Tree::Revision(16)),
1024                    ("war_and_peace.doc", Tree::File(4)),
1025                ])),
1026        ]);
1027
1028        assert!(
1029            diff_trees(&base_tree, &tree_with_edit, Some(2)).is_err(),
1030            "Expected the diff to error out because there are too many changes"
1031        );
1032        Ok(())
1033    }
1034
1035    /// Helper function to call the [`diff_tree`] function more easily on graphs
1036    /// generated from synthetic trees
1037    fn diff_trees(
1038        tree_1: &Tree,
1039        tree_2: &Tree,
1040        limit: Option<usize>,
1041    ) -> Result<(BuiltGraph, TreeDiff)> {
1042        let mut builder = GraphBuilder::default();
1043        let old_root = build_graph_recursively(tree_1, &mut builder)?.0;
1044        let new_root = build_graph_recursively(tree_2, &mut builder)?.0;
1045        let graph = builder.done()?;
1046        let diff = tree_diff_dirs(&graph, Some(old_root), new_root, limit)?;
1047        Ok((graph, diff))
1048    }
1049
1050    fn make_node(id: &u32, node_type: NodeType, builder: &mut GraphBuilder) -> Result<NodeId> {
1051        let mut hash = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
1052        hash[16..].copy_from_slice(&id.to_be_bytes());
1053        let swhid = SWHID {
1054            namespace_version: 1,
1055            node_type,
1056            hash,
1057        };
1058        if let Some(node_id) = builder.node_id(swhid) {
1059            Ok(node_id)
1060        } else {
1061            Ok(builder.node(swhid)?.done())
1062        }
1063    }
1064
1065    fn build_graph_recursively(
1066        tree: &Tree,
1067        builder: &mut GraphBuilder,
1068    ) -> Result<(NodeId, Permission)> {
1069        match tree {
1070            Tree::File(node_id) => Ok((
1071                make_node(node_id, NodeType::Content, builder)?,
1072                Permission::Content,
1073            )),
1074            Tree::Executable(node_id) => Ok((
1075                make_node(node_id, NodeType::Content, builder)?,
1076                Permission::ExecutableContent,
1077            )),
1078            Tree::Dir(node_id, items) => {
1079                let dir = make_node(node_id, NodeType::Directory, builder)?;
1080                for (name, child) in items {
1081                    let (child_node, permission) = build_graph_recursively(child, builder)?;
1082                    builder.dir_arc(dir, child_node, permission, name.as_bytes());
1083                }
1084                Ok((dir, Permission::Directory))
1085            }
1086            Tree::Revision(node_id) => Ok((
1087                make_node(node_id, NodeType::Revision, builder)?,
1088                Permission::Revision,
1089            )),
1090        }
1091    }
1092}