Skip to main content

objects/object/
tree_walk.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Tree integrity walking — single traversal for reference and content checks.
3
4use std::collections::HashSet;
5
6use crate::{
7    error::Result,
8    object::{ContentHash, Tree, TreeEntry},
9    store::ObjectSource,
10};
11
12/// Events emitted while walking reachable trees for integrity checks.
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub enum TreeIntegrityEvent<'a> {
15    /// A tree was entered for the first time during this walk.
16    EnterTree {
17        hash: ContentHash,
18        tree: &'a Tree,
19    },
20    /// A blob file entry at `path` (symlinks and gitlinks are excluded).
21    BlobLeaf {
22        entry: &'a TreeEntry,
23        path: String,
24    },
25    /// A child tree entry from `parent_hash`.
26    TreeRef {
27        parent_hash: ContentHash,
28        entry: &'a TreeEntry,
29    },
30}
31
32/// Walk all trees reachable from `roots`, deduplicating visited trees.
33///
34/// Missing root or subtree trees are skipped silently. Gitlink entries are not
35/// descended into. Visitation order is depth-first, sorted tree entry order.
36pub fn walk_tree_integrity<S, V>(
37    source: &S,
38    roots: impl IntoIterator<Item = ContentHash>,
39    visitor: &mut V,
40) -> Result<()>
41where
42    S: ObjectSource + ?Sized,
43    V: FnMut(TreeIntegrityEvent<'_>) -> Result<()>,
44{
45    let mut visited = HashSet::new();
46    for root in roots {
47        walk_tree_recursive(source, &root, "", &mut visited, visitor)?;
48    }
49    Ok(())
50}
51
52fn walk_tree_recursive<S, V>(
53    source: &S,
54    tree_hash: &ContentHash,
55    path_prefix: &str,
56    visited: &mut HashSet<ContentHash>,
57    visitor: &mut V,
58) -> Result<()>
59where
60    S: ObjectSource + ?Sized,
61    V: FnMut(TreeIntegrityEvent<'_>) -> Result<()>,
62{
63    if visited.contains(tree_hash) {
64        return Ok(());
65    }
66    visited.insert(*tree_hash);
67
68    let Some(tree) = source.get_tree(tree_hash)? else {
69        return Ok(());
70    };
71
72    visitor(TreeIntegrityEvent::EnterTree {
73        hash: *tree_hash,
74        tree: &tree,
75    })?;
76
77    for entry in tree.entries() {
78        let path = if path_prefix.is_empty() {
79            entry.name().to_string()
80        } else {
81            format!("{path_prefix}/{}", entry.name())
82        };
83
84        if entry.blob_hash().is_some() {
85            visitor(TreeIntegrityEvent::BlobLeaf { entry, path: path.clone() })?;
86        } else if let Some(child_hash) = entry.tree_hash() {
87            visitor(TreeIntegrityEvent::TreeRef {
88                parent_hash: *tree_hash,
89                entry,
90            })?;
91            walk_tree_recursive(source, &child_hash, &path, visited, visitor)?;
92        }
93    }
94
95    Ok(())
96}
97
98#[cfg(test)]
99mod tests {
100    use super::*;
101    use crate::{
102        object::{Blob, Tree, TreeEntry},
103        store::{InMemoryStore, ObjectStore},
104    };
105
106    #[test]
107    fn walk_tree_integrity_dedups_shared_subtrees() {
108        let store = InMemoryStore::new();
109        let blob = Blob::from("shared\n");
110        let blob_hash = store.put_blob(&blob).unwrap();
111        let shared = Tree::from_entries(vec![
112            TreeEntry::file("leaf.txt", blob_hash, false).unwrap(),
113        ]);
114        let shared_hash = store.put_tree(&shared).unwrap();
115        let root_a = Tree::from_entries(vec![
116            TreeEntry::directory("shared", shared_hash).unwrap(),
117            TreeEntry::file("a.txt", blob_hash, false).unwrap(),
118        ]);
119        let root_b = Tree::from_entries(vec![TreeEntry::directory("shared", shared_hash).unwrap()]);
120        let root_a_hash = store.put_tree(&root_a).unwrap();
121        let root_b_hash = store.put_tree(&root_b).unwrap();
122
123        let mut enter_count = 0;
124        let mut blob_leaves = Vec::new();
125
126        walk_tree_integrity(&store, [root_a_hash, root_b_hash], &mut |event| {
127            match event {
128                TreeIntegrityEvent::EnterTree { .. } => enter_count += 1,
129                TreeIntegrityEvent::BlobLeaf { path, .. } => blob_leaves.push(path),
130                TreeIntegrityEvent::TreeRef { .. } => {}
131            }
132            Ok(())
133        })
134        .unwrap();
135
136        assert_eq!(enter_count, 3, "shared subtree must be visited once");
137        assert_eq!(
138            blob_leaves,
139            vec!["a.txt".to_string(), "shared/leaf.txt".to_string()]
140        );
141    }
142
143    #[test]
144    fn walk_tree_integrity_skips_missing_subtree_silently() {
145        let store = InMemoryStore::new();
146        let missing = ContentHash::compute(b"missing-tree");
147        let root = Tree::from_entries(vec![TreeEntry::directory("gone", missing).unwrap()]);
148        let root_hash = store.put_tree(&root).unwrap();
149
150        let mut enter_count = 0;
151        walk_tree_integrity(&store, [root_hash], &mut |event| {
152            if let TreeIntegrityEvent::EnterTree { .. } = event {
153                enter_count += 1;
154            }
155            Ok(())
156        })
157        .unwrap();
158
159        assert_eq!(enter_count, 1);
160    }
161}