git2_ext/
tree.rs

1//! Lower-level Tree operations
2
3use std::collections::HashMap;
4use std::collections::HashSet;
5use std::iter::FromIterator;
6
7use itertools::Itertools;
8
9/// This function is a hot code path. Do not annotate with `#[instrument]`, and
10/// be mindful of performance/memory allocations.
11fn get_changed_paths_between_trees_internal(
12    repo: &git2::Repository,
13    acc: &mut Vec<Vec<std::path::PathBuf>>,
14    current_path: &[std::path::PathBuf],
15    lhs: Option<&git2::Tree<'_>>,
16    rhs: Option<&git2::Tree<'_>>,
17) -> Result<(), git2::Error> {
18    let lhs_entries = lhs
19        .map(|tree| tree.iter().collect_vec())
20        .unwrap_or_default();
21    let lhs_entries: HashMap<&[u8], &git2::TreeEntry<'_>> = lhs_entries
22        .iter()
23        .map(|entry| (entry.name_bytes(), entry))
24        .collect();
25
26    let rhs_entries = rhs
27        .map(|tree| tree.iter().collect_vec())
28        .unwrap_or_default();
29    let rhs_entries: HashMap<&[u8], &git2::TreeEntry<'_>> = rhs_entries
30        .iter()
31        .map(|entry| (entry.name_bytes(), entry))
32        .collect();
33
34    let all_entry_names: HashSet<&[u8]> = lhs_entries
35        .keys()
36        .chain(rhs_entries.keys())
37        .cloned()
38        .collect();
39    let entries: HashMap<&[u8], (Option<&git2::TreeEntry<'_>>, Option<&git2::TreeEntry<'_>>)> =
40        all_entry_names
41            .into_iter()
42            .map(|entry_name| {
43                (
44                    entry_name,
45                    (
46                        lhs_entries.get(entry_name).copied(),
47                        rhs_entries.get(entry_name).copied(),
48                    ),
49                )
50            })
51            .collect();
52
53    for (entry_name, (lhs_entry, rhs_entry)) in entries {
54        enum ClassifiedEntry {
55            Absent,
56            NotATree(git2::Oid, i32),
57            Tree(git2::Oid, i32),
58        }
59
60        fn classify_entry(
61            entry: Option<&git2::TreeEntry<'_>>,
62        ) -> Result<ClassifiedEntry, git2::Error> {
63            let entry = match entry {
64                Some(entry) => entry,
65                None => return Ok(ClassifiedEntry::Absent),
66            };
67
68            let file_mode = entry.filemode_raw();
69            match entry.kind() {
70                Some(git2::ObjectType::Tree) => Ok(ClassifiedEntry::Tree(entry.id(), file_mode)),
71                _ => Ok(ClassifiedEntry::NotATree(entry.id(), file_mode)),
72            }
73        }
74
75        let get_tree = |oid| repo.find_tree(oid);
76
77        let full_entry_path = || -> Vec<std::path::PathBuf> {
78            let entry_path = crate::bytes::bytes2path(entry_name);
79            let mut full_entry_path = current_path.to_vec();
80            full_entry_path.push(entry_path.to_owned());
81            full_entry_path
82        };
83        match (classify_entry(lhs_entry)?, classify_entry(rhs_entry)?) {
84            (ClassifiedEntry::Absent, ClassifiedEntry::Absent) => {
85                // Shouldn't happen, but there's no issue here.
86            }
87
88            (
89                ClassifiedEntry::NotATree(lhs_oid, lhs_file_mode),
90                ClassifiedEntry::NotATree(rhs_oid, rhs_file_mode),
91            ) => {
92                if lhs_oid == rhs_oid && lhs_file_mode == rhs_file_mode {
93                    // Unchanged file, do nothing.
94                } else {
95                    // Changed file.
96                    acc.push(full_entry_path());
97                }
98            }
99
100            (ClassifiedEntry::Absent, ClassifiedEntry::NotATree(_, _))
101            | (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Absent) => {
102                // Added, removed, or changed file.
103                acc.push(full_entry_path());
104            }
105
106            (ClassifiedEntry::Absent, ClassifiedEntry::Tree(tree_oid, _))
107            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::Absent) => {
108                // A directory was added or removed. Add all entries from that
109                // directory.
110                let full_entry_path = full_entry_path();
111                let tree = get_tree(tree_oid)?;
112                get_changed_paths_between_trees_internal(
113                    repo,
114                    acc,
115                    &full_entry_path,
116                    Some(&tree),
117                    None,
118                )?;
119            }
120
121            (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Tree(tree_oid, _))
122            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::NotATree(_, _)) => {
123                // A file was changed into a directory. Add both the file and
124                // all subdirectory entries as changed entries.
125                let full_entry_path = full_entry_path();
126                let tree = get_tree(tree_oid)?;
127                get_changed_paths_between_trees_internal(
128                    repo,
129                    acc,
130                    &full_entry_path,
131                    Some(&tree),
132                    None,
133                )?;
134                acc.push(full_entry_path);
135            }
136
137            (
138                ClassifiedEntry::Tree(lhs_tree_oid, lhs_file_mode),
139                ClassifiedEntry::Tree(rhs_tree_oid, rhs_file_mode),
140            ) => {
141                match (
142                    (lhs_tree_oid == rhs_tree_oid),
143                    // Note that there should only be one possible file mode for
144                    // an entry which points to a tree, but it's possible that
145                    // some extra non-meaningful bits are set. Should we report
146                    // a change in that case? This code takes the conservative
147                    // approach and reports a change.
148                    (lhs_file_mode == rhs_file_mode),
149                ) {
150                    (true, true) => {
151                        // Unchanged entry, do nothing.
152                    }
153
154                    (true, false) => {
155                        // Only the directory changed, but none of its contents.
156                        acc.push(full_entry_path());
157                    }
158
159                    (false, true) => {
160                        let lhs_tree = get_tree(lhs_tree_oid)?;
161                        let rhs_tree = get_tree(rhs_tree_oid)?;
162
163                        // Only include the files changed in the subtrees, and
164                        // not the directory itself.
165                        get_changed_paths_between_trees_internal(
166                            repo,
167                            acc,
168                            &full_entry_path(),
169                            Some(&lhs_tree),
170                            Some(&rhs_tree),
171                        )?;
172                    }
173
174                    (false, false) => {
175                        let lhs_tree = get_tree(lhs_tree_oid)?;
176                        let rhs_tree = get_tree(rhs_tree_oid)?;
177                        let full_entry_path = full_entry_path();
178
179                        get_changed_paths_between_trees_internal(
180                            repo,
181                            acc,
182                            &full_entry_path,
183                            Some(&lhs_tree),
184                            Some(&rhs_tree),
185                        )?;
186                        acc.push(full_entry_path);
187                    }
188                }
189            }
190        }
191    }
192
193    Ok(())
194}
195
196pub fn get_changed_paths_between_trees(
197    repo: &git2::Repository,
198    lhs: Option<&git2::Tree<'_>>,
199    rhs: Option<&git2::Tree<'_>>,
200) -> Result<HashSet<std::path::PathBuf>, git2::Error> {
201    let mut acc = Vec::new();
202    get_changed_paths_between_trees_internal(repo, &mut acc, &Vec::new(), lhs, rhs)?;
203    let changed_paths: HashSet<_> = acc.into_iter().map(std::path::PathBuf::from_iter).collect();
204    Ok(changed_paths)
205}
206
207/// Add the provided entries into the tree.
208///
209/// If the provided `Tree` is `None`, then this function adds the entries to the
210/// empty tree.
211///
212/// The paths for the provided entries can contain slashes.
213///
214/// If the value for an entry is `None`, then that element in the tree is
215/// removed. If a directory ever becomes empty, then it's removed from its
216/// parent directory.
217///
218/// If a path for a given entry is already present in the provided tree, then
219/// that entry is overwritten.
220///
221/// If a path refers to intermediate directories that don't exist in the
222/// provided tree, then those intermediate directories are created.
223pub fn rebuild_tree<'r>(
224    repo: &'r git2::Repository,
225    tree: Option<&git2::Tree<'r>>,
226    entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>>,
227) -> Result<git2::Oid, git2::Error> {
228    let (file_entries, dir_entries) = {
229        let mut file_entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>> =
230            HashMap::new();
231        let mut dir_entries: HashMap<
232            std::path::PathBuf,
233            HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>>,
234        > = HashMap::new();
235        for (path, value) in entries {
236            match path.components().collect_vec().as_slice() {
237                [] => {
238                    log::trace!("Empty path when hydrating tree");
239                }
240                [file_name] => {
241                    file_entries.insert(file_name.into(), value);
242                }
243                components => {
244                    let first: std::path::PathBuf = [components[0]].iter().collect();
245                    let rest: std::path::PathBuf = components[1..].iter().collect();
246                    dir_entries.entry(first).or_default().insert(rest, value);
247                }
248            }
249        }
250        (file_entries, dir_entries)
251    };
252
253    let mut builder = repo.treebuilder(tree)?;
254    for (file_name, file_value) in file_entries {
255        match file_value {
256            Some((oid, file_mode)) => {
257                builder.insert(&file_name, oid, file_mode)?;
258            }
259            None => {
260                remove_entry_if_exists(&mut builder, &file_name)?;
261            }
262        }
263    }
264
265    for (dir_name, dir_value) in dir_entries {
266        let existing_dir_entry: Option<git2::Tree<'_>> = match builder.get(&dir_name)? {
267            Some(existing_dir_entry)
268                if !existing_dir_entry.id().is_zero()
269                    && existing_dir_entry.kind() == Some(git2::ObjectType::Tree) =>
270            {
271                Some(repo.find_tree(existing_dir_entry.id())?)
272            }
273            _ => None,
274        };
275        let new_entry_oid = rebuild_tree(repo, existing_dir_entry.as_ref(), dir_value)?;
276
277        let new_entry_tree = repo.find_tree(new_entry_oid)?;
278        if new_entry_tree.is_empty() {
279            remove_entry_if_exists(&mut builder, &dir_name)?;
280        } else {
281            builder.insert(&dir_name, new_entry_oid, git2::FileMode::Tree.into())?;
282        }
283    }
284
285    let tree_oid = builder.write()?;
286    Ok(tree_oid)
287}
288
289/// `libgit2` raises an error if the entry isn't present, but that's often not
290/// an error condition here. We may be referring to a created or deleted path,
291/// which wouldn't exist in one of the pre-/post-patch trees.
292fn remove_entry_if_exists(
293    builder: &mut git2::TreeBuilder<'_>,
294    name: &std::path::Path,
295) -> Result<(), git2::Error> {
296    if builder.get(name)?.is_some() {
297        builder.remove(name)?;
298    }
299    Ok(())
300}
301
302/// Filter the entries in the provided tree by only keeping the provided paths.
303///
304/// If a provided path does not appear in the tree at all, then it's ignored.
305pub fn filter_tree<'r>(
306    repo: &'r git2::Repository,
307    tree: &git2::Tree<'r>,
308    paths: &[&std::path::Path],
309) -> Result<git2::Oid, git2::Error> {
310    let entries: HashMap<std::path::PathBuf, Option<(git2::Oid, i32)>> = paths
311        .iter()
312        .map(|path| -> Result<(std::path::PathBuf, _), git2::Error> {
313            let key = path.to_path_buf();
314            match tree.get_path(path) {
315                Ok(tree_entry) => {
316                    let value = Some((tree_entry.id(), tree_entry.filemode()));
317                    Ok((key, value))
318                }
319                Err(err) if err.code() == git2::ErrorCode::NotFound => Ok((key, None)),
320                Err(err) => Err(err),
321            }
322        })
323        .try_collect()?;
324
325    rebuild_tree(repo, None, entries)
326}
327
328#[cfg(test)]
329mod tests {
330    use snapbox::assert_data_eq;
331    use snapbox::prelude::*;
332    use snapbox::str;
333
334    use super::*;
335
336    use crate::testing::make_git;
337
338    fn dump_tree_entries(tree: &git2::Tree<'_>) -> String {
339        #[allow(clippy::format_collect)]
340        tree.iter()
341            .map(|entry| format!("{:?} {:?}\n", entry.name().unwrap(), entry.id()))
342            .collect()
343    }
344
345    #[test]
346    fn test_rebuild_tree() -> eyre::Result<()> {
347        let git = make_git()?;
348
349        git.init_repo()?;
350
351        git.write_file("foo", "foo")?;
352        git.write_file("bar/bar", "bar")?;
353        git.write_file("bar/baz", "qux")?;
354        git.write_file("xyzzy", "xyzzy")?;
355        git.run(&["add", "."])?;
356        git.run(&["commit", "-m", "commit"])?;
357
358        let repo = git.get_repo()?;
359        let head_oid = repo.head()?.target().unwrap();
360        let head_commit = repo.find_commit(head_oid)?;
361        let head_tree = head_commit.tree()?;
362
363        assert_data_eq!(
364            dump_tree_entries(&head_tree),
365            str![[r#"
366"bar" 778e23a1e80b1feb10e00b15b29a33315929c5b5
367"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
368"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
369"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
370
371"#]]
372        );
373
374        {
375            let hydrated_tree = {
376                let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
377                    let mut result = HashMap::new();
378                    result.insert(
379                        std::path::PathBuf::from("foo-copy.txt"),
380                        Some((
381                            head_tree
382                                .get_path(&std::path::PathBuf::from("foo.txt"))?
383                                .id(),
384                            0o100644,
385                        )),
386                    );
387                    result.insert(std::path::PathBuf::from("foo.txt"), None);
388                    result
389                })?;
390                repo.find_tree(hydrated_tree_oid)?
391            };
392            assert_data_eq!(
393                dump_tree_entries(&hydrated_tree),
394                str![[r#"
395"bar" 778e23a1e80b1feb10e00b15b29a33315929c5b5
396"foo-copy.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
397"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
398"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
399
400"#]]
401            );
402        }
403
404        {
405            let hydrated_tree = {
406                let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
407                    let mut result = HashMap::new();
408                    result.insert(std::path::PathBuf::from("bar/bar.txt"), None);
409                    result
410                })?;
411                repo.find_tree(hydrated_tree_oid)?
412            };
413            assert_data_eq!(
414                dump_tree_entries(&hydrated_tree),
415                str![[r#"
416"bar" 08ee88e1c53fbd01ab76f136a4f2c9d759b981d0
417"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
418"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
419"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
420
421"#]]
422            );
423        }
424
425        {
426            let hydrated_tree = {
427                let hydrated_tree_oid = rebuild_tree(&repo, Some(&head_tree), {
428                    let mut result = HashMap::new();
429                    result.insert(std::path::PathBuf::from("bar/bar.txt"), None);
430                    result.insert(std::path::PathBuf::from("bar/baz.txt"), None);
431                    result
432                })?;
433                repo.find_tree(hydrated_tree_oid)?
434            };
435            assert_data_eq!(
436                dump_tree_entries(&hydrated_tree),
437                str![[r#"
438"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
439"initial.txt" 63af22885f8665a312ba8b83db722134f1f8290d
440"xyzzy.txt" 7c465afc533f95ff7d2c91e18921f94aac8292fc
441
442"#]]
443            );
444        }
445
446        {
447            let dehydrated_tree_oid = filter_tree(
448                &repo,
449                &head_tree,
450                &[
451                    std::path::Path::new("bar/baz.txt"),
452                    std::path::Path::new("foo.txt"),
453                ],
454            )?;
455            let dehydrated_tree = repo.find_tree(dehydrated_tree_oid)?;
456            assert_data_eq!(
457                dump_tree_entries(&dehydrated_tree),
458                str![[r#"
459"bar" 08ee88e1c53fbd01ab76f136a4f2c9d759b981d0
460"foo.txt" 19102815663d23f8b75a47e7a01965dcdc96468c
461
462"#]]
463            );
464        }
465
466        Ok(())
467    }
468
469    #[test]
470    fn test_detect_path_only_changed_file_mode() -> eyre::Result<()> {
471        let git = make_git()?;
472        git.init_repo()?;
473
474        git.run(&["update-index", "--chmod=+x", "initial.txt"])?;
475        git.run(&["commit", "-m", "update file mode"])?;
476
477        let repo = git.get_repo()?;
478        let oid = repo.head()?.target().unwrap();
479        let commit = repo.find_commit(oid)?;
480
481        let lhs = commit.parent(0).unwrap();
482        let lhs_tree = lhs.tree()?;
483        let rhs_tree = commit.tree()?;
484        let changed_paths =
485            get_changed_paths_between_trees(&repo, Some(&lhs_tree), Some(&rhs_tree))?;
486        let mut changed_paths = changed_paths
487            .into_iter()
488            .map(|p| p.display().to_string())
489            .collect::<Vec<_>>();
490        changed_paths.sort();
491
492        assert_data_eq!(
493            changed_paths.to_debug(),
494            str![[r#"
495[
496    "initial.txt",
497]
498
499"#]]
500        );
501
502        Ok(())
503    }
504}