branchless/git/
tree.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt::Debug;
3use std::path::{Path, PathBuf};
4
5use bstr::ByteVec;
6use itertools::Itertools;
7use thiserror::Error;
8use tracing::{instrument, warn};
9
10use super::oid::make_non_zero_oid;
11use super::status::FileMode;
12use super::{repo, MaybeZeroOid, NonZeroOid, Repo};
13
14#[derive(Debug, Error)]
15pub enum Error {
16    #[error("could not decode tree entry name: {0}")]
17    DecodeTreeEntryName(#[source] bstr::FromUtf8Error),
18
19    #[error(
20        "Tree entry was said to be an object of kind tree, but it could not be looked up: {oid}"
21    )]
22    NotATree { oid: NonZeroOid },
23
24    #[error("could not parse OID: {0}")]
25    ParseOid(#[source] eyre::Error),
26
27    #[error(transparent)]
28    FindTree(Box<repo::Error>),
29
30    #[error("could not find just-hydrated tree: {0}")]
31    FindHydratedTree(NonZeroOid),
32
33    #[error("could not read tree from path {path}: {source}")]
34    ReadTreeEntry { source: git2::Error, path: PathBuf },
35
36    #[error("could not construct tree builder: {0}")]
37    CreateTreeBuilder(#[source] git2::Error),
38
39    #[error("could not insert object {oid} with mode {file_mode:?} into tree builder: {source}")]
40    InsertTreeBuilderEntry {
41        source: git2::Error,
42        oid: NonZeroOid,
43        file_mode: FileMode,
44    },
45
46    #[error("could not read object at path {path} from tree builder: {source}")]
47    ReadTreeBuilderEntry { source: git2::Error, path: PathBuf },
48
49    #[error("could not delete object at path {path} from tree builder: {source}")]
50    DeleteTreeBuilderEntry { source: git2::Error, path: PathBuf },
51
52    #[error("could not build tree: {0}")]
53    BuildTree(#[source] git2::Error),
54}
55
56pub type Result<T> = std::result::Result<T, Error>;
57
58pub struct TreeEntry<'repo> {
59    pub(super) inner: git2::TreeEntry<'repo>,
60}
61
62impl TreeEntry<'_> {
63    /// Get the object ID for this tree entry.
64    pub fn get_oid(&self) -> NonZeroOid {
65        make_non_zero_oid(self.inner.id())
66    }
67
68    /// Get the object filemode for this tree entry.
69    pub fn get_filemode(&self) -> FileMode {
70        FileMode::from(self.inner.filemode())
71    }
72}
73
74/// A tree object. Contains a mapping from name to OID.
75#[derive(Debug)]
76pub struct Tree<'repo> {
77    pub(super) inner: git2::Tree<'repo>,
78}
79
80impl Tree<'_> {
81    /// Get the object ID for this tree.
82    pub fn get_oid(&self) -> NonZeroOid {
83        make_non_zero_oid(self.inner.id())
84    }
85
86    /// Determine whether this tree is empty (i.e. contains no entries).
87    ///
88    /// This doesn't happen in typical practice, since the index can't represent
89    /// empty directories (trees). However, this can happen when operating on
90    /// trees directly.
91    pub fn is_empty(&self) -> bool {
92        self.inner.is_empty()
93    }
94
95    /// Get the tree entry for the the given path.
96    ///
97    /// Note that the path isn't just restricted to entries of the current tree,
98    /// i.e. you can use slashes in the provided path.
99    pub fn get_path(&self, path: &Path) -> Result<Option<TreeEntry>> {
100        match self.inner.get_path(path) {
101            Ok(entry) => Ok(Some(TreeEntry { inner: entry })),
102            Err(err) if err.code() == git2::ErrorCode::NotFound => Ok(None),
103            Err(err) => Err(Error::ReadTreeEntry {
104                source: err,
105                path: path.to_owned(),
106            }),
107        }
108    }
109
110    /// Get the OID for the entry with the given path.
111    ///
112    /// Note that the path isn't just restricted to entries of the current tree,
113    /// i.e. you can use slashes in the provided path.
114    pub fn get_oid_for_path(&self, path: &Path) -> Result<Option<MaybeZeroOid>> {
115        self.get_path(path)
116            .map(|maybe_entry| maybe_entry.map(|entry| entry.inner.id().into()))
117    }
118
119    /// Get the (top-level) list of paths in this tree, for testing.
120    pub fn get_entry_paths_for_testing(&self) -> impl Debug {
121        self.inner
122            .iter()
123            .map(|entry| entry.name().unwrap().to_string())
124            .collect_vec()
125    }
126
127    /// Get the (top-level) list of paths and OIDs in this tree, for testing.
128    pub fn get_entries_for_testing(&self) -> impl Debug {
129        self.inner
130            .iter()
131            .map(|entry| (entry.name().unwrap().to_string(), entry.id().to_string()))
132            .collect_vec()
133    }
134}
135
136/// This function is a hot code path. Do not annotate with `#[instrument]`, and
137/// be mindful of performance/memory allocations.
138fn get_changed_paths_between_trees_internal(
139    repo: &Repo,
140    acc: &mut Vec<Vec<PathBuf>>,
141    current_path: &[PathBuf],
142    lhs: Option<&git2::Tree>,
143    rhs: Option<&git2::Tree>,
144) -> Result<()> {
145    let lhs_entries = lhs
146        .map(|tree| tree.iter().collect_vec())
147        .unwrap_or_default();
148    let lhs_entries: HashMap<&[u8], &git2::TreeEntry> = lhs_entries
149        .iter()
150        .map(|entry| (entry.name_bytes(), entry))
151        .collect();
152
153    let rhs_entries = rhs
154        .map(|tree| tree.iter().collect_vec())
155        .unwrap_or_default();
156    let rhs_entries: HashMap<&[u8], &git2::TreeEntry> = rhs_entries
157        .iter()
158        .map(|entry| (entry.name_bytes(), entry))
159        .collect();
160
161    let all_entry_names: HashSet<&[u8]> = lhs_entries
162        .keys()
163        .chain(rhs_entries.keys())
164        .cloned()
165        .collect();
166    let entries: HashMap<&[u8], (Option<&git2::TreeEntry>, Option<&git2::TreeEntry>)> =
167        all_entry_names
168            .into_iter()
169            .map(|entry_name| {
170                (
171                    entry_name,
172                    (
173                        lhs_entries.get(entry_name).copied(),
174                        rhs_entries.get(entry_name).copied(),
175                    ),
176                )
177            })
178            .collect();
179
180    for (entry_name, (lhs_entry, rhs_entry)) in entries {
181        enum ClassifiedEntry {
182            Absent,
183            NotATree(git2::Oid, i32),
184            Tree(git2::Oid, i32),
185        }
186
187        fn classify_entry(entry: Option<&git2::TreeEntry>) -> Result<ClassifiedEntry> {
188            let entry = match entry {
189                Some(entry) => entry,
190                None => return Ok(ClassifiedEntry::Absent),
191            };
192
193            let file_mode = entry.filemode_raw();
194            match entry.kind() {
195                Some(git2::ObjectType::Tree) => Ok(ClassifiedEntry::Tree(entry.id(), file_mode)),
196                _ => Ok(ClassifiedEntry::NotATree(entry.id(), file_mode)),
197            }
198        }
199
200        let get_tree = |oid: git2::Oid| -> Result<Tree> {
201            let entry_oid = MaybeZeroOid::from(oid);
202            let entry_oid = NonZeroOid::try_from(entry_oid).map_err(Error::ParseOid)?;
203            let entry_tree = repo
204                .find_tree(entry_oid)
205                .map_err(Box::new)
206                .map_err(Error::FindTree)?;
207            entry_tree.ok_or(Error::NotATree { oid: entry_oid })
208        };
209
210        let full_entry_path = || -> Result<Vec<PathBuf>> {
211            let mut full_entry_path = current_path.to_vec();
212            let entry_name = entry_name
213                .to_vec()
214                .into_path_buf()
215                .map_err(Error::DecodeTreeEntryName)?;
216            full_entry_path.push(entry_name);
217            Ok(full_entry_path)
218        };
219        match (classify_entry(lhs_entry)?, classify_entry(rhs_entry)?) {
220            (ClassifiedEntry::Absent, ClassifiedEntry::Absent) => {
221                // Shouldn't happen, but there's no issue here.
222            }
223
224            (
225                ClassifiedEntry::NotATree(lhs_oid, lhs_file_mode),
226                ClassifiedEntry::NotATree(rhs_oid, rhs_file_mode),
227            ) => {
228                if lhs_oid == rhs_oid && lhs_file_mode == rhs_file_mode {
229                    // Unchanged file, do nothing.
230                } else {
231                    // Changed file.
232                    acc.push(full_entry_path()?);
233                }
234            }
235
236            (ClassifiedEntry::Absent, ClassifiedEntry::NotATree(_, _))
237            | (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Absent) => {
238                // Added, removed, or changed file.
239                acc.push(full_entry_path()?);
240            }
241
242            (ClassifiedEntry::Absent, ClassifiedEntry::Tree(tree_oid, _))
243            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::Absent) => {
244                // A directory was added or removed. Add all entries from that
245                // directory.
246                let full_entry_path = full_entry_path()?;
247                let tree = get_tree(tree_oid)?;
248                get_changed_paths_between_trees_internal(
249                    repo,
250                    acc,
251                    &full_entry_path,
252                    Some(&tree.inner),
253                    None,
254                )?;
255            }
256
257            (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Tree(tree_oid, _))
258            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::NotATree(_, _)) => {
259                // A file was changed into a directory. Add both the file and
260                // all subdirectory entries as changed entries.
261                let full_entry_path = full_entry_path()?;
262                let tree = get_tree(tree_oid)?;
263                get_changed_paths_between_trees_internal(
264                    repo,
265                    acc,
266                    &full_entry_path,
267                    Some(&tree.inner),
268                    None,
269                )?;
270                acc.push(full_entry_path);
271            }
272
273            (
274                ClassifiedEntry::Tree(lhs_tree_oid, lhs_file_mode),
275                ClassifiedEntry::Tree(rhs_tree_oid, rhs_file_mode),
276            ) => {
277                match (
278                    (lhs_tree_oid == rhs_tree_oid),
279                    // Note that there should only be one possible file mode for
280                    // an entry which points to a tree, but it's possible that
281                    // some extra non-meaningful bits are set. Should we report
282                    // a change in that case? This code takes the conservative
283                    // approach and reports a change.
284                    (lhs_file_mode == rhs_file_mode),
285                ) {
286                    (true, true) => {
287                        // Unchanged entry, do nothing.
288                    }
289
290                    (true, false) => {
291                        // Only the directory changed, but none of its contents.
292                        acc.push(full_entry_path()?);
293                    }
294
295                    (false, true) => {
296                        let lhs_tree = get_tree(lhs_tree_oid)?;
297                        let rhs_tree = get_tree(rhs_tree_oid)?;
298
299                        // Only include the files changed in the subtrees, and
300                        // not the directory itself.
301                        get_changed_paths_between_trees_internal(
302                            repo,
303                            acc,
304                            &full_entry_path()?,
305                            Some(&lhs_tree.inner),
306                            Some(&rhs_tree.inner),
307                        )?;
308                    }
309
310                    (false, false) => {
311                        let lhs_tree = get_tree(lhs_tree_oid)?;
312                        let rhs_tree = get_tree(rhs_tree_oid)?;
313                        let full_entry_path = full_entry_path()?;
314
315                        get_changed_paths_between_trees_internal(
316                            repo,
317                            acc,
318                            &full_entry_path,
319                            Some(&lhs_tree.inner),
320                            Some(&rhs_tree.inner),
321                        )?;
322                        acc.push(full_entry_path);
323                    }
324                }
325            }
326        }
327    }
328
329    Ok(())
330}
331
332/// Get the paths which are different between two tree objects. This is faster
333/// than the `git2` implementation, which always iterates all tree entries in
334/// all tree objects recursively.
335#[instrument]
336pub fn get_changed_paths_between_trees(
337    repo: &Repo,
338    lhs: Option<&Tree>,
339    rhs: Option<&Tree>,
340) -> Result<HashSet<PathBuf>> {
341    let mut acc = Vec::new();
342    get_changed_paths_between_trees_internal(
343        repo,
344        &mut acc,
345        &Vec::new(),
346        lhs.map(|tree| &tree.inner),
347        rhs.map(|tree| &tree.inner),
348    )?;
349    let changed_paths: HashSet<PathBuf> = acc.into_iter().map(PathBuf::from_iter).collect();
350    Ok(changed_paths)
351}
352
353/// Add the provided entries into the tree.
354///
355/// If the provided `Tree` is `None`, then this function adds the entries to the
356/// empty tree.
357///
358/// The paths for the provided entries can contain slashes.
359///
360/// If the value for an entry is `None`, then that element in the tree is
361/// removed. If a directory ever becomes empty, then it's removed from its
362/// parent directory.
363///
364/// If a path for a given entry is already present in the provided tree, then
365/// that entry is overwritten.
366///
367/// If a path refers to intermediate directories that don't exist in the
368/// provided tree, then those intermediate directories are created.
369#[instrument]
370pub fn hydrate_tree(
371    repo: &Repo,
372    tree: Option<&Tree>,
373    entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>>,
374) -> Result<NonZeroOid> {
375    let (file_entries, dir_entries) = {
376        let mut file_entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>> = HashMap::new();
377        let mut dir_entries: HashMap<PathBuf, HashMap<PathBuf, Option<(NonZeroOid, FileMode)>>> =
378            HashMap::new();
379        for (path, value) in entries {
380            match path.components().collect_vec().as_slice() {
381                [] => {
382                    warn!(?tree, ?value, "Empty path when hydrating tree");
383                }
384                [file_name] => {
385                    file_entries.insert(file_name.into(), value);
386                }
387                components => {
388                    let first: PathBuf = [components[0]].iter().collect();
389                    let rest: PathBuf = components[1..].iter().collect();
390                    dir_entries.entry(first).or_default().insert(rest, value);
391                }
392            }
393        }
394        (file_entries, dir_entries)
395    };
396
397    let tree = tree.map(|tree| &tree.inner);
398    let mut builder = repo
399        .inner
400        .treebuilder(tree)
401        .map_err(Error::CreateTreeBuilder)?;
402    for (file_name, file_value) in file_entries {
403        match file_value {
404            Some((oid, file_mode)) => {
405                builder
406                    .insert(&file_name, oid.inner, file_mode.into())
407                    .map_err(|err| Error::InsertTreeBuilderEntry {
408                        source: err,
409                        oid,
410                        file_mode,
411                    })?;
412            }
413            None => remove_entry_if_exists(&mut builder, &file_name)?,
414        }
415    }
416
417    for (dir_name, dir_value) in dir_entries {
418        let existing_dir_entry: Option<Tree> =
419            match builder
420                .get(&dir_name)
421                .map_err(|err| Error::ReadTreeBuilderEntry {
422                    source: err,
423                    path: dir_name.to_owned(),
424                })? {
425                Some(existing_dir_entry)
426                    if !existing_dir_entry.id().is_zero()
427                        && existing_dir_entry.kind() == Some(git2::ObjectType::Tree) =>
428                {
429                    repo.find_tree(make_non_zero_oid(existing_dir_entry.id()))
430                        .map_err(Box::new)
431                        .map_err(Error::FindTree)?
432                }
433                _ => None,
434            };
435        let new_entry_oid = hydrate_tree(repo, existing_dir_entry.as_ref(), dir_value)?;
436
437        let new_entry_tree = repo
438            .find_tree(new_entry_oid)
439            .map_err(Box::new)
440            .map_err(Error::FindTree)?
441            .ok_or(Error::FindHydratedTree(new_entry_oid))?;
442        if new_entry_tree.is_empty() {
443            remove_entry_if_exists(&mut builder, &dir_name)?;
444        } else {
445            builder
446                .insert(&dir_name, new_entry_oid.inner, git2::FileMode::Tree.into())
447                .map_err(|err| Error::InsertTreeBuilderEntry {
448                    source: err,
449                    oid: new_entry_oid,
450                    file_mode: FileMode::Tree,
451                })?;
452        }
453    }
454
455    let tree_oid = builder.write().map_err(Error::BuildTree)?;
456    Ok(make_non_zero_oid(tree_oid))
457}
458
459pub fn make_empty_tree(repo: &Repo) -> Result<Tree> {
460    let tree_oid = hydrate_tree(repo, None, Default::default())?;
461    repo.find_tree_or_fail(tree_oid)
462        .map_err(Box::new)
463        .map_err(Error::FindTree)
464}
465
466/// `libgit2` raises an error if the entry isn't present, but that's often not
467/// an error condition here. We may be referring to a created or deleted path,
468/// which wouldn't exist in one of the pre-/post-patch trees.
469fn remove_entry_if_exists(builder: &mut git2::TreeBuilder, name: &Path) -> Result<()> {
470    if builder
471        .get(name)
472        .map_err(|err| Error::ReadTreeBuilderEntry {
473            source: err,
474            path: name.to_owned(),
475        })?
476        .is_some()
477    {
478        builder
479            .remove(name)
480            .map_err(|err| Error::DeleteTreeBuilderEntry {
481                source: err,
482                path: name.to_owned(),
483            })?;
484    }
485    Ok(())
486}
487
488/// Filter the entries in the provided tree by only keeping the provided paths.
489///
490/// If a provided path does not appear in the tree at all, then it's ignored.
491#[instrument]
492pub fn dehydrate_tree(repo: &Repo, tree: &Tree, paths: &[&Path]) -> Result<NonZeroOid> {
493    let entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>> = paths
494        .iter()
495        .map(|path| -> Result<(PathBuf, _)> {
496            let key = path.to_path_buf();
497            match tree.inner.get_path(path) {
498                Ok(tree_entry) => {
499                    let value = Some((
500                        make_non_zero_oid(tree_entry.id()),
501                        FileMode::from(tree_entry.filemode()),
502                    ));
503                    Ok((key, value))
504                }
505                Err(err) if err.code() == git2::ErrorCode::NotFound => Ok((key, None)),
506                Err(err) => Err(Error::ReadTreeEntry {
507                    source: err,
508                    path: key,
509                }),
510            }
511        })
512        .try_collect()?;
513
514    hydrate_tree(repo, None, entries)
515}