Skip to main content

branchless/git/
tree.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt::Debug;
3use std::path::{Path, PathBuf};
4
5use bstr::ByteVec;
6use git2::build::TreeUpdateBuilder;
7use itertools::Itertools;
8use thiserror::Error;
9use tracing::{instrument, warn};
10
11use super::oid::make_non_zero_oid;
12use super::status::FileMode;
13use super::{MaybeZeroOid, NonZeroOid, Repo, repo};
14
15#[derive(Debug, Error)]
16pub enum Error {
17    #[error("could not decode tree entry name: {0}")]
18    DecodeTreeEntryName(#[source] bstr::FromUtf8Error),
19
20    #[error(
21        "Tree entry was said to be an object of kind tree, but it could not be looked up: {oid}"
22    )]
23    NotATree { oid: NonZeroOid },
24
25    #[error("could not parse OID: {0}")]
26    ParseOid(#[source] eyre::Error),
27
28    #[error(transparent)]
29    FindTree(Box<repo::Error>),
30
31    #[error("could not find just-hydrated tree: {0}")]
32    FindHydratedTree(NonZeroOid),
33
34    #[error("could not read tree from path {path}: {source}")]
35    ReadTreeEntry { source: git2::Error, path: PathBuf },
36
37    #[error("could not construct tree builder: {0}")]
38    CreateTreeBuilder(#[source] git2::Error),
39
40    #[error("could not insert object {oid} with mode {file_mode:?} into tree builder: {source}")]
41    InsertTreeBuilderEntry {
42        source: git2::Error,
43        oid: NonZeroOid,
44        file_mode: FileMode,
45    },
46
47    #[error("could not read object at path {path} from tree builder: {source}")]
48    ReadTreeBuilderEntry { source: git2::Error, path: PathBuf },
49
50    #[error("could not delete object at path {path} from tree builder: {source}")]
51    DeleteTreeBuilderEntry { source: git2::Error, path: PathBuf },
52
53    #[error("could not build tree: {0}")]
54    BuildTree(#[source] git2::Error),
55}
56
57pub type Result<T> = std::result::Result<T, Error>;
58
59pub struct TreeEntry<'repo> {
60    pub(super) inner: git2::TreeEntry<'repo>,
61}
62
63impl TreeEntry<'_> {
64    /// Get the object ID for this tree entry.
65    pub fn get_oid(&self) -> NonZeroOid {
66        make_non_zero_oid(self.inner.id())
67    }
68
69    /// Get the object filemode for this tree entry.
70    pub fn get_filemode(&self) -> FileMode {
71        FileMode::from(self.inner.filemode())
72    }
73}
74
75/// A tree object. Contains a mapping from name to OID.
76#[derive(Debug)]
77pub struct Tree<'repo> {
78    pub(super) inner: git2::Tree<'repo>,
79}
80
81impl Tree<'_> {
82    /// Get the object ID for this tree.
83    pub fn get_oid(&self) -> NonZeroOid {
84        make_non_zero_oid(self.inner.id())
85    }
86
87    /// Determine whether this tree is empty (i.e. contains no entries).
88    ///
89    /// This doesn't happen in typical practice, since the index can't represent
90    /// empty directories (trees). However, this can happen when operating on
91    /// trees directly.
92    pub fn is_empty(&self) -> bool {
93        self.inner.is_empty()
94    }
95
96    /// Get the tree entry for the the given path.
97    ///
98    /// Note that the path isn't just restricted to entries of the current tree,
99    /// i.e. you can use slashes in the provided path.
100    pub fn get_path(&self, path: &Path) -> Result<Option<TreeEntry<'_>>> {
101        match self.inner.get_path(path) {
102            Ok(entry) => Ok(Some(TreeEntry { inner: entry })),
103            Err(err) if err.code() == git2::ErrorCode::NotFound => Ok(None),
104            Err(err) => Err(Error::ReadTreeEntry {
105                source: err,
106                path: path.to_owned(),
107            }),
108        }
109    }
110
111    /// Get the OID for the entry with the given path.
112    ///
113    /// Note that the path isn't just restricted to entries of the current tree,
114    /// i.e. you can use slashes in the provided path.
115    pub fn get_oid_for_path(&self, path: &Path) -> Result<Option<MaybeZeroOid>> {
116        self.get_path(path)
117            .map(|maybe_entry| maybe_entry.map(|entry| entry.inner.id().into()))
118    }
119
120    /// Remove the given path from the Tree, creating a new Tree in the given repo.
121    pub fn remove(&self, repo: &Repo, path: &Path) -> Result<NonZeroOid> {
122        let mut builder = TreeUpdateBuilder::new();
123        let tree_oid = builder
124            .remove(path)
125            .create_updated(&repo.inner, &self.inner)
126            .map_err(Error::BuildTree)?;
127        Ok(make_non_zero_oid(tree_oid))
128    }
129
130    /// Add or replace the given path/entry from the Tree, creating a new Tree in the given repo.
131    pub fn add_or_replace(
132        &self,
133        repo: &Repo,
134        path: &Path,
135        entry: &TreeEntry,
136    ) -> Result<NonZeroOid> {
137        let mut builder = TreeUpdateBuilder::new();
138        let tree_oid = builder
139            .upsert(path, entry.get_oid().into(), entry.get_filemode().into())
140            .create_updated(&repo.inner, &self.inner)
141            .map_err(Error::BuildTree)?;
142        Ok(make_non_zero_oid(tree_oid))
143    }
144
145    /// Get the (top-level) list of paths in this tree, for testing.
146    pub fn get_entry_paths_for_testing(&self) -> impl Debug {
147        self.inner
148            .iter()
149            .map(|entry| entry.name().unwrap().to_string())
150            .collect_vec()
151    }
152
153    /// Get the (top-level) list of paths and OIDs in this tree, for testing.
154    pub fn get_entries_for_testing(&self) -> impl Debug {
155        self.inner
156            .iter()
157            .map(|entry| (entry.name().unwrap().to_string(), entry.id().to_string()))
158            .collect_vec()
159    }
160}
161
162/// This function is a hot code path. Do not annotate with `#[instrument]`, and
163/// be mindful of performance/memory allocations.
164fn get_changed_paths_between_trees_internal(
165    repo: &Repo,
166    acc: &mut Vec<Vec<PathBuf>>,
167    current_path: &[PathBuf],
168    lhs: Option<&git2::Tree>,
169    rhs: Option<&git2::Tree>,
170) -> Result<()> {
171    let lhs_entries = lhs
172        .map(|tree| tree.iter().collect_vec())
173        .unwrap_or_default();
174    let lhs_entries: HashMap<&[u8], &git2::TreeEntry> = lhs_entries
175        .iter()
176        .map(|entry| (entry.name_bytes(), entry))
177        .collect();
178
179    let rhs_entries = rhs
180        .map(|tree| tree.iter().collect_vec())
181        .unwrap_or_default();
182    let rhs_entries: HashMap<&[u8], &git2::TreeEntry> = rhs_entries
183        .iter()
184        .map(|entry| (entry.name_bytes(), entry))
185        .collect();
186
187    let all_entry_names: HashSet<&[u8]> = lhs_entries
188        .keys()
189        .chain(rhs_entries.keys())
190        .cloned()
191        .collect();
192    let entries: HashMap<&[u8], (Option<&git2::TreeEntry>, Option<&git2::TreeEntry>)> =
193        all_entry_names
194            .into_iter()
195            .map(|entry_name| {
196                (
197                    entry_name,
198                    (
199                        lhs_entries.get(entry_name).copied(),
200                        rhs_entries.get(entry_name).copied(),
201                    ),
202                )
203            })
204            .collect();
205
206    for (entry_name, (lhs_entry, rhs_entry)) in entries {
207        enum ClassifiedEntry {
208            Absent,
209            NotATree(git2::Oid, i32),
210            Tree(git2::Oid, i32),
211        }
212
213        fn classify_entry(entry: Option<&git2::TreeEntry>) -> Result<ClassifiedEntry> {
214            let entry = match entry {
215                Some(entry) => entry,
216                None => return Ok(ClassifiedEntry::Absent),
217            };
218
219            let file_mode = entry.filemode_raw();
220            match entry.kind() {
221                Some(git2::ObjectType::Tree) => Ok(ClassifiedEntry::Tree(entry.id(), file_mode)),
222                _ => Ok(ClassifiedEntry::NotATree(entry.id(), file_mode)),
223            }
224        }
225
226        let get_tree = |oid: git2::Oid| -> Result<Tree> {
227            let entry_oid = MaybeZeroOid::from(oid);
228            let entry_oid = NonZeroOid::try_from(entry_oid).map_err(Error::ParseOid)?;
229            let entry_tree = repo
230                .find_tree(entry_oid)
231                .map_err(Box::new)
232                .map_err(Error::FindTree)?;
233            entry_tree.ok_or(Error::NotATree { oid: entry_oid })
234        };
235
236        let full_entry_path = || -> Result<Vec<PathBuf>> {
237            let mut full_entry_path = current_path.to_vec();
238            let entry_name = entry_name
239                .to_vec()
240                .into_path_buf()
241                .map_err(Error::DecodeTreeEntryName)?;
242            full_entry_path.push(entry_name);
243            Ok(full_entry_path)
244        };
245        match (classify_entry(lhs_entry)?, classify_entry(rhs_entry)?) {
246            (ClassifiedEntry::Absent, ClassifiedEntry::Absent) => {
247                // Shouldn't happen, but there's no issue here.
248            }
249
250            (
251                ClassifiedEntry::NotATree(lhs_oid, lhs_file_mode),
252                ClassifiedEntry::NotATree(rhs_oid, rhs_file_mode),
253            ) => {
254                if lhs_oid == rhs_oid && lhs_file_mode == rhs_file_mode {
255                    // Unchanged file, do nothing.
256                } else {
257                    // Changed file.
258                    acc.push(full_entry_path()?);
259                }
260            }
261
262            (ClassifiedEntry::Absent, ClassifiedEntry::NotATree(_, _))
263            | (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Absent) => {
264                // Added, removed, or changed file.
265                acc.push(full_entry_path()?);
266            }
267
268            (ClassifiedEntry::Absent, ClassifiedEntry::Tree(tree_oid, _))
269            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::Absent) => {
270                // A directory was added or removed. Add all entries from that
271                // directory.
272                let full_entry_path = full_entry_path()?;
273                let tree = get_tree(tree_oid)?;
274                get_changed_paths_between_trees_internal(
275                    repo,
276                    acc,
277                    &full_entry_path,
278                    Some(&tree.inner),
279                    None,
280                )?;
281            }
282
283            (ClassifiedEntry::NotATree(_, _), ClassifiedEntry::Tree(tree_oid, _))
284            | (ClassifiedEntry::Tree(tree_oid, _), ClassifiedEntry::NotATree(_, _)) => {
285                // A file was changed into a directory. Add both the file and
286                // all subdirectory entries as changed entries.
287                let full_entry_path = full_entry_path()?;
288                let tree = get_tree(tree_oid)?;
289                get_changed_paths_between_trees_internal(
290                    repo,
291                    acc,
292                    &full_entry_path,
293                    Some(&tree.inner),
294                    None,
295                )?;
296                acc.push(full_entry_path);
297            }
298
299            (
300                ClassifiedEntry::Tree(lhs_tree_oid, lhs_file_mode),
301                ClassifiedEntry::Tree(rhs_tree_oid, rhs_file_mode),
302            ) => {
303                match (
304                    (lhs_tree_oid == rhs_tree_oid),
305                    // Note that there should only be one possible file mode for
306                    // an entry which points to a tree, but it's possible that
307                    // some extra non-meaningful bits are set. Should we report
308                    // a change in that case? This code takes the conservative
309                    // approach and reports a change.
310                    (lhs_file_mode == rhs_file_mode),
311                ) {
312                    (true, true) => {
313                        // Unchanged entry, do nothing.
314                    }
315
316                    (true, false) => {
317                        // Only the directory changed, but none of its contents.
318                        acc.push(full_entry_path()?);
319                    }
320
321                    (false, true) => {
322                        let lhs_tree = get_tree(lhs_tree_oid)?;
323                        let rhs_tree = get_tree(rhs_tree_oid)?;
324
325                        // Only include the files changed in the subtrees, and
326                        // not the directory itself.
327                        get_changed_paths_between_trees_internal(
328                            repo,
329                            acc,
330                            &full_entry_path()?,
331                            Some(&lhs_tree.inner),
332                            Some(&rhs_tree.inner),
333                        )?;
334                    }
335
336                    (false, false) => {
337                        let lhs_tree = get_tree(lhs_tree_oid)?;
338                        let rhs_tree = get_tree(rhs_tree_oid)?;
339                        let full_entry_path = full_entry_path()?;
340
341                        get_changed_paths_between_trees_internal(
342                            repo,
343                            acc,
344                            &full_entry_path,
345                            Some(&lhs_tree.inner),
346                            Some(&rhs_tree.inner),
347                        )?;
348                        acc.push(full_entry_path);
349                    }
350                }
351            }
352        }
353    }
354
355    Ok(())
356}
357
358/// Get the paths which are different between two tree objects. This is faster
359/// than the `git2` implementation, which always iterates all tree entries in
360/// all tree objects recursively.
361#[instrument]
362pub fn get_changed_paths_between_trees(
363    repo: &Repo,
364    lhs: Option<&Tree>,
365    rhs: Option<&Tree>,
366) -> Result<HashSet<PathBuf>> {
367    let mut acc = Vec::new();
368    get_changed_paths_between_trees_internal(
369        repo,
370        &mut acc,
371        &Vec::new(),
372        lhs.map(|tree| &tree.inner),
373        rhs.map(|tree| &tree.inner),
374    )?;
375    let changed_paths: HashSet<PathBuf> = acc.into_iter().map(PathBuf::from_iter).collect();
376    Ok(changed_paths)
377}
378
379/// Add the provided entries into the tree.
380///
381/// If the provided `Tree` is `None`, then this function adds the entries to the
382/// empty tree.
383///
384/// The paths for the provided entries can contain slashes.
385///
386/// If the value for an entry is `None`, then that element in the tree is
387/// removed. If a directory ever becomes empty, then it's removed from its
388/// parent directory.
389///
390/// If a path for a given entry is already present in the provided tree, then
391/// that entry is overwritten.
392///
393/// If a path refers to intermediate directories that don't exist in the
394/// provided tree, then those intermediate directories are created.
395#[instrument]
396pub fn hydrate_tree(
397    repo: &Repo,
398    tree: Option<&Tree>,
399    entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>>,
400) -> Result<NonZeroOid> {
401    let (file_entries, dir_entries) = {
402        let mut file_entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>> = HashMap::new();
403        let mut dir_entries: HashMap<PathBuf, HashMap<PathBuf, Option<(NonZeroOid, FileMode)>>> =
404            HashMap::new();
405        for (path, value) in entries {
406            match path.components().collect_vec().as_slice() {
407                [] => {
408                    warn!(?tree, ?value, "Empty path when hydrating tree");
409                }
410                [file_name] => {
411                    file_entries.insert(file_name.into(), value);
412                }
413                components => {
414                    let first: PathBuf = [components[0]].iter().collect();
415                    let rest: PathBuf = components[1..].iter().collect();
416                    dir_entries.entry(first).or_default().insert(rest, value);
417                }
418            }
419        }
420        (file_entries, dir_entries)
421    };
422
423    let tree = tree.map(|tree| &tree.inner);
424    let mut builder = repo
425        .inner
426        .treebuilder(tree)
427        .map_err(Error::CreateTreeBuilder)?;
428    for (file_name, file_value) in file_entries {
429        match file_value {
430            Some((oid, file_mode)) => {
431                builder
432                    .insert(&file_name, oid.inner, file_mode.into())
433                    .map_err(|err| Error::InsertTreeBuilderEntry {
434                        source: err,
435                        oid,
436                        file_mode,
437                    })?;
438            }
439            None => remove_entry_if_exists(&mut builder, &file_name)?,
440        }
441    }
442
443    for (dir_name, dir_value) in dir_entries {
444        let existing_dir_entry: Option<Tree> =
445            match builder
446                .get(&dir_name)
447                .map_err(|err| Error::ReadTreeBuilderEntry {
448                    source: err,
449                    path: dir_name.to_owned(),
450                })? {
451                Some(existing_dir_entry)
452                    if !existing_dir_entry.id().is_zero()
453                        && existing_dir_entry.kind() == Some(git2::ObjectType::Tree) =>
454                {
455                    repo.find_tree(make_non_zero_oid(existing_dir_entry.id()))
456                        .map_err(Box::new)
457                        .map_err(Error::FindTree)?
458                }
459                _ => None,
460            };
461        let new_entry_oid = hydrate_tree(repo, existing_dir_entry.as_ref(), dir_value)?;
462
463        let new_entry_tree = repo
464            .find_tree(new_entry_oid)
465            .map_err(Box::new)
466            .map_err(Error::FindTree)?
467            .ok_or(Error::FindHydratedTree(new_entry_oid))?;
468        if new_entry_tree.is_empty() {
469            remove_entry_if_exists(&mut builder, &dir_name)?;
470        } else {
471            builder
472                .insert(&dir_name, new_entry_oid.inner, git2::FileMode::Tree.into())
473                .map_err(|err| Error::InsertTreeBuilderEntry {
474                    source: err,
475                    oid: new_entry_oid,
476                    file_mode: FileMode::Tree,
477                })?;
478        }
479    }
480
481    let tree_oid = builder.write().map_err(Error::BuildTree)?;
482    Ok(make_non_zero_oid(tree_oid))
483}
484
485/// Create a new, empty Tree in the given Repo.
486pub fn make_empty_tree(repo: &Repo) -> Result<Tree<'_>> {
487    let tree_oid = hydrate_tree(repo, None, Default::default())?;
488    repo.find_tree_or_fail(tree_oid)
489        .map_err(Box::new)
490        .map_err(Error::FindTree)
491}
492
493/// `libgit2` raises an error if the entry isn't present, but that's often not
494/// an error condition here. We may be referring to a created or deleted path,
495/// which wouldn't exist in one of the pre-/post-patch trees.
496fn remove_entry_if_exists(builder: &mut git2::TreeBuilder, name: &Path) -> Result<()> {
497    if builder
498        .get(name)
499        .map_err(|err| Error::ReadTreeBuilderEntry {
500            source: err,
501            path: name.to_owned(),
502        })?
503        .is_some()
504    {
505        builder
506            .remove(name)
507            .map_err(|err| Error::DeleteTreeBuilderEntry {
508                source: err,
509                path: name.to_owned(),
510            })?;
511    }
512    Ok(())
513}
514
515/// Filter the entries in the provided tree by only keeping the provided paths.
516///
517/// If a provided path does not appear in the tree at all, then it's ignored.
518#[instrument]
519pub fn dehydrate_tree(repo: &Repo, tree: &Tree, paths: &[&Path]) -> Result<NonZeroOid> {
520    let entries: HashMap<PathBuf, Option<(NonZeroOid, FileMode)>> = paths
521        .iter()
522        .map(|path| -> Result<(PathBuf, _)> {
523            let key = path.to_path_buf();
524            match tree.inner.get_path(path) {
525                Ok(tree_entry) => {
526                    let value = Some((
527                        make_non_zero_oid(tree_entry.id()),
528                        FileMode::from(tree_entry.filemode()),
529                    ));
530                    Ok((key, value))
531                }
532                Err(err) if err.code() == git2::ErrorCode::NotFound => Ok((key, None)),
533                Err(err) => Err(Error::ReadTreeEntry {
534                    source: err,
535                    path: key,
536                }),
537            }
538        })
539        .try_collect()?;
540
541    hydrate_tree(repo, None, entries)
542}