gix_dir/walk/
mod.rs

1use std::{collections::BTreeSet, path::PathBuf, sync::atomic::AtomicBool};
2
3use bstr::{BStr, BString};
4
5use crate::{entry, EntryRef};
6
/// The verdict returned by [`Delegate::emit()`], telling [`walk()`](function::walk()) how to proceed.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[must_use]
pub enum Action {
    /// Keep traversing as usual.
    Continue,
    /// Stop the traversal and exit it.
    Cancel,
}
16
17/// Ready-made delegate implementations.
18pub mod delegate {
19    use crate::{entry, walk, walk::Action, Entry, EntryRef};
20
21    type Entries = Vec<(Entry, Option<entry::Status>)>;
22
23    /// A [`Delegate`](walk::Delegate) implementation that collects all `entries` along with their directory status, if present.
24    ///
25    /// Note that this allocates for each entry.
26    #[derive(Default)]
27    pub struct Collect {
28        /// All collected entries, in any order.
29        pub unorded_entries: Entries,
30    }
31
32    impl Collect {
33        /// Return the list of entries that were emitted, sorted ascending by their repository-relative tree path.
34        pub fn into_entries_by_path(mut self) -> Entries {
35            self.unorded_entries.sort_by(|a, b| a.0.rela_path.cmp(&b.0.rela_path));
36            self.unorded_entries
37        }
38    }
39
40    impl walk::Delegate for Collect {
41        fn emit(&mut self, entry: EntryRef<'_>, dir_status: Option<entry::Status>) -> Action {
42            self.unorded_entries.push((entry.to_owned(), dir_status));
43            walk::Action::Continue
44        }
45    }
46}
47
48/// A way for the caller to control the traversal based on provided data.
49pub trait Delegate {
50    /// Called for each observed `entry` *inside* a directory, or the directory itself if the traversal is configured
51    /// to simplify the result (i.e. if every file in a directory is ignored, emit the containing directory instead
52    /// of each file), or if the root of the traversal passes through a directory that can't be traversed.
53    ///
54    /// It will also be called if the `root` in [`walk()`](crate::walk()) itself is matching a particular status,
55    /// even if it is a file.
56    ///
57    /// Note that tracked entries will only be emitted if [`Options::emit_tracked`] is `true`.
58    /// Further, not all pruned entries will be observable as they might be pruned so early that the kind of
59    /// item isn't yet known. Pruned entries are also only emitted if [`Options::emit_pruned`] is `true`.
60    ///
61    /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
62    /// `dir_status` that wasn't the same as the one of `entry` and if [Options::emit_collapsed] was
63    /// [CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option
64    /// was [CollapsedEntriesEmissionMode::All].
65    fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<entry::Status>) -> Action;
66
67    /// Return `true` if the given entry can be recursed into. Will only be called if the entry is a physical directory.
68    /// The base implementation will act like Git does by default in `git status` or `git clean`.
69    ///
70    /// Use `for_deletion` to specify if the seen entries should ultimately be deleted, which may affect the decision
71    /// of whether to resource or not.
72    ///
73    /// If `worktree_root_is_repository` is `true`, then this status is part of the root of an iteration, and the corresponding
74    /// worktree root is a repository itself. This typically happens for submodules. In this case, recursion rules are relaxed
75    /// to allow traversing submodule worktrees.
76    ///
77    /// Note that this method will see all directories, even though not all of them may end up being [emitted](Self::emit()).
78    /// If this method returns `false`, the `entry` will always be emitted.
79    fn can_recurse(
80        &mut self,
81        entry: EntryRef<'_>,
82        for_deletion: Option<ForDeletionMode>,
83        worktree_root_is_repository: bool,
84    ) -> bool {
85        entry.status.can_recurse(
86            entry.disk_kind,
87            entry.pathspec_match,
88            for_deletion,
89            worktree_root_is_repository,
90        )
91    }
92}
93
/// Determines how entries are handed to the [Delegate].
///
/// Depending on the choice, entries are emitted right away or have to be held back first.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum EmissionMode {
    /// Emit every entry exactly as it matches, without any kind of simplification.
    ///
    /// In this mode entries are emitted as they occur, without any buffering or ordering.
    #[default]
    Matching,
    /// Emit only the containing directory if all of its entries are of the same type.
    ///
    /// This is more expensive, as all entries in the directory structure have to be tracked
    /// until it's clear what to finally emit.
    CollapseDirectory,
}
110
/// Determines how entries that live inside of collapsed directories are handed to the [Delegate].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum CollapsedEntriesEmissionMode {
    /// Emit an entry only if its status differs from the one of the parent directory that is
    /// about to be collapsed.
    ///
    /// For example, if a directory is determined to be untracked and the entries in question are ignored,
    /// these entries are emitted.
    ///
    /// Entries with the same status are essentially 'merged' into the collapsing directory
    /// and are no longer observable.
    #[default]
    OnStatusMismatch,
    /// Emit every entry inside of a collapsed directory so that all of them are observable.
    All,
}
127
/// Used when the walk is for deletion, to assure that directories containing precious files aren't collapsed,
/// and that no entries are observable that shouldn't be deleted.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum ForDeletionMode {
    /// Stop traversing into ignored directories. This may save a lot of time, but such directories may also
    /// include nested repositories which might end up being deleted.
    #[default]
    IgnoredDirectoriesCanHideNestedRepositories,
    /// Rather than skipping ignored directories entirely, dive in and find ignored non-bare repositories
    /// so that these are emitted separately and prevent collapsing. A non-bare repository is assumed to be
    /// a directory with `.git` inside.
    /// Only relevant when ignored entries are emitted.
    FindNonBareRepositoriesInIgnoredDirectories,
    /// A more expensive form of the above variant, as it finds all repositories, bare or non-bare.
    FindRepositoriesInIgnoredDirectories,
}
143
144/// Options for use in [`walk()`](function::walk()) function.
145#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
146pub struct Options<'a> {
147    /// If `true`, the filesystem will store paths as decomposed unicode, i.e. `รค` becomes `"a\u{308}"`, which means that
148    /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
149    /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and
150    /// perform conversions accordingly.
151    /// If `false`, no conversions will be performed.
152    pub precompose_unicode: bool,
153    /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`.
154    /// This is also called case-folding.
155    /// Note that [pathspecs](Context::pathspec) must also be using the same defaults, which makes them match case-insensitive
156    /// automatically.
157    pub ignore_case: bool,
158    /// If `true`, we will stop figuring out if any directory that is a candidate for recursion is also a nested repository,
159    /// which saves time but leads to recurse into it. If `false`, nested repositories will not be traversed.
160    pub recurse_repositories: bool,
161    /// If `true`, entries that are pruned and whose [Kind](crate::entry::Kind) is known will be emitted.
162    pub emit_pruned: bool,
163    /// If `Some(mode)`, entries that are ignored will be emitted according to the given `mode`.
164    /// If `None`, ignored entries will not be emitted at all.
165    pub emit_ignored: Option<EmissionMode>,
166    /// When the walk is for deletion, this must be `Some(_)` to assure we don't collapse directories that have precious files in
167    /// them, and otherwise assure that no entries are observable that shouldn't be deleted.
168    /// If `None`, precious files are treated like expendable files, which is usually what you want when displaying them
169    /// for addition to the repository, and the collapse of folders can be more generous in relation to ignored files.
170    pub for_deletion: Option<ForDeletionMode>,
171    /// If `true`, we will not only find non-bare repositories in untracked directories, but also bare ones.
172    ///
173    /// Note that this is very costly, but without it, bare repositories will appear like untracked directories when collapsed,
174    /// and they will be recursed into.
175    pub classify_untracked_bare_repositories: bool,
176    /// If `true`, we will also emit entries for tracked items. Otherwise these will remain 'hidden', even if a pathspec directly
177    /// refers to it.
178    pub emit_tracked: bool,
179    /// Controls the way untracked files are emitted. By default, this is happening immediately and without any simplification.
180    pub emit_untracked: EmissionMode,
181    /// If `true`, emit empty directories as well. Note that a directory also counts as empty if it has any amount or depth of nested
182    /// subdirectories, as long as none of them includes a file.
183    /// Thus, this makes leaf-level empty directories visible, as those don't have any content.
184    pub emit_empty_directories: bool,
185    /// If `None`, no entries inside of collapsed directories are emitted. Otherwise, act as specified by `Some(mode)`.
186    pub emit_collapsed: Option<CollapsedEntriesEmissionMode>,
187    /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory
188    /// when checking for exclusion.
189    ///
190    /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded,
191    /// if `true` it will be excluded as the symlink is considered a directory.
192    ///
193    /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
194    pub symlinks_to_directories_are_ignored_like_directories: bool,
195    /// A set of all git worktree checkouts that are located within the main worktree directory.
196    ///
197    /// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
198    /// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
199    pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
200}
201
202/// All information that is required to perform a dirwalk, and classify paths properly.
203pub struct Context<'a> {
204    /// If not `None`, it will be checked before entering any directory to trigger early interruption.
205    ///
206    /// If this flag is `true` at any point in the iteration, it will abort with an error.
207    pub should_interrupt: Option<&'a AtomicBool>,
208    /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
209    ///
210    /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
211    /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
212    pub git_dir_realpath: &'a std::path::Path,
213    /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
214    /// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
215    ///
216    /// It is also used to assure that when the walk is for deletion, that the current working dir will not be collapsed.
217    pub current_dir: &'a std::path::Path,
218    /// The index to quickly understand if a file or directory is tracked or not.
219    ///
220    /// ### Important
221    ///
222    /// The index must have been validated so that each entry that is considered up-to-date will have the [gix_index::entry::Flags::UPTODATE] flag
223    /// set. Otherwise the index entry is not considered and a disk-access may occur which is costly.
224    pub index: &'a gix_index::State,
225    /// A utility to lookup index entries faster, and deal with ignore-case handling.
226    ///
227    /// Must be set if `ignore_case` is `true`, or else some entries won't be found if their case is different.
228    ///
229    /// ### Deviation
230    ///
231    /// Git uses a name-based hash (for looking up entries, not directories) even when operating
232    /// in case-sensitive mode. It does, however, skip the directory hash creation (for looking
233    /// up directories) unless `core.ignoreCase` is enabled.
234    ///
235    /// We only use the hashmap when available and when [`ignore_case`](Options::ignore_case) is enabled in the options.
236    pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
237    /// A pathspec to use as filter - we only traverse into directories if it matches.
238    /// Note that the `ignore_case` setting it uses should match our [Options::ignore_case].
239    /// If no such filtering is desired, pass an empty `pathspec` which will match everything.
240    pub pathspec: &'a mut gix_pathspec::Search,
241    /// The `attributes` callback for use in [gix_pathspec::Search::pattern_matching_relative_path()], which happens when
242    /// pathspecs use attributes for filtering.
243    /// If `pathspec` isn't empty, this function may be called if pathspecs perform attribute lookups.
244    pub pathspec_attributes: &'a mut dyn FnMut(
245        &BStr,
246        gix_pathspec::attributes::glob::pattern::Case,
247        bool,
248        &mut gix_pathspec::attributes::search::Outcome,
249    ) -> bool,
250    /// A way to query the `.gitignore` files to see if a directory or file is ignored.
251    /// Set to `None` to not perform any work on checking for ignored, which turns previously ignored files into untracked ones, a useful
252    /// operation when trying to add ignored files to a repository.
253    pub excludes: Option<&'a mut gix_worktree::Stack>,
254    /// Access to the object database for use with `excludes` - it's possible to access `.gitignore` files in the index if configured.
255    pub objects: &'a dyn gix_object::Find,
256    /// If not `None`, override the traversal root that is computed and use this one instead.
257    ///
258    /// This can be useful if the traversal root may be a file, in which case the traversal will
259    /// still be returning possibly matching root entries.
260    ///
261    /// ### Panics
262    ///
263    /// If the `traversal_root` is not in the `worktree_root` passed to [walk()](crate::walk()).
264    pub explicit_traversal_root: Option<&'a std::path::Path>,
265}
266
/// Statistics gathered as additional outcome of [`walk()`](function::walk()).
#[derive(Default, Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct Outcome {
    /// How many calls were made to read the contents of a directory.
    pub read_dir_calls: u32,
    /// How many entries were returned by providing them to the callback. This number can be lower than
    /// `seen_entries`.
    pub returned_entries: usize,
    /// How many entries there were before pathspecs filtered them out or they were otherwise excluded.
    pub seen_entries: u32,
}
277
278/// The error returned by [`walk()`](function::walk()).
279#[derive(Debug, thiserror::Error)]
280#[allow(missing_docs)]
281pub enum Error {
282    #[error("Interrupted")]
283    Interrupted,
284    #[error("Worktree root at '{}' is not a directory", root.display())]
285    WorktreeRootIsFile { root: PathBuf },
286    #[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())]
287    NormalizeRoot { root: PathBuf },
288    #[error("A symlink was found at component {component_index} of traversal root '{}' as seen from worktree root '{}'", root.display(), worktree_root.display())]
289    SymlinkInRoot {
290        root: PathBuf,
291        worktree_root: PathBuf,
292        /// This index starts at 0, with 0 being the first component.
293        component_index: usize,
294    },
295    #[error("Failed to update the excludes stack to see if a path is excluded")]
296    ExcludesAccess(std::io::Error),
297    #[error("Failed to read the directory at '{}'", path.display())]
298    ReadDir { path: PathBuf, source: std::io::Error },
299    #[error("Could not obtain directory entry in root of '{}'", parent_directory.display())]
300    DirEntry {
301        parent_directory: PathBuf,
302        source: std::io::Error,
303    },
304    #[error("Could not obtain filetype of directory entry '{}'", path.display())]
305    DirEntryFileType { path: PathBuf, source: std::io::Error },
306    #[error("Could not obtain symlink metadata on '{}'", path.display())]
307    SymlinkMetadata { path: PathBuf, source: std::io::Error },
308}
309
310mod classify;
311pub(crate) mod function;
312mod readdir;