gix_dir/walk/mod.rs
1use std::{collections::BTreeSet, path::PathBuf, sync::atomic::AtomicBool};
2
3use bstr::{BStr, BString};
4
5use crate::{entry, EntryRef};
6
/// A type returned by the [`Delegate::emit()`] as passed to [`walk()`](function::walk()).
///
/// It tells the traversal whether to keep going after an entry was emitted.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[must_use]
pub enum Action {
    /// Continue the traversal as normal.
    Continue,
    /// Do not continue the traversal, but exit it.
    Cancel,
}
16
17/// Ready-made delegate implementations.
18pub mod delegate {
19 use crate::{entry, walk, walk::Action, Entry, EntryRef};
20
21 type Entries = Vec<(Entry, Option<entry::Status>)>;
22
23 /// A [`Delegate`](walk::Delegate) implementation that collects all `entries` along with their directory status, if present.
24 ///
25 /// Note that this allocates for each entry.
26 #[derive(Default)]
27 pub struct Collect {
28 /// All collected entries, in any order.
29 pub unorded_entries: Entries,
30 }
31
32 impl Collect {
33 /// Return the list of entries that were emitted, sorted ascending by their repository-relative tree path.
34 pub fn into_entries_by_path(mut self) -> Entries {
35 self.unorded_entries.sort_by(|a, b| a.0.rela_path.cmp(&b.0.rela_path));
36 self.unorded_entries
37 }
38 }
39
40 impl walk::Delegate for Collect {
41 fn emit(&mut self, entry: EntryRef<'_>, dir_status: Option<entry::Status>) -> Action {
42 self.unorded_entries.push((entry.to_owned(), dir_status));
43 walk::Action::Continue
44 }
45 }
46}
47
48/// A way for the caller to control the traversal based on provided data.
49pub trait Delegate {
50 /// Called for each observed `entry` *inside* a directory, or the directory itself if the traversal is configured
51 /// to simplify the result (i.e. if every file in a directory is ignored, emit the containing directory instead
52 /// of each file), or if the root of the traversal passes through a directory that can't be traversed.
53 ///
54 /// It will also be called if the `root` in [`walk()`](crate::walk()) itself is matching a particular status,
55 /// even if it is a file.
56 ///
57 /// Note that tracked entries will only be emitted if [`Options::emit_tracked`] is `true`.
58 /// Further, not all pruned entries will be observable as they might be pruned so early that the kind of
59 /// item isn't yet known. Pruned entries are also only emitted if [`Options::emit_pruned`] is `true`.
60 ///
61 /// `collapsed_directory_status` is `Some(dir_status)` if this entry was part of a directory with the given
62 /// `dir_status` that wasn't the same as the one of `entry` and if [Options::emit_collapsed] was
63 /// [CollapsedEntriesEmissionMode::OnStatusMismatch]. It will also be `Some(dir_status)` if that option
64 /// was [CollapsedEntriesEmissionMode::All].
65 fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<entry::Status>) -> Action;
66
67 /// Return `true` if the given entry can be recursed into. Will only be called if the entry is a physical directory.
68 /// The base implementation will act like Git does by default in `git status` or `git clean`.
69 ///
70 /// Use `for_deletion` to specify if the seen entries should ultimately be deleted, which may affect the decision
71 /// of whether to resource or not.
72 ///
73 /// If `worktree_root_is_repository` is `true`, then this status is part of the root of an iteration, and the corresponding
74 /// worktree root is a repository itself. This typically happens for submodules. In this case, recursion rules are relaxed
75 /// to allow traversing submodule worktrees.
76 ///
77 /// Note that this method will see all directories, even though not all of them may end up being [emitted](Self::emit()).
78 /// If this method returns `false`, the `entry` will always be emitted.
79 fn can_recurse(
80 &mut self,
81 entry: EntryRef<'_>,
82 for_deletion: Option<ForDeletionMode>,
83 worktree_root_is_repository: bool,
84 ) -> bool {
85 entry.status.can_recurse(
86 entry.disk_kind,
87 entry.pathspec_match,
88 for_deletion,
89 worktree_root_is_repository,
90 )
91 }
92}
93
/// The way entries are emitted using the [Delegate].
///
/// The choice here controls if entries are emitted immediately, or have to be held back.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum EmissionMode {
    /// Emit each entry as it matches exactly, without doing any kind of simplification.
    ///
    /// Emissions in this mode are happening as they occur, without any buffering or ordering.
    #[default]
    Matching,
    /// Emit only a containing directory if all of its entries are of the same type.
    ///
    /// Note that doing so is more expensive as it requires us to keep track of all entries in the directory structure
    /// until it's clear what to finally emit.
    CollapseDirectory,
}
110
/// The way entries that are contained in collapsed directories are emitted using the [Delegate].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum CollapsedEntriesEmissionMode {
    /// Emit only entries if their status does not match the one of the parent directory that is
    /// going to be collapsed.
    ///
    /// E.g. if a directory is determined to be untracked, and the entries in question are ignored,
    /// they will be emitted.
    ///
    /// Entries that have the same status will essentially be 'merged' into the collapsing directory
    /// and won't be observable anymore.
    #[default]
    OnStatusMismatch,
    /// Emit all entries inside of a collapsed directory to make them observable.
    All,
}
127
/// When the walk is for deletion, assure that we don't collapse directories that have precious files in
/// them, and otherwise assure that no entries are observable that shouldn't be deleted.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum ForDeletionMode {
    /// We will stop traversing into ignored directories which may save a lot of time, but also may include nested repositories
    /// which might end up being deleted.
    #[default]
    IgnoredDirectoriesCanHideNestedRepositories,
    /// Instead of skipping over ignored directories entirely, we will dive in and find ignored non-bare repositories
    /// so these are emitted separately and prevent collapsing. These are assumed to be a directory with `.git` inside.
    /// Only relevant when ignored entries are emitted.
    FindNonBareRepositoriesInIgnoredDirectories,
    /// This is a more expensive form of the above variant as it finds all repositories, bare or non-bare.
    FindRepositoriesInIgnoredDirectories,
}
143
144/// Options for use in [`walk()`](function::walk()) function.
145#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
146pub struct Options<'a> {
147 /// If `true`, the filesystem will store paths as decomposed unicode, i.e. `รค` becomes `"a\u{308}"`, which means that
148 /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
149 /// using it. This also applies to input received from the command-line, so callers may have to be aware of this and
150 /// perform conversions accordingly.
151 /// If `false`, no conversions will be performed.
152 pub precompose_unicode: bool,
153 /// If true, the filesystem ignores the case of input, which makes `A` the same file as `a`.
154 /// This is also called case-folding.
155 /// Note that [pathspecs](Context::pathspec) must also be using the same defaults, which makes them match case-insensitive
156 /// automatically.
157 pub ignore_case: bool,
158 /// If `true`, we will stop figuring out if any directory that is a candidate for recursion is also a nested repository,
159 /// which saves time but leads to recurse into it. If `false`, nested repositories will not be traversed.
160 pub recurse_repositories: bool,
161 /// If `true`, entries that are pruned and whose [Kind](crate::entry::Kind) is known will be emitted.
162 pub emit_pruned: bool,
163 /// If `Some(mode)`, entries that are ignored will be emitted according to the given `mode`.
164 /// If `None`, ignored entries will not be emitted at all.
165 pub emit_ignored: Option<EmissionMode>,
166 /// When the walk is for deletion, this must be `Some(_)` to assure we don't collapse directories that have precious files in
167 /// them, and otherwise assure that no entries are observable that shouldn't be deleted.
168 /// If `None`, precious files are treated like expendable files, which is usually what you want when displaying them
169 /// for addition to the repository, and the collapse of folders can be more generous in relation to ignored files.
170 pub for_deletion: Option<ForDeletionMode>,
171 /// If `true`, we will not only find non-bare repositories in untracked directories, but also bare ones.
172 ///
173 /// Note that this is very costly, but without it, bare repositories will appear like untracked directories when collapsed,
174 /// and they will be recursed into.
175 pub classify_untracked_bare_repositories: bool,
176 /// If `true`, we will also emit entries for tracked items. Otherwise these will remain 'hidden', even if a pathspec directly
177 /// refers to it.
178 pub emit_tracked: bool,
179 /// Controls the way untracked files are emitted. By default, this is happening immediately and without any simplification.
180 pub emit_untracked: EmissionMode,
181 /// If `true`, emit empty directories as well. Note that a directory also counts as empty if it has any amount or depth of nested
182 /// subdirectories, as long as none of them includes a file.
183 /// Thus, this makes leaf-level empty directories visible, as those don't have any content.
184 pub emit_empty_directories: bool,
185 /// If `None`, no entries inside of collapsed directories are emitted. Otherwise, act as specified by `Some(mode)`.
186 pub emit_collapsed: Option<CollapsedEntriesEmissionMode>,
187 /// This is a `libgit2` compatibility flag, and if enabled, symlinks that point to directories will be considered a directory
188 /// when checking for exclusion.
189 ///
190 /// This is relevant if `src2` points to `src`, and is excluded with `src2/`. If `false`, `src2` will not be excluded,
191 /// if `true` it will be excluded as the symlink is considered a directory.
192 ///
193 /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
194 pub symlinks_to_directories_are_ignored_like_directories: bool,
195 /// A set of all git worktree checkouts that are located within the main worktree directory.
196 ///
197 /// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
198 /// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
199 pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
200}
201
202/// All information that is required to perform a dirwalk, and classify paths properly.
203pub struct Context<'a> {
204 /// If not `None`, it will be checked before entering any directory to trigger early interruption.
205 ///
206 /// If this flag is `true` at any point in the iteration, it will abort with an error.
207 pub should_interrupt: Option<&'a AtomicBool>,
208 /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
209 ///
210 /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
211 /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
212 pub git_dir_realpath: &'a std::path::Path,
213 /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
214 /// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
215 ///
216 /// It is also used to assure that when the walk is for deletion, that the current working dir will not be collapsed.
217 pub current_dir: &'a std::path::Path,
218 /// The index to quickly understand if a file or directory is tracked or not.
219 ///
220 /// ### Important
221 ///
222 /// The index must have been validated so that each entry that is considered up-to-date will have the [gix_index::entry::Flags::UPTODATE] flag
223 /// set. Otherwise the index entry is not considered and a disk-access may occur which is costly.
224 pub index: &'a gix_index::State,
225 /// A utility to lookup index entries faster, and deal with ignore-case handling.
226 ///
227 /// Must be set if `ignore_case` is `true`, or else some entries won't be found if their case is different.
228 ///
229 /// ### Deviation
230 ///
231 /// Git uses a name-based hash (for looking up entries, not directories) even when operating
232 /// in case-sensitive mode. It does, however, skip the directory hash creation (for looking
233 /// up directories) unless `core.ignoreCase` is enabled.
234 ///
235 /// We only use the hashmap when available and when [`ignore_case`](Options::ignore_case) is enabled in the options.
236 pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
237 /// A pathspec to use as filter - we only traverse into directories if it matches.
238 /// Note that the `ignore_case` setting it uses should match our [Options::ignore_case].
239 /// If no such filtering is desired, pass an empty `pathspec` which will match everything.
240 pub pathspec: &'a mut gix_pathspec::Search,
241 /// The `attributes` callback for use in [gix_pathspec::Search::pattern_matching_relative_path()], which happens when
242 /// pathspecs use attributes for filtering.
243 /// If `pathspec` isn't empty, this function may be called if pathspecs perform attribute lookups.
244 pub pathspec_attributes: &'a mut dyn FnMut(
245 &BStr,
246 gix_pathspec::attributes::glob::pattern::Case,
247 bool,
248 &mut gix_pathspec::attributes::search::Outcome,
249 ) -> bool,
250 /// A way to query the `.gitignore` files to see if a directory or file is ignored.
251 /// Set to `None` to not perform any work on checking for ignored, which turns previously ignored files into untracked ones, a useful
252 /// operation when trying to add ignored files to a repository.
253 pub excludes: Option<&'a mut gix_worktree::Stack>,
254 /// Access to the object database for use with `excludes` - it's possible to access `.gitignore` files in the index if configured.
255 pub objects: &'a dyn gix_object::Find,
256 /// If not `None`, override the traversal root that is computed and use this one instead.
257 ///
258 /// This can be useful if the traversal root may be a file, in which case the traversal will
259 /// still be returning possibly matching root entries.
260 ///
261 /// ### Panics
262 ///
263 /// If the `traversal_root` is not in the `worktree_root` passed to [walk()](crate::walk()).
264 pub explicit_traversal_root: Option<&'a std::path::Path>,
265}
266
/// Additional information collected as outcome of [`walk()`](function::walk()).
// Field order is significant: the derived `Ord`/`PartialOrd` compare fields top to bottom.
#[derive(Default, Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct Outcome {
    /// The amount of calls to read the directory contents.
    pub read_dir_calls: u32,
    /// The amount of returned entries provided to the callback. This number can be lower than `seen_entries`.
    pub returned_entries: usize,
    /// The amount of entries, prior to pathspecs filtering them out or otherwise excluding them.
    pub seen_entries: u32,
}
277
278/// The error returned by [`walk()`](function::walk()).
279#[derive(Debug, thiserror::Error)]
280#[allow(missing_docs)]
281pub enum Error {
282 #[error("Interrupted")]
283 Interrupted,
284 #[error("Worktree root at '{}' is not a directory", root.display())]
285 WorktreeRootIsFile { root: PathBuf },
286 #[error("Traversal root '{}' contains relative path components and could not be normalized", root.display())]
287 NormalizeRoot { root: PathBuf },
288 #[error("A symlink was found at component {component_index} of traversal root '{}' as seen from worktree root '{}'", root.display(), worktree_root.display())]
289 SymlinkInRoot {
290 root: PathBuf,
291 worktree_root: PathBuf,
292 /// This index starts at 0, with 0 being the first component.
293 component_index: usize,
294 },
295 #[error("Failed to update the excludes stack to see if a path is excluded")]
296 ExcludesAccess(std::io::Error),
297 #[error("Failed to read the directory at '{}'", path.display())]
298 ReadDir { path: PathBuf, source: std::io::Error },
299 #[error("Could not obtain directory entry in root of '{}'", parent_directory.display())]
300 DirEntry {
301 parent_directory: PathBuf,
302 source: std::io::Error,
303 },
304 #[error("Could not obtain filetype of directory entry '{}'", path.display())]
305 DirEntryFileType { path: PathBuf, source: std::io::Error },
306 #[error("Could not obtain symlink metadata on '{}'", path.display())]
307 SymlinkMetadata { path: PathBuf, source: std::io::Error },
308}
309
310mod classify;
311pub(crate) mod function;
312mod readdir;