gix_dir/walk/
function.rs

1use std::{
2    borrow::Cow,
3    path::{Path, PathBuf},
4};
5
6use bstr::{BStr, BString, ByteSlice};
7
8use crate::{
9    entry,
10    walk::{classify, readdir, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome},
11    EntryRef,
12};
13
14/// A function to perform a git-style, unsorted, directory walk.
15///
16/// * `worktree_root` - the top-most root of the worktree, which must be a prefix to `root`.
17///     - If [`Options::precompose_unicode`] is enabled, this path must be precomposed.
18///     - The starting point of the traversal (traversal root) is calculated from by doing `worktree_root + pathspec.common_prefix()`.
19///     - Note that if the traversal root leading to this directory or it itself is excluded, it will be provided to [`Delegate::emit()`]
20///       without further traversal.
21///     - If [`Options::precompose_unicode`] is enabled, all involved paths must be precomposed.
22///     - Must be contained in `worktree_root`.
23/// * `ctx` - everything needed to classify the paths seen during the traversal.
24/// * `delegate` - an implementation of [`Delegate`] to control details of the traversal and receive its results.
25///
26/// Returns `(outcome, traversal_root)`, with the `traversal_root` actually being used for the traversal,
27/// useful to transform the paths returned for the user. It's always within the `worktree_root`, or the same,
28/// but is hard to guess due to additional logic affecting it.
29///
30/// ### Performance Notes
31///
32/// In theory, parallel directory traversal can be significantly faster, and what's possible for our current
33/// `gix_features::fs::WalkDir` implementation is to abstract a `filter_entry()` method so it works both for
34/// the iterator from the `walkdir` crate as well as from `jwalk`. However, doing so as initial version
35/// has the risk of not being significantly harder if not impossible to implement as flow-control is very
36/// limited.
37///
38/// Thus the decision was made to start out with something akin to the Git implementation, get all tests and
39/// baseline comparison to pass, and see if an iterator with just `filter_entry` would be capable of dealing with
40/// it. Note that `filter_entry` is the only just-in-time traversal control that `walkdir` offers, even though
41/// one could consider switching to `jwalk` and just use its single-threaded implementation if a unified interface
42/// is necessary to make this work - `jwalk` has a more powerful API for this to work.
43///
44/// If that was the case, we are talking about 0.5s for single-threaded traversal (without doing any extra work)
45/// or 0.25s for optimal multi-threaded performance, all in the WebKit directory with 388k items to traverse.
46/// Thus, the speedup could easily be 2x or more and thus worth investigating in due time.
47pub fn walk(
48    worktree_root: &Path,
49    mut ctx: Context<'_>,
50    options: Options<'_>,
51    delegate: &mut dyn Delegate,
52) -> Result<(Outcome, PathBuf), Error> {
53    let root = match ctx.explicit_traversal_root {
54        Some(root) => root.to_owned(),
55        None => ctx
56            .pathspec
57            .longest_common_directory()
58            .and_then(|candidate| {
59                let candidate = worktree_root.join(candidate);
60                candidate.is_dir().then_some(candidate)
61            })
62            .unwrap_or_else(|| worktree_root.join(ctx.pathspec.prefix_directory())),
63    };
64    let _span = gix_trace::coarse!("walk", root = ?root, worktree_root = ?worktree_root, options = ?options);
65    let (mut current, worktree_root_relative) = assure_no_symlink_in_root(worktree_root, &root)?;
66    let mut out = Outcome::default();
67    let mut buf = BString::default();
68    let (root_info, worktree_root_is_repository) = classify::root(
69        worktree_root,
70        &mut buf,
71        worktree_root_relative.as_ref(),
72        options,
73        &mut ctx,
74    )?;
75
76    let can_recurse = can_recurse(
77        buf.as_bstr(),
78        if root == worktree_root && root_info.disk_kind == Some(entry::Kind::Symlink) && current.is_dir() {
79            classify::Outcome {
80                disk_kind: Some(entry::Kind::Directory),
81                ..root_info
82            }
83        } else {
84            root_info
85        },
86        options.for_deletion,
87        worktree_root_is_repository,
88        delegate,
89    );
90    if !can_recurse {
91        if buf.is_empty() && !root_info.disk_kind.is_some_and(|kind| kind.is_dir()) {
92            return Err(Error::WorktreeRootIsFile { root: root.to_owned() });
93        }
94        if options.precompose_unicode {
95            buf = gix_utils::str::precompose_bstr(buf.into()).into_owned();
96        }
97        let _ = emit_entry(
98            Cow::Borrowed(buf.as_bstr()),
99            root_info,
100            None,
101            options,
102            &mut out,
103            delegate,
104        );
105        return Ok((out, root.to_owned()));
106    }
107
108    let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some());
109    let may_collapse = root != worktree_root && state.may_collapse(&current);
110    let (action, _) = readdir::recursive(
111        may_collapse,
112        &mut current,
113        &mut buf,
114        root_info,
115        &mut ctx,
116        options,
117        delegate,
118        &mut out,
119        &mut state,
120    )?;
121    if action != Action::Cancel {
122        state.emit_remaining(may_collapse, options, &mut out, delegate);
123        assert_eq!(state.on_hold.len(), 0, "BUG: after emission, on hold must be empty");
124    }
125    gix_trace::debug!(statistics = ?out);
126    Ok((out, root.to_owned()))
127}
128
129/// Note that we only check symlinks on the way from `worktree_root` to `root`,
130/// so `worktree_root` may go through a symlink.
131/// Returns `(worktree_root, normalized_worktree_relative_root)`.
132fn assure_no_symlink_in_root<'root>(
133    worktree_root: &Path,
134    root: &'root Path,
135) -> Result<(PathBuf, Cow<'root, Path>), Error> {
136    let mut current = worktree_root.to_owned();
137    let worktree_relative = root
138        .strip_prefix(worktree_root)
139        .expect("BUG: root was created from worktree_root + prefix");
140    let worktree_relative = gix_path::normalize(worktree_relative.into(), Path::new(""))
141        .ok_or(Error::NormalizeRoot { root: root.to_owned() })?;
142
143    for (idx, component) in worktree_relative.components().enumerate() {
144        current.push(component);
145        let meta = current.symlink_metadata().map_err(|err| Error::SymlinkMetadata {
146            source: err,
147            path: current.to_owned(),
148        })?;
149        if meta.is_symlink() {
150            return Err(Error::SymlinkInRoot {
151                root: root.to_owned(),
152                worktree_root: worktree_root.to_owned(),
153                component_index: idx,
154            });
155        }
156    }
157    Ok((current, worktree_relative))
158}
159
160pub(super) fn can_recurse(
161    rela_path: &BStr,
162    info: classify::Outcome,
163    for_deletion: Option<ForDeletionMode>,
164    worktree_root_is_repository: bool,
165    delegate: &mut dyn Delegate,
166) -> bool {
167    let is_dir = info.disk_kind.is_some_and(|k| k.is_dir());
168    if !is_dir {
169        return false;
170    }
171    delegate.can_recurse(
172        EntryRef::from_outcome(Cow::Borrowed(rela_path), info),
173        for_deletion,
174        worktree_root_is_repository,
175    )
176}
177
178/// Possibly emit an entry to `for_each` in case the provided information makes that possible.
179#[allow(clippy::too_many_arguments)]
180pub(super) fn emit_entry(
181    rela_path: Cow<'_, BStr>,
182    info: classify::Outcome,
183    dir_status: Option<entry::Status>,
184    Options {
185        emit_pruned,
186        emit_tracked,
187        emit_ignored,
188        emit_empty_directories,
189        ..
190    }: Options<'_>,
191    out: &mut Outcome,
192    delegate: &mut dyn Delegate,
193) -> Action {
194    out.seen_entries += 1;
195
196    if (!emit_empty_directories && info.property == Some(entry::Property::EmptyDirectory)
197        || !emit_tracked && info.status == entry::Status::Tracked)
198        || emit_ignored.is_none() && matches!(info.status, entry::Status::Ignored(_))
199        || !emit_pruned
200            && (info.status.is_pruned()
201                || info
202                    .pathspec_match
203                    .map_or(true, |m| m == entry::PathspecMatch::Excluded))
204    {
205        return Action::Continue;
206    }
207
208    out.returned_entries += 1;
209    delegate.emit(EntryRef::from_outcome(rela_path, info), dir_status)
210}