Skip to main content

recast_core/
walker.rs

//! File-system enumeration honoring `.gitignore`, `--type`, and `-g`.
//!
//! Thin wrapper around the `ignore` crate's `WalkBuilder` with the
//! ripgrep-equivalent defaults (`.gitignore` respected, hidden files
//! excluded, symlinks not followed). Globs use the override engine so
//! `!pattern` works as a per-invocation exclude.
//!
//! ## Symlink semantics
//!
//! `follow_symlinks=false` (default): symlinks are skipped entirely —
//! neither the link entry nor its target is visited. Safe by default
//! for `--apply`: a malicious or accidental link can't redirect a
//! rewrite onto a file outside the user's chosen root.
//!
//! `follow_symlinks=true`: the walker resolves links and visits their
//! targets (including targets outside the walker root), but still
//! honors `.gitignore` along the way and breaks cycles via the
//! `ignore` crate's built-in loop detection. Dangling links surface
//! as walk errors rather than panicking; cycles abort the walk with a
//! typed error instead of looping forever.
21
22use std::path::{Path, PathBuf};
23use std::sync::Mutex;
24
25use ignore::overrides::OverrideBuilder;
26use ignore::types::TypesBuilder;
27use ignore::{WalkBuilder, WalkState};
28
29use crate::error::{Error, Result};
30
/// Knobs controlling which files a walk over `roots` yields.
///
/// `WalkOptions::default()` leaves every flag off and every list empty,
/// which gives the ripgrep-like baseline: ignore files are honored,
/// hidden files are skipped, and symlinks are not followed.
///
/// `types` / `types_not` take ripgrep's shorthand type names (`rust`,
/// `js`, `py`, `markdown`, …). `globs` take ripgrep-style patterns: a
/// leading `!` (e.g. `"!vendor/**"`) excludes, anything else includes.
#[derive(Debug, Clone, Default)]
pub struct WalkOptions {
    /// When `true`, hidden files are visited too.
    pub hidden: bool,
    /// When `true`, ignore files (`.ignore`, `.gitignore`, global and
    /// repo-local git excludes, and those of parent directories) are
    /// not consulted.
    pub no_ignore: bool,
    /// When `true`, symlinks are followed instead of skipped.
    pub follow_symlinks: bool,
    /// File-type names to select.
    pub types: Vec<String>,
    /// File-type names to exclude.
    pub types_not: Vec<String>,
    /// Include / `!`-prefixed exclude glob patterns.
    pub globs: Vec<String>,
}
47
48/// Enumerate every regular file under `roots` (sorted, dedup'd by the
49/// ignore crate) honoring `opts`. Directories, symlinks (unless
50/// `follow_symlinks` is set), and anything filtered out by ignore /
51/// globs / types are skipped.
52///
53/// Uses [`ignore::WalkParallel`] so the walk honors the surrounding
54/// rayon pool's thread count instead of running single-threaded
55/// regardless of `--threads N`. Output is sorted at the end so callers
56/// (and snapshot tests) get a deterministic listing.
57pub fn walk_paths<P: AsRef<Path>>(roots: &[P], opts: &WalkOptions) -> Result<Vec<PathBuf>> {
58    let builder = build_walker(roots, opts)?;
59    let collected: Mutex<Vec<PathBuf>> = Mutex::new(Vec::new());
60    let first_error: Mutex<Option<ignore::Error>> = Mutex::new(None);
61
62    builder.build_parallel().run(|| {
63        Box::new(|result| match result {
64            Ok(entry) => {
65                if matches!(entry.file_type(), Some(ft) if ft.is_file())
66                    && let Ok(mut sink) = collected.lock()
67                {
68                    sink.push(entry.into_path());
69                }
70                WalkState::Continue
71            }
72            Err(e) => {
73                if let Ok(mut slot) = first_error.lock()
74                    && slot.is_none()
75                {
76                    *slot = Some(e);
77                }
78                WalkState::Quit
79            }
80        })
81    });
82
83    if let Some(e) = first_error.into_inner().ok().flatten() {
84        return Err(Error::Walk(e));
85    }
86    let mut out = collected.into_inner().unwrap_or_default();
87    out.sort();
88    Ok(out)
89}
90
91/// Build the [`WalkBuilder`] for `roots` with `opts` applied. Pulled
92/// out of the public entry point so the parallel walker's setup stays
93/// readable; nothing else calls it.
94fn build_walker<P: AsRef<Path>>(roots: &[P], opts: &WalkOptions) -> Result<WalkBuilder> {
95    let mut builder = if let Some(first) = roots.first() {
96        WalkBuilder::new(first.as_ref())
97    } else {
98        WalkBuilder::new(".")
99    };
100    for extra in roots.iter().skip(1) {
101        builder.add(extra.as_ref());
102    }
103    builder
104        .hidden(!opts.hidden)
105        .ignore(!opts.no_ignore)
106        .git_ignore(!opts.no_ignore)
107        .git_global(!opts.no_ignore)
108        .git_exclude(!opts.no_ignore)
109        .require_git(false)
110        .parents(!opts.no_ignore)
111        .follow_links(opts.follow_symlinks)
112        // Honor the surrounding rayon pool's thread count. Falls back
113        // to ignore's own default (num_cpus) outside a rayon scope.
114        .threads(rayon::current_num_threads().max(1));
115
116    if !opts.types.is_empty() || !opts.types_not.is_empty() {
117        let mut tb = TypesBuilder::new();
118        tb.add_defaults();
119        for t in &opts.types {
120            tb.select(t);
121        }
122        for t in &opts.types_not {
123            tb.negate(t);
124        }
125        builder.types(tb.build()?);
126    }
127
128    if !opts.globs.is_empty() {
129        let glob_root = roots.first().map(|p| p.as_ref()).unwrap_or_else(|| Path::new("."));
130        let mut ob = OverrideBuilder::new(glob_root);
131        for g in &opts.globs {
132            ob.add(g)?;
133        }
134        builder.overrides(ob.build()?);
135    }
136
137    Ok(builder)
138}
139
140#[cfg(test)]
141#[path = "walker_tests.rs"]
142mod tests;