ignore/
walk.rs

1use std::{
2    cmp::Ordering,
3    ffi::OsStr,
4    fs::{self, FileType, Metadata},
5    io,
6    path::{Path, PathBuf},
7    sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
8    sync::{Arc, OnceLock},
9};
10
11use {
12    crossbeam_deque::{Stealer, Worker as Deque},
13    same_file::Handle,
14    walkdir::WalkDir,
15};
16
17use crate::{
18    Error, PartialErrorBuilder,
19    dir::{Ignore, IgnoreBuilder},
20    gitignore::GitignoreBuilder,
21    overrides::Override,
22    types::Types,
23};
24
25/// A directory entry with a possible error attached.
26///
27/// The error typically refers to a problem parsing ignore files in a
28/// particular directory.
29#[derive(Clone, Debug)]
30pub struct DirEntry {
31    dent: DirEntryInner,
32    err: Option<Error>,
33}
34
35impl DirEntry {
36    /// The full path that this entry represents.
37    pub fn path(&self) -> &Path {
38        self.dent.path()
39    }
40
41    /// The full path that this entry represents.
42    /// Analogous to [`DirEntry::path`], but moves ownership of the path.
43    pub fn into_path(self) -> PathBuf {
44        self.dent.into_path()
45    }
46
47    /// Whether this entry corresponds to a symbolic link or not.
48    pub fn path_is_symlink(&self) -> bool {
49        self.dent.path_is_symlink()
50    }
51
52    /// Returns true if and only if this entry corresponds to stdin.
53    ///
54    /// i.e., The entry has depth 0 and its file name is `-`.
55    pub fn is_stdin(&self) -> bool {
56        self.dent.is_stdin()
57    }
58
59    /// Return the metadata for the file that this entry points to.
60    pub fn metadata(&self) -> Result<Metadata, Error> {
61        self.dent.metadata()
62    }
63
64    /// Return the file type for the file that this entry points to.
65    ///
66    /// This entry doesn't have a file type if it corresponds to stdin.
67    pub fn file_type(&self) -> Option<FileType> {
68        self.dent.file_type()
69    }
70
71    /// Return the file name of this entry.
72    ///
73    /// If this entry has no file name (e.g., `/`), then the full path is
74    /// returned.
75    pub fn file_name(&self) -> &OsStr {
76        self.dent.file_name()
77    }
78
79    /// Returns the depth at which this entry was created relative to the root.
80    pub fn depth(&self) -> usize {
81        self.dent.depth()
82    }
83
84    /// Returns the underlying inode number if one exists.
85    ///
86    /// If this entry doesn't have an inode number, then `None` is returned.
87    #[cfg(unix)]
88    pub fn ino(&self) -> Option<u64> {
89        self.dent.ino()
90    }
91
92    /// Returns an error, if one exists, associated with processing this entry.
93    ///
94    /// An example of an error is one that occurred while parsing an ignore
95    /// file. Errors related to traversing a directory tree itself are reported
96    /// as part of yielding the directory entry, and not with this method.
97    pub fn error(&self) -> Option<&Error> {
98        self.err.as_ref()
99    }
100
101    /// Returns true if and only if this entry points to a directory.
102    pub(crate) fn is_dir(&self) -> bool {
103        self.dent.is_dir()
104    }
105
106    fn new_stdin() -> DirEntry {
107        DirEntry { dent: DirEntryInner::Stdin, err: None }
108    }
109
110    fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
111        DirEntry { dent: DirEntryInner::Walkdir(dent), err }
112    }
113
114    fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
115        DirEntry { dent: DirEntryInner::Raw(dent), err }
116    }
117}
118
119/// DirEntryInner is the implementation of DirEntry.
120///
121/// It specifically represents three distinct sources of directory entries:
122///
123/// 1. From the walkdir crate.
124/// 2. Special entries that represent things like stdin.
125/// 3. From a path.
126///
127/// Specifically, (3) has to essentially re-create the DirEntry implementation
128/// from WalkDir.
129#[derive(Clone, Debug)]
130enum DirEntryInner {
131    Stdin,
132    Walkdir(walkdir::DirEntry),
133    Raw(DirEntryRaw),
134}
135
136impl DirEntryInner {
137    fn path(&self) -> &Path {
138        use self::DirEntryInner::*;
139        match *self {
140            Stdin => Path::new("<stdin>"),
141            Walkdir(ref x) => x.path(),
142            Raw(ref x) => x.path(),
143        }
144    }
145
146    fn into_path(self) -> PathBuf {
147        use self::DirEntryInner::*;
148        match self {
149            Stdin => PathBuf::from("<stdin>"),
150            Walkdir(x) => x.into_path(),
151            Raw(x) => x.into_path(),
152        }
153    }
154
155    fn path_is_symlink(&self) -> bool {
156        use self::DirEntryInner::*;
157        match *self {
158            Stdin => false,
159            Walkdir(ref x) => x.path_is_symlink(),
160            Raw(ref x) => x.path_is_symlink(),
161        }
162    }
163
164    fn is_stdin(&self) -> bool {
165        match *self {
166            DirEntryInner::Stdin => true,
167            _ => false,
168        }
169    }
170
171    fn metadata(&self) -> Result<Metadata, Error> {
172        use self::DirEntryInner::*;
173        match *self {
174            Stdin => {
175                let err = Error::Io(io::Error::new(
176                    io::ErrorKind::Other,
177                    "<stdin> has no metadata",
178                ));
179                Err(err.with_path("<stdin>"))
180            }
181            Walkdir(ref x) => x.metadata().map_err(|err| {
182                Error::Io(io::Error::from(err)).with_path(x.path())
183            }),
184            Raw(ref x) => x.metadata(),
185        }
186    }
187
188    fn file_type(&self) -> Option<FileType> {
189        use self::DirEntryInner::*;
190        match *self {
191            Stdin => None,
192            Walkdir(ref x) => Some(x.file_type()),
193            Raw(ref x) => Some(x.file_type()),
194        }
195    }
196
197    fn file_name(&self) -> &OsStr {
198        use self::DirEntryInner::*;
199        match *self {
200            Stdin => OsStr::new("<stdin>"),
201            Walkdir(ref x) => x.file_name(),
202            Raw(ref x) => x.file_name(),
203        }
204    }
205
206    fn depth(&self) -> usize {
207        use self::DirEntryInner::*;
208        match *self {
209            Stdin => 0,
210            Walkdir(ref x) => x.depth(),
211            Raw(ref x) => x.depth(),
212        }
213    }
214
215    #[cfg(unix)]
216    fn ino(&self) -> Option<u64> {
217        use self::DirEntryInner::*;
218        use walkdir::DirEntryExt;
219        match *self {
220            Stdin => None,
221            Walkdir(ref x) => Some(x.ino()),
222            Raw(ref x) => Some(x.ino()),
223        }
224    }
225
226    /// Returns true if and only if this entry points to a directory.
227    fn is_dir(&self) -> bool {
228        self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
229    }
230}
231
232/// DirEntryRaw is essentially copied from the walkdir crate so that we can
233/// build `DirEntry`s from whole cloth in the parallel iterator.
234#[derive(Clone)]
235struct DirEntryRaw {
236    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
237    /// symbolic link).
238    path: PathBuf,
239    /// The file type. Necessary for recursive iteration, so store it.
240    ty: FileType,
241    /// Is set when this entry was created from a symbolic link and the user
242    /// expects the iterator to follow symbolic links.
243    follow_link: bool,
244    /// The depth at which this entry was generated relative to the root.
245    depth: usize,
246    /// The underlying inode number (Unix only).
247    #[cfg(unix)]
248    ino: u64,
249    /// The underlying metadata (Windows only). We store this on Windows
250    /// because this comes for free while reading a directory.
251    #[cfg(windows)]
252    metadata: fs::Metadata,
253}
254
255impl std::fmt::Debug for DirEntryRaw {
256    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
257        // Leaving out FileType because it doesn't have a debug impl
258        // in Rust 1.9. We could add it if we really wanted to by manually
259        // querying each possibly file type. Meh. ---AG
260        f.debug_struct("DirEntryRaw")
261            .field("path", &self.path)
262            .field("follow_link", &self.follow_link)
263            .field("depth", &self.depth)
264            .finish()
265    }
266}
267
268impl DirEntryRaw {
269    fn path(&self) -> &Path {
270        &self.path
271    }
272
273    fn into_path(self) -> PathBuf {
274        self.path
275    }
276
277    fn path_is_symlink(&self) -> bool {
278        self.ty.is_symlink() || self.follow_link
279    }
280
281    fn metadata(&self) -> Result<Metadata, Error> {
282        self.metadata_internal()
283    }
284
285    #[cfg(windows)]
286    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
287        if self.follow_link {
288            fs::metadata(&self.path)
289        } else {
290            Ok(self.metadata.clone())
291        }
292        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
293    }
294
295    #[cfg(not(windows))]
296    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
297        if self.follow_link {
298            fs::metadata(&self.path)
299        } else {
300            fs::symlink_metadata(&self.path)
301        }
302        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
303    }
304
305    fn file_type(&self) -> FileType {
306        self.ty
307    }
308
309    fn file_name(&self) -> &OsStr {
310        self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
311    }
312
313    fn depth(&self) -> usize {
314        self.depth
315    }
316
317    #[cfg(unix)]
318    fn ino(&self) -> u64 {
319        self.ino
320    }
321
322    fn from_entry(
323        depth: usize,
324        ent: &fs::DirEntry,
325    ) -> Result<DirEntryRaw, Error> {
326        let ty = ent.file_type().map_err(|err| {
327            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
328            Error::WithDepth { depth, err: Box::new(err) }
329        })?;
330        DirEntryRaw::from_entry_os(depth, ent, ty)
331    }
332
333    #[cfg(windows)]
334    fn from_entry_os(
335        depth: usize,
336        ent: &fs::DirEntry,
337        ty: fs::FileType,
338    ) -> Result<DirEntryRaw, Error> {
339        let md = ent.metadata().map_err(|err| {
340            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
341            Error::WithDepth { depth, err: Box::new(err) }
342        })?;
343        Ok(DirEntryRaw {
344            path: ent.path(),
345            ty,
346            follow_link: false,
347            depth,
348            metadata: md,
349        })
350    }
351
352    #[cfg(unix)]
353    fn from_entry_os(
354        depth: usize,
355        ent: &fs::DirEntry,
356        ty: fs::FileType,
357    ) -> Result<DirEntryRaw, Error> {
358        use std::os::unix::fs::DirEntryExt;
359
360        Ok(DirEntryRaw {
361            path: ent.path(),
362            ty,
363            follow_link: false,
364            depth,
365            ino: ent.ino(),
366        })
367    }
368
369    // Placeholder implementation to allow compiling on non-standard platforms
370    // (e.g. wasm32).
371    #[cfg(not(any(windows, unix)))]
372    fn from_entry_os(
373        depth: usize,
374        ent: &fs::DirEntry,
375        ty: fs::FileType,
376    ) -> Result<DirEntryRaw, Error> {
377        Err(Error::Io(io::Error::new(
378            io::ErrorKind::Other,
379            "unsupported platform",
380        )))
381    }
382
383    #[cfg(windows)]
384    fn from_path(
385        depth: usize,
386        pb: PathBuf,
387        link: bool,
388    ) -> Result<DirEntryRaw, Error> {
389        let md =
390            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
391        Ok(DirEntryRaw {
392            path: pb,
393            ty: md.file_type(),
394            follow_link: link,
395            depth,
396            metadata: md,
397        })
398    }
399
400    #[cfg(unix)]
401    fn from_path(
402        depth: usize,
403        pb: PathBuf,
404        link: bool,
405    ) -> Result<DirEntryRaw, Error> {
406        use std::os::unix::fs::MetadataExt;
407
408        let md =
409            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
410        Ok(DirEntryRaw {
411            path: pb,
412            ty: md.file_type(),
413            follow_link: link,
414            depth,
415            ino: md.ino(),
416        })
417    }
418
419    // Placeholder implementation to allow compiling on non-standard platforms
420    // (e.g. wasm32).
421    #[cfg(not(any(windows, unix)))]
422    fn from_path(
423        depth: usize,
424        pb: PathBuf,
425        link: bool,
426    ) -> Result<DirEntryRaw, Error> {
427        Err(Error::Io(io::Error::new(
428            io::ErrorKind::Other,
429            "unsupported platform",
430        )))
431    }
432}
433
434/// WalkBuilder builds a recursive directory iterator.
435///
436/// The builder supports a large number of configurable options. This includes
437/// specific glob overrides, file type matching, toggling whether hidden
438/// files are ignored or not, and of course, support for respecting gitignore
439/// files.
440///
441/// By default, all ignore files found are respected. This includes `.ignore`,
442/// `.gitignore`, `.git/info/exclude` and even your global gitignore
443/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
444///
445/// Some standard recursive directory options are also supported, such as
446/// limiting the recursive depth or whether to follow symbolic links (disabled
447/// by default).
448///
449/// # Ignore rules
450///
451/// There are many rules that influence whether a particular file or directory
452/// is skipped by this iterator. Those rules are documented here. Note that
453/// the rules assume a default configuration.
454///
455/// * First, glob overrides are checked. If a path matches a glob override,
456/// then matching stops. The path is then only skipped if the glob that matched
457/// the path is an ignore glob. (An override glob is a whitelist glob unless it
458/// starts with a `!`, in which case it is an ignore glob.)
459/// * Second, ignore files are checked. Ignore files currently only come from
460/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
461/// global gitignore file), plain `.ignore` files, which have the same format
462/// as gitignore files, or explicitly added ignore files. The precedence order
463/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
464/// finally explicitly added ignore files. Note that precedence between
465/// different types of ignore files is not impacted by the directory hierarchy;
466/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
467/// level, more nested ignore files have a higher precedence than less nested
468/// ignore files.
469/// * Third, if the previous step yields an ignore match, then all matching
470/// is stopped and the path is skipped. If it yields a whitelist match, then
471/// matching continues. A whitelist match can be overridden by a later matcher.
472/// * Fourth, unless the path is a directory, the file type matcher is run on
473/// the path. As above, if it yields an ignore match, then all matching is
474/// stopped and the path is skipped. If it yields a whitelist match, then
475/// matching continues.
476/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
477/// path is skipped.
478/// * Sixth, unless the path is a directory, the size of the file is compared
479/// against the max filesize limit. If it exceeds the limit, it is skipped.
480/// * Seventh, if the path has made it this far then it is yielded in the
481/// iterator.
482#[derive(Clone)]
483pub struct WalkBuilder {
484    paths: Vec<PathBuf>,
485    ig_builder: IgnoreBuilder,
486    max_depth: Option<usize>,
487    min_depth: Option<usize>,
488    max_filesize: Option<u64>,
489    follow_links: bool,
490    same_file_system: bool,
491    sorter: Option<Sorter>,
492    threads: usize,
493    skip: Option<Arc<Handle>>,
494    filter: Option<Filter>,
495    /// The directory that gitignores should be interpreted relative to.
496    ///
497    /// Usually this is the directory containing the gitignore file. But in
498    /// some cases, like for global gitignores or for gitignores specified
499    /// explicitly, this should generally be set to the current working
500    /// directory. This is only used for global gitignores or "explicit"
501    /// gitignores.
502    ///
503    /// When `None`, the CWD is fetched from `std::env::current_dir()`. If
504    /// that fails, then global gitignores are ignored (an error is logged).
505    global_gitignores_relative_to:
506        OnceLock<Result<PathBuf, Arc<std::io::Error>>>,
507}
508
509#[derive(Clone)]
510enum Sorter {
511    ByName(Arc<dyn Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static>),
512    ByPath(Arc<dyn Fn(&Path, &Path) -> Ordering + Send + Sync + 'static>),
513}
514
515#[derive(Clone)]
516struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
517
518impl std::fmt::Debug for WalkBuilder {
519    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
520        f.debug_struct("WalkBuilder")
521            .field("paths", &self.paths)
522            .field("ig_builder", &self.ig_builder)
523            .field("max_depth", &self.max_depth)
524            .field("min_depth", &self.min_depth)
525            .field("max_filesize", &self.max_filesize)
526            .field("follow_links", &self.follow_links)
527            .field("same_file_system", &self.same_file_system)
528            .field("sorter", &"<...>")
529            .field("threads", &self.threads)
530            .field("skip", &self.skip)
531            .field("filter", &"<...>")
532            .field(
533                "global_gitignores_relative_to",
534                &self.global_gitignores_relative_to,
535            )
536            .finish()
537    }
538}
539
540impl WalkBuilder {
541    /// Create a new builder for a recursive directory iterator for the
542    /// directory given.
543    ///
544    /// Note that if you want to traverse multiple different directories, it
545    /// is better to call `add` on this builder than to create multiple
546    /// `Walk` values.
547    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
548        WalkBuilder {
549            paths: vec![path.as_ref().to_path_buf()],
550            ig_builder: IgnoreBuilder::new(),
551            max_depth: None,
552            min_depth: None,
553            max_filesize: None,
554            follow_links: false,
555            same_file_system: false,
556            sorter: None,
557            threads: 0,
558            skip: None,
559            filter: None,
560            global_gitignores_relative_to: OnceLock::new(),
561        }
562    }
563
564    /// Build a new `Walk` iterator.
565    pub fn build(&self) -> Walk {
566        let follow_links = self.follow_links;
567        let max_depth = self.max_depth;
568        let min_depth = self.min_depth;
569        let sorter = self.sorter.clone();
570        let its = self
571            .paths
572            .iter()
573            .map(move |p| {
574                if p == Path::new("-") {
575                    (p.to_path_buf(), None)
576                } else {
577                    let mut wd = WalkDir::new(p);
578                    wd = wd.follow_links(follow_links || p.is_file());
579                    wd = wd.same_file_system(self.same_file_system);
580                    if let Some(max_depth) = max_depth {
581                        wd = wd.max_depth(max_depth);
582                    }
583                    if let Some(min_depth) = min_depth {
584                        wd = wd.min_depth(min_depth);
585                    }
586                    if let Some(ref sorter) = sorter {
587                        match sorter.clone() {
588                            Sorter::ByName(cmp) => {
589                                wd = wd.sort_by(move |a, b| {
590                                    cmp(a.file_name(), b.file_name())
591                                });
592                            }
593                            Sorter::ByPath(cmp) => {
594                                wd = wd.sort_by(move |a, b| {
595                                    cmp(a.path(), b.path())
596                                });
597                            }
598                        }
599                    }
600                    (p.to_path_buf(), Some(WalkEventIter::from(wd)))
601                }
602            })
603            .collect::<Vec<_>>()
604            .into_iter();
605        let ig_root = self
606            .get_or_set_current_dir()
607            .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
608            .unwrap_or_else(|| self.ig_builder.build());
609        Walk {
610            its,
611            it: None,
612            ig_root: ig_root.clone(),
613            ig: ig_root.clone(),
614            max_filesize: self.max_filesize,
615            skip: self.skip.clone(),
616            filter: self.filter.clone(),
617        }
618    }
619
620    /// Build a new `WalkParallel` iterator.
621    ///
622    /// Note that this *doesn't* return something that implements `Iterator`.
623    /// Instead, the returned value must be run with a closure. e.g.,
624    /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
625    pub fn build_parallel(&self) -> WalkParallel {
626        let ig_root = self
627            .get_or_set_current_dir()
628            .map(|cwd| self.ig_builder.build_with_cwd(Some(cwd.to_path_buf())))
629            .unwrap_or_else(|| self.ig_builder.build());
630        WalkParallel {
631            paths: self.paths.clone().into_iter(),
632            ig_root,
633            max_depth: self.max_depth,
634            min_depth: self.min_depth,
635            max_filesize: self.max_filesize,
636            follow_links: self.follow_links,
637            same_file_system: self.same_file_system,
638            threads: self.threads,
639            skip: self.skip.clone(),
640            filter: self.filter.clone(),
641        }
642    }
643
644    /// Add a file path to the iterator.
645    ///
646    /// Each additional file path added is traversed recursively. This should
647    /// be preferred over building multiple `Walk` iterators since this
648    /// enables reusing resources across iteration.
649    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
650        self.paths.push(path.as_ref().to_path_buf());
651        self
652    }
653
654    /// The maximum depth to recurse.
655    ///
656    /// The default, `None`, imposes no depth restriction.
657    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
658        self.max_depth = depth;
659        if self.min_depth.is_some()
660            && self.max_depth.is_some()
661            && self.max_depth < self.min_depth
662        {
663            self.max_depth = self.min_depth;
664        }
665        self
666    }
667
668    /// The minimum depth to recurse.
669    ///
670    /// The default, `None`, imposes no minimum depth restriction.
671    pub fn min_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
672        self.min_depth = depth;
673        if self.max_depth.is_some()
674            && self.min_depth.is_some()
675            && self.min_depth > self.max_depth
676        {
677            self.min_depth = self.max_depth;
678        }
679        self
680    }
681
682    /// Whether to follow symbolic links or not.
683    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
684        self.follow_links = yes;
685        self
686    }
687
688    /// Whether to ignore files above the specified limit.
689    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
690        self.max_filesize = filesize;
691        self
692    }
693
694    /// The number of threads to use for traversal.
695    ///
696    /// Note that this only has an effect when using `build_parallel`.
697    ///
698    /// The default setting is `0`, which chooses the number of threads
699    /// automatically using heuristics.
700    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
701        self.threads = n;
702        self
703    }
704
705    /// Add a global ignore file to the matcher.
706    ///
707    /// This has lower precedence than all other sources of ignore rules.
708    ///
709    /// # Errors
710    ///
711    /// If there was a problem adding the ignore file, then an error is
712    /// returned. Note that the error may indicate *partial* failure. For
713    /// example, if an ignore file contains an invalid glob, all other globs
714    /// are still applied.
715    ///
716    /// An error will also occur if this walker could not get the current
717    /// working directory (and `WalkBuilder::current_dir` isn't set).
718    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
719        let path = path.as_ref();
720        let Some(cwd) = self.get_or_set_current_dir() else {
721            let err = std::io::Error::other(format!(
722                "CWD is not known, ignoring global gitignore {}",
723                path.display()
724            ));
725            return Some(err.into());
726        };
727        let mut builder = GitignoreBuilder::new(cwd);
728        let mut errs = PartialErrorBuilder::default();
729        errs.maybe_push(builder.add(path));
730        match builder.build() {
731            Ok(gi) => {
732                self.ig_builder.add_ignore(gi);
733            }
734            Err(err) => {
735                errs.push(err);
736            }
737        }
738        errs.into_error_option()
739    }
740
741    /// Add a custom ignore file name
742    ///
743    /// These ignore files have higher precedence than all other ignore files.
744    ///
745    /// When specifying multiple names, earlier names have lower precedence than
746    /// later names.
747    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
748        &mut self,
749        file_name: S,
750    ) -> &mut WalkBuilder {
751        self.ig_builder.add_custom_ignore_filename(file_name);
752        self
753    }
754
755    /// Add an override matcher.
756    ///
757    /// By default, no override matcher is used.
758    ///
759    /// This overrides any previous setting.
760    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
761        self.ig_builder.overrides(overrides);
762        self
763    }
764
765    /// Add a file type matcher.
766    ///
767    /// By default, no file type matcher is used.
768    ///
769    /// This overrides any previous setting.
770    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
771        self.ig_builder.types(types);
772        self
773    }
774
775    /// Enables all the standard ignore filters.
776    ///
777    /// This toggles, as a group, all the filters that are enabled by default:
778    ///
779    /// - [hidden()](#method.hidden)
780    /// - [parents()](#method.parents)
781    /// - [ignore()](#method.ignore)
782    /// - [git_ignore()](#method.git_ignore)
783    /// - [git_global()](#method.git_global)
784    /// - [git_exclude()](#method.git_exclude)
785    ///
786    /// They may still be toggled individually after calling this function.
787    ///
788    /// This is (by definition) enabled by default.
789    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
790        self.hidden(yes)
791            .parents(yes)
792            .ignore(yes)
793            .git_ignore(yes)
794            .git_global(yes)
795            .git_exclude(yes)
796    }
797
798    /// Enables ignoring hidden files.
799    ///
800    /// This is enabled by default.
801    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
802        self.ig_builder.hidden(yes);
803        self
804    }
805
806    /// Enables reading ignore files from parent directories.
807    ///
808    /// If this is enabled, then .gitignore files in parent directories of each
809    /// file path given are respected. Otherwise, they are ignored.
810    ///
811    /// This is enabled by default.
812    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
813        self.ig_builder.parents(yes);
814        self
815    }
816
817    /// Enables reading `.ignore` files.
818    ///
819    /// `.ignore` files have the same semantics as `gitignore` files and are
820    /// supported by search tools such as ripgrep and The Silver Searcher.
821    ///
822    /// This is enabled by default.
823    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
824        self.ig_builder.ignore(yes);
825        self
826    }
827
828    /// Enables reading a global gitignore file, whose path is specified in
829    /// git's `core.excludesFile` config option.
830    ///
831    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
832    /// does not exist or does not specify `core.excludesFile`, then
833    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
834    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
835    ///
836    /// This is enabled by default.
837    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
838        self.ig_builder.git_global(yes);
839        self
840    }
841
842    /// Enables reading `.gitignore` files.
843    ///
844    /// `.gitignore` files have match semantics as described in the `gitignore`
845    /// man page.
846    ///
847    /// This is enabled by default.
848    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
849        self.ig_builder.git_ignore(yes);
850        self
851    }
852
853    /// Enables reading `.git/info/exclude` files.
854    ///
855    /// `.git/info/exclude` files have match semantics as described in the
856    /// `gitignore` man page.
857    ///
858    /// This is enabled by default.
859    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
860        self.ig_builder.git_exclude(yes);
861        self
862    }
863
864    /// Whether a git repository is required to apply git-related ignore
865    /// rules (global rules, .gitignore and local exclude rules).
866    ///
867    /// When disabled, git-related ignore rules are applied even when searching
868    /// outside a git repository.
869    ///
870    /// In particular, if this is `false` then `.gitignore` files will be read
871    /// from parent directories above the git root directory containing `.git`,
872    /// which is different from the git behavior.
873    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
874        self.ig_builder.require_git(yes);
875        self
876    }
877
878    /// Process ignore files case insensitively
879    ///
880    /// This is disabled by default.
881    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
882        self.ig_builder.ignore_case_insensitive(yes);
883        self
884    }
885
886    /// Set a function for sorting directory entries by their path.
887    ///
888    /// If a compare function is set, the resulting iterator will return all
889    /// paths in sorted order. The compare function will be called to compare
890    /// entries from the same directory.
891    ///
892    /// This is like `sort_by_file_name`, except the comparator accepts
893    /// a `&Path` instead of the base file name, which permits it to sort by
894    /// more criteria.
895    ///
896    /// This method will override any previous sorter set by this method or
897    /// by `sort_by_file_name`.
898    ///
899    /// Note that this is not used in the parallel iterator.
900    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
901    where
902        F: Fn(&Path, &Path) -> Ordering + Send + Sync + 'static,
903    {
904        self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
905        self
906    }
907
908    /// Set a function for sorting directory entries by file name.
909    ///
910    /// If a compare function is set, the resulting iterator will return all
911    /// paths in sorted order. The compare function will be called to compare
912    /// names from entries from the same directory using only the name of the
913    /// entry.
914    ///
915    /// This method will override any previous sorter set by this method or
916    /// by `sort_by_file_path`.
917    ///
918    /// Note that this is not used in the parallel iterator.
919    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
920    where
921        F: Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static,
922    {
923        self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
924        self
925    }
926
927    /// Do not cross file system boundaries.
928    ///
929    /// When this option is enabled, directory traversal will not descend into
930    /// directories that are on a different file system from the root path.
931    ///
932    /// Currently, this option is only supported on Unix and Windows. If this
933    /// option is used on an unsupported platform, then directory traversal
934    /// will immediately return an error and will not yield any entries.
935    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
936        self.same_file_system = yes;
937        self
938    }
939
940    /// Do not yield directory entries that are believed to correspond to
941    /// stdout.
942    ///
943    /// This is useful when a command is invoked via shell redirection to a
944    /// file that is also being read. For example, `grep -r foo ./ > results`
945    /// might end up trying to search `results` even though it is also writing
946    /// to it, which could cause an unbounded feedback loop. Setting this
947    /// option prevents this from happening by skipping over the `results`
948    /// file.
949    ///
950    /// This is disabled by default.
951    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
952        if yes {
953            self.skip = stdout_handle().map(Arc::new);
954        } else {
955            self.skip = None;
956        }
957        self
958    }
959
960    /// Yields only entries which satisfy the given predicate and skips
961    /// descending into directories that do not satisfy the given predicate.
962    ///
963    /// The predicate is applied to all entries. If the predicate is
964    /// true, iteration carries on as normal. If the predicate is false, the
965    /// entry is ignored and if it is a directory, it is not descended into.
966    ///
967    /// Note that the errors for reading entries that may not satisfy the
968    /// predicate will still be yielded.
969    ///
970    /// Note also that only one filter predicate can be applied to a
971    /// `WalkBuilder`. Calling this subsequent times overrides previous filter
972    /// predicates.
973    pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
974    where
975        P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
976    {
977        self.filter = Some(Filter(Arc::new(filter)));
978        self
979    }
980
981    /// Set the current working directory used for matching global gitignores.
982    ///
983    /// If this is not set, then this walker will attempt to discover the
984    /// correct path from the environment's current working directory. If
985    /// that fails, then global gitignore files will be ignored.
986    ///
987    /// Global gitignore files come from things like a user's git configuration
988    /// or from gitignore files added via [`WalkBuilder::add_ignore`].
989    pub fn current_dir(
990        &mut self,
991        cwd: impl Into<PathBuf>,
992    ) -> &mut WalkBuilder {
993        let cwd = cwd.into();
994        self.ig_builder.current_dir(cwd.clone());
995        if let Err(cwd) = self.global_gitignores_relative_to.set(Ok(cwd)) {
996            // OK because `Err` from `set` implies a value exists.
997            *self.global_gitignores_relative_to.get_mut().unwrap() = cwd;
998        }
999        self
1000    }
1001
1002    /// Gets the currently configured CWD on this walk builder.
1003    ///
1004    /// This is "lazy." That is, we only ask for the CWD from the environment
1005    /// if `WalkBuilder::current_dir` hasn't been called yet. And we ensure
1006    /// that we only do it once.
1007    fn get_or_set_current_dir(&self) -> Option<&Path> {
1008        let result = self.global_gitignores_relative_to.get_or_init(|| {
1009            let result = std::env::current_dir().map_err(Arc::new);
1010            match result {
1011                Ok(ref path) => {
1012                    log::trace!(
1013                        "automatically discovered CWD: {}",
1014                        path.display()
1015                    );
1016                }
1017                Err(ref err) => {
1018                    log::debug!(
1019                        "failed to find CWD \
1020                         (global gitignores will be ignored): \
1021                         {err}"
1022                    );
1023                }
1024            }
1025            result
1026        });
1027        result.as_ref().ok().map(|path| &**path)
1028    }
1029}
1030
1031/// Walk is a recursive directory iterator over file paths in one or more
1032/// directories.
1033///
1034/// Only file and directory paths matching the rules are returned. By default,
1035/// ignore files like `.gitignore` are respected. The precise matching rules
1036/// and precedence is explained in the documentation for `WalkBuilder`.
1037pub struct Walk {
1038    its: std::vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
1039    it: Option<WalkEventIter>,
1040    ig_root: Ignore,
1041    ig: Ignore,
1042    max_filesize: Option<u64>,
1043    skip: Option<Arc<Handle>>,
1044    filter: Option<Filter>,
1045}
1046
1047impl Walk {
1048    /// Creates a new recursive directory iterator for the file path given.
1049    ///
1050    /// Note that this uses default settings, which include respecting
1051    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
1052    /// instead.
1053    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
1054        WalkBuilder::new(path).build()
1055    }
1056
1057    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
1058        if ent.depth() == 0 {
1059            return Ok(false);
1060        }
1061        // We ensure that trivial skipping is done before any other potentially
1062        // expensive operations (stat, filesystem other) are done. This seems
1063        // like an obvious optimization but becomes critical when filesystem
1064        // operations even as simple as stat can result in significant
1065        // overheads; an example of this was a bespoke filesystem layer in
1066        // Windows that hosted files remotely and would download them on-demand
1067        // when particular filesystem operations occurred. Users of this system
1068        // who ensured correct file-type filters were being used could still
1069        // get unnecessary file access resulting in large downloads.
1070        if should_skip_entry(&self.ig, ent) {
1071            return Ok(true);
1072        }
1073        if let Some(ref stdout) = self.skip {
1074            if path_equals(ent, stdout)? {
1075                return Ok(true);
1076            }
1077        }
1078        if self.max_filesize.is_some() && !ent.is_dir() {
1079            return Ok(skip_filesize(
1080                self.max_filesize.unwrap(),
1081                ent.path(),
1082                &ent.metadata().ok(),
1083            ));
1084        }
1085        if let Some(Filter(filter)) = &self.filter {
1086            if !filter(ent) {
1087                return Ok(true);
1088            }
1089        }
1090        Ok(false)
1091    }
1092}
1093
1094impl Iterator for Walk {
1095    type Item = Result<DirEntry, Error>;
1096
1097    #[inline(always)]
1098    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
1099        loop {
1100            let ev = match self.it.as_mut().and_then(|it| it.next()) {
1101                Some(ev) => ev,
1102                None => {
1103                    match self.its.next() {
1104                        None => return None,
1105                        Some((_, None)) => {
1106                            return Some(Ok(DirEntry::new_stdin()));
1107                        }
1108                        Some((path, Some(it))) => {
1109                            self.it = Some(it);
1110                            if path.is_dir() {
1111                                let (ig, err) = self.ig_root.add_parents(path);
1112                                self.ig = ig;
1113                                if let Some(err) = err {
1114                                    return Some(Err(err));
1115                                }
1116                            } else {
1117                                self.ig = self.ig_root.clone();
1118                            }
1119                        }
1120                    }
1121                    continue;
1122                }
1123            };
1124            match ev {
1125                Err(err) => {
1126                    return Some(Err(Error::from_walkdir(err)));
1127                }
1128                Ok(WalkEvent::Exit) => {
1129                    self.ig = self.ig.parent().unwrap();
1130                }
1131                Ok(WalkEvent::Dir(ent)) => {
1132                    let mut ent = DirEntry::new_walkdir(ent, None);
1133                    let should_skip = match self.skip_entry(&ent) {
1134                        Err(err) => return Some(Err(err)),
1135                        Ok(should_skip) => should_skip,
1136                    };
1137                    if should_skip {
1138                        self.it.as_mut().unwrap().it.skip_current_dir();
1139                        // Still need to push this on the stack because
1140                        // we'll get a WalkEvent::Exit event for this dir.
1141                        // We don't care if it errors though.
1142                        let (igtmp, _) = self.ig.add_child(ent.path());
1143                        self.ig = igtmp;
1144                        continue;
1145                    }
1146                    let (igtmp, err) = self.ig.add_child(ent.path());
1147                    self.ig = igtmp;
1148                    ent.err = err;
1149                    return Some(Ok(ent));
1150                }
1151                Ok(WalkEvent::File(ent)) => {
1152                    let ent = DirEntry::new_walkdir(ent, None);
1153                    let should_skip = match self.skip_entry(&ent) {
1154                        Err(err) => return Some(Err(err)),
1155                        Ok(should_skip) => should_skip,
1156                    };
1157                    if should_skip {
1158                        continue;
1159                    }
1160                    return Some(Ok(ent));
1161                }
1162            }
1163        }
1164    }
1165}
1166
1167impl std::iter::FusedIterator for Walk {}
1168
1169/// WalkEventIter transforms a WalkDir iterator into an iterator that more
1170/// accurately describes the directory tree. Namely, it emits events that are
1171/// one of three types: directory, file or "exit." An "exit" event means that
1172/// the entire contents of a directory have been enumerated.
1173struct WalkEventIter {
1174    depth: usize,
1175    it: walkdir::IntoIter,
1176    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
1177}
1178
1179#[derive(Debug)]
1180enum WalkEvent {
1181    Dir(walkdir::DirEntry),
1182    File(walkdir::DirEntry),
1183    Exit,
1184}
1185
1186impl From<WalkDir> for WalkEventIter {
1187    fn from(it: WalkDir) -> WalkEventIter {
1188        WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1189    }
1190}
1191
1192impl Iterator for WalkEventIter {
1193    type Item = walkdir::Result<WalkEvent>;
1194
1195    #[inline(always)]
1196    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1197        let dent = self.next.take().or_else(|| self.it.next());
1198        let depth = match dent {
1199            None => 0,
1200            Some(Ok(ref dent)) => dent.depth(),
1201            Some(Err(ref err)) => err.depth(),
1202        };
1203        if depth < self.depth {
1204            self.depth -= 1;
1205            self.next = dent;
1206            return Some(Ok(WalkEvent::Exit));
1207        }
1208        self.depth = depth;
1209        match dent {
1210            None => None,
1211            Some(Err(err)) => Some(Err(err)),
1212            Some(Ok(dent)) => {
1213                if walkdir_is_dir(&dent) {
1214                    self.depth += 1;
1215                    Some(Ok(WalkEvent::Dir(dent)))
1216                } else {
1217                    Some(Ok(WalkEvent::File(dent)))
1218                }
1219            }
1220        }
1221    }
1222}
1223
1224/// WalkState is used in the parallel recursive directory iterator to indicate
1225/// whether walking should continue as normal, skip descending into a
1226/// particular directory or quit the walk entirely.
1227#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1228pub enum WalkState {
1229    /// Continue walking as normal.
1230    Continue,
1231    /// If the directory entry given is a directory, don't descend into it.
1232    /// In all other cases, this has no effect.
1233    Skip,
1234    /// Quit the entire iterator as soon as possible.
1235    ///
1236    /// Note that this is an inherently asynchronous action. It is possible
1237    /// for more entries to be yielded even after instructing the iterator
1238    /// to quit.
1239    Quit,
1240}
1241
1242impl WalkState {
1243    fn is_continue(&self) -> bool {
1244        *self == WalkState::Continue
1245    }
1246
1247    fn is_quit(&self) -> bool {
1248        *self == WalkState::Quit
1249    }
1250}
1251
1252/// A builder for constructing a visitor when using [`WalkParallel::visit`].
1253/// The builder will be called for each thread started by `WalkParallel`. The
1254/// visitor returned from each builder is then called for every directory
1255/// entry.
1256pub trait ParallelVisitorBuilder<'s> {
1257    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
1258    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
1259}
1260
1261impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1262    for &'a mut P
1263{
1264    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1265        (**self).build()
1266    }
1267}
1268
1269/// Receives files and directories for the current thread.
1270///
1271/// Setup for the traversal can be implemented as part of
1272/// [`ParallelVisitorBuilder::build`]. Teardown when traversal finishes can be
1273/// implemented by implementing the `Drop` trait on your traversal type.
1274pub trait ParallelVisitor: Send {
1275    /// Receives files and directories for the current thread. This is called
1276    /// once for every directory entry visited by traversal.
1277    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
1278}
1279
1280struct FnBuilder<F> {
1281    builder: F,
1282}
1283
1284impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1285    for FnBuilder<F>
1286{
1287    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1288        let visitor = (self.builder)();
1289        Box::new(FnVisitorImp { visitor })
1290    }
1291}
1292
1293type FnVisitor<'s> =
1294    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1295
1296struct FnVisitorImp<'s> {
1297    visitor: FnVisitor<'s>,
1298}
1299
1300impl<'s> ParallelVisitor for FnVisitorImp<'s> {
1301    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1302        (self.visitor)(entry)
1303    }
1304}
1305
1306/// WalkParallel is a parallel recursive directory iterator over files paths
1307/// in one or more directories.
1308///
1309/// Only file and directory paths matching the rules are returned. By default,
1310/// ignore files like `.gitignore` are respected. The precise matching rules
1311/// and precedence is explained in the documentation for `WalkBuilder`.
1312///
1313/// Unlike `Walk`, this uses multiple threads for traversing a directory.
1314pub struct WalkParallel {
1315    paths: std::vec::IntoIter<PathBuf>,
1316    ig_root: Ignore,
1317    max_filesize: Option<u64>,
1318    max_depth: Option<usize>,
1319    min_depth: Option<usize>,
1320    follow_links: bool,
1321    same_file_system: bool,
1322    threads: usize,
1323    skip: Option<Arc<Handle>>,
1324    filter: Option<Filter>,
1325}
1326
1327impl WalkParallel {
1328    /// Execute the parallel recursive directory iterator. `mkf` is called
1329    /// for each thread used for iteration. The function produced by `mkf`
1330    /// is then in turn called for each visited file path.
1331    pub fn run<'s, F>(self, mkf: F)
1332    where
1333        F: FnMut() -> FnVisitor<'s>,
1334    {
1335        self.visit(&mut FnBuilder { builder: mkf })
1336    }
1337
1338    /// Execute the parallel recursive directory iterator using a custom
1339    /// visitor.
1340    ///
1341    /// The builder given is used to construct a visitor for every thread
1342    /// used by this traversal. The visitor returned from each builder is then
1343    /// called for every directory entry seen by that thread.
1344    ///
1345    /// Typically, creating a custom visitor is useful if you need to perform
1346    /// some kind of cleanup once traversal is finished. This can be achieved
1347    /// by implementing `Drop` for your builder (or for your visitor, if you
1348    /// want to execute cleanup for every thread that is launched).
1349    ///
1350    /// For example, each visitor might build up a data structure of results
1351    /// corresponding to the directory entries seen for each thread. Since each
1352    /// visitor runs on only one thread, this build-up can be done without
1353    /// synchronization. Then, once traversal is complete, all of the results
1354    /// can be merged together into a single data structure.
1355    pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1356        let threads = self.threads();
1357        let mut stack = vec![];
1358        {
1359            let mut visitor = builder.build();
1360            let mut paths = Vec::new().into_iter();
1361            std::mem::swap(&mut paths, &mut self.paths);
1362            // Send the initial set of root paths to the pool of workers. Note
1363            // that we only send directories. For files, we send to them the
1364            // callback directly.
1365            for path in paths {
1366                let (dent, root_device) = if path == Path::new("-") {
1367                    (DirEntry::new_stdin(), None)
1368                } else {
1369                    let root_device = if !self.same_file_system {
1370                        None
1371                    } else {
1372                        match device_num(&path) {
1373                            Ok(root_device) => Some(root_device),
1374                            Err(err) => {
1375                                let err = Error::Io(err).with_path(path);
1376                                if visitor.visit(Err(err)).is_quit() {
1377                                    return;
1378                                }
1379                                continue;
1380                            }
1381                        }
1382                    };
1383                    match DirEntryRaw::from_path(0, path, false) {
1384                        Ok(dent) => {
1385                            (DirEntry::new_raw(dent, None), root_device)
1386                        }
1387                        Err(err) => {
1388                            if visitor.visit(Err(err)).is_quit() {
1389                                return;
1390                            }
1391                            continue;
1392                        }
1393                    }
1394                };
1395                stack.push(Message::Work(Work {
1396                    dent,
1397                    ignore: self.ig_root.clone(),
1398                    root_device,
1399                }));
1400            }
1401            // ... but there's no need to start workers if we don't need them.
1402            if stack.is_empty() {
1403                return;
1404            }
1405        }
1406        // Create the workers and then wait for them to finish.
1407        let quit_now = Arc::new(AtomicBool::new(false));
1408        let active_workers = Arc::new(AtomicUsize::new(threads));
1409        let stacks = Stack::new_for_each_thread(threads, stack);
1410        std::thread::scope(|s| {
1411            let handles: Vec<_> = stacks
1412                .into_iter()
1413                .map(|stack| Worker {
1414                    visitor: builder.build(),
1415                    stack,
1416                    quit_now: quit_now.clone(),
1417                    active_workers: active_workers.clone(),
1418                    max_depth: self.max_depth,
1419                    min_depth: self.min_depth,
1420                    max_filesize: self.max_filesize,
1421                    follow_links: self.follow_links,
1422                    skip: self.skip.clone(),
1423                    filter: self.filter.clone(),
1424                })
1425                .map(|worker| s.spawn(|| worker.run()))
1426                .collect();
1427            for handle in handles {
1428                handle.join().unwrap();
1429            }
1430        });
1431    }
1432
1433    fn threads(&self) -> usize {
1434        if self.threads == 0 {
1435            std::thread::available_parallelism().map_or(1, |n| n.get()).min(12)
1436        } else {
1437            self.threads
1438        }
1439    }
1440}
1441
1442/// Message is the set of instructions that a worker knows how to process.
1443enum Message {
1444    /// A work item corresponds to a directory that should be descended into.
1445    /// Work items for entries that should be skipped or ignored should not
1446    /// be produced.
1447    Work(Work),
1448    /// This instruction indicates that the worker should quit.
1449    Quit,
1450}
1451
1452/// A unit of work for each worker to process.
1453///
1454/// Each unit of work corresponds to a directory that should be descended
1455/// into.
1456struct Work {
1457    /// The directory entry.
1458    dent: DirEntry,
1459    /// Any ignore matchers that have been built for this directory's parents.
1460    ignore: Ignore,
1461    /// The root device number. When present, only files with the same device
1462    /// number should be considered.
1463    root_device: Option<u64>,
1464}
1465
1466impl Work {
1467    /// Returns true if and only if this work item is a directory.
1468    fn is_dir(&self) -> bool {
1469        self.dent.is_dir()
1470    }
1471
1472    /// Returns true if and only if this work item is a symlink.
1473    fn is_symlink(&self) -> bool {
1474        self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1475    }
1476
1477    /// Adds ignore rules for parent directories.
1478    ///
1479    /// Note that this only applies to entries at depth 0. On all other
1480    /// entries, this is a no-op.
1481    fn add_parents(&mut self) -> Option<Error> {
1482        if self.dent.depth() > 0 {
1483            return None;
1484        }
1485        // At depth 0, the path of this entry is a root path, so we can
1486        // use it directly to add parent ignore rules.
1487        let (ig, err) = self.ignore.add_parents(self.dent.path());
1488        self.ignore = ig;
1489        err
1490    }
1491
1492    /// Reads the directory contents of this work item and adds ignore
1493    /// rules for this directory.
1494    ///
1495    /// If there was a problem with reading the directory contents, then
1496    /// an error is returned. If there was a problem reading the ignore
1497    /// rules for this directory, then the error is attached to this
1498    /// work item's directory entry.
1499    fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1500        let readdir = match fs::read_dir(self.dent.path()) {
1501            Ok(readdir) => readdir,
1502            Err(err) => {
1503                let err = Error::from(err)
1504                    .with_path(self.dent.path())
1505                    .with_depth(self.dent.depth());
1506                return Err(err);
1507            }
1508        };
1509        let (ig, err) = self.ignore.add_child(self.dent.path());
1510        self.ignore = ig;
1511        self.dent.err = err;
1512        Ok(readdir)
1513    }
1514}
1515
1516/// A work-stealing stack.
1517#[derive(Debug)]
1518struct Stack {
1519    /// This thread's index.
1520    index: usize,
1521    /// The thread-local stack.
1522    deque: Deque<Message>,
1523    /// The work stealers.
1524    stealers: Arc<[Stealer<Message>]>,
1525}
1526
1527impl Stack {
1528    /// Create a work-stealing stack for each thread. The given messages
1529    /// correspond to the initial paths to start the search at. They will
1530    /// be distributed automatically to each stack in a round-robin fashion.
1531    fn new_for_each_thread(threads: usize, init: Vec<Message>) -> Vec<Stack> {
1532        // Using new_lifo() ensures each worker operates depth-first, not
1533        // breadth-first. We do depth-first because a breadth first traversal
1534        // on wide directories with a lot of gitignores is disastrous (for
1535        // example, searching a directory tree containing all of crates.io).
1536        let deques: Vec<Deque<Message>> =
1537            std::iter::repeat_with(Deque::new_lifo).take(threads).collect();
1538        let stealers = Arc::<[Stealer<Message>]>::from(
1539            deques.iter().map(Deque::stealer).collect::<Vec<_>>(),
1540        );
1541        let stacks: Vec<Stack> = deques
1542            .into_iter()
1543            .enumerate()
1544            .map(|(index, deque)| Stack {
1545                index,
1546                deque,
1547                stealers: stealers.clone(),
1548            })
1549            .collect();
1550        // Distribute the initial messages, reverse the order to cancel out
1551        // the other reversal caused by the inherent LIFO processing of the
1552        // per-thread stacks which are filled here.
1553        init.into_iter()
1554            .rev()
1555            .zip(stacks.iter().cycle())
1556            .for_each(|(m, s)| s.push(m));
1557        stacks
1558    }
1559
1560    /// Push a message.
1561    fn push(&self, msg: Message) {
1562        self.deque.push(msg);
1563    }
1564
1565    /// Pop a message.
1566    fn pop(&self) -> Option<Message> {
1567        self.deque.pop().or_else(|| self.steal())
1568    }
1569
1570    /// Steal a message from another queue.
1571    fn steal(&self) -> Option<Message> {
1572        // For fairness, try to steal from index + 1, index + 2, ... len - 1,
1573        // then wrap around to 0, 1, ... index - 1.
1574        let (left, right) = self.stealers.split_at(self.index);
1575        // Don't steal from ourselves
1576        let right = &right[1..];
1577
1578        right
1579            .iter()
1580            .chain(left.iter())
1581            .map(|s| s.steal_batch_and_pop(&self.deque))
1582            .find_map(|s| s.success())
1583    }
1584}
1585
1586/// A worker is responsible for descending into directories, updating the
1587/// ignore matchers, producing new work and invoking the caller's callback.
1588///
1589/// Note that a worker is *both* a producer and a consumer.
1590struct Worker<'s> {
1591    /// The caller's callback.
1592    visitor: Box<dyn ParallelVisitor + 's>,
1593    /// A work-stealing stack of work to do.
1594    ///
1595    /// We use a stack instead of a channel because a stack lets us visit
1596    /// directories in depth first order. This can substantially reduce peak
1597    /// memory usage by keeping both the number of file paths and gitignore
1598    /// matchers in memory lower.
1599    stack: Stack,
1600    /// Whether all workers should terminate at the next opportunity. Note
1601    /// that we need this because we don't want other `Work` to be done after
1602    /// we quit. We wouldn't need this if have a priority channel.
1603    quit_now: Arc<AtomicBool>,
1604    /// The number of currently active workers.
1605    active_workers: Arc<AtomicUsize>,
1606    /// The maximum depth of directories to descend. A value of `0` means no
1607    /// descension at all.
1608    max_depth: Option<usize>,
1609    /// The minimum depth of directories to descend.
1610    min_depth: Option<usize>,
1611    /// The maximum size a searched file can be (in bytes). If a file exceeds
1612    /// this size it will be skipped.
1613    max_filesize: Option<u64>,
1614    /// Whether to follow symbolic links or not. When this is enabled, loop
1615    /// detection is performed.
1616    follow_links: bool,
1617    /// A file handle to skip, currently is either `None` or stdout, if it's
1618    /// a file and it has been requested to skip files identical to stdout.
1619    skip: Option<Arc<Handle>>,
1620    /// A predicate applied to dir entries. If true, the entry and all
1621    /// children will be skipped.
1622    filter: Option<Filter>,
1623}
1624
1625impl<'s> Worker<'s> {
1626    /// Runs this worker until there is no more work left to do.
1627    ///
1628    /// The worker will call the caller's callback for all entries that aren't
1629    /// skipped by the ignore matcher.
1630    fn run(mut self) {
1631        while let Some(work) = self.get_work() {
1632            if let WalkState::Quit = self.run_one(work) {
1633                self.quit_now();
1634            }
1635        }
1636    }
1637
1638    fn run_one(&mut self, mut work: Work) -> WalkState {
1639        let should_visit = self
1640            .min_depth
1641            .map(|min_depth| work.dent.depth() >= min_depth)
1642            .unwrap_or(true);
1643
1644        // If the work is not a directory, then we can just execute the
1645        // caller's callback immediately and move on.
1646        if work.is_symlink() || !work.is_dir() {
1647            return if should_visit {
1648                self.visitor.visit(Ok(work.dent))
1649            } else {
1650                WalkState::Continue
1651            };
1652        }
1653        if let Some(err) = work.add_parents() {
1654            let state = self.visitor.visit(Err(err));
1655            if state.is_quit() {
1656                return state;
1657            }
1658        }
1659
1660        let descend = if let Some(root_device) = work.root_device {
1661            match is_same_file_system(root_device, work.dent.path()) {
1662                Ok(true) => true,
1663                Ok(false) => false,
1664                Err(err) => {
1665                    let state = self.visitor.visit(Err(err));
1666                    if state.is_quit() {
1667                        return state;
1668                    }
1669                    false
1670                }
1671            }
1672        } else {
1673            true
1674        };
1675
1676        // Try to read the directory first before we transfer ownership
1677        // to the provided closure. Do not unwrap it immediately, though,
1678        // as we may receive an `Err` value e.g. in the case when we do not
1679        // have sufficient read permissions to list the directory.
1680        // In that case we still want to provide the closure with a valid
1681        // entry before passing the error value.
1682        let readdir = work.read_dir();
1683        let depth = work.dent.depth();
1684        if should_visit {
1685            let state = self.visitor.visit(Ok(work.dent));
1686            if !state.is_continue() {
1687                return state;
1688            }
1689        }
1690        if !descend {
1691            return WalkState::Skip;
1692        }
1693
1694        let readdir = match readdir {
1695            Ok(readdir) => readdir,
1696            Err(err) => {
1697                return self.visitor.visit(Err(err));
1698            }
1699        };
1700
1701        if self.max_depth.map_or(false, |max| depth >= max) {
1702            return WalkState::Skip;
1703        }
1704        for result in readdir {
1705            let state = self.generate_work(
1706                &work.ignore,
1707                depth + 1,
1708                work.root_device,
1709                result,
1710            );
1711            if state.is_quit() {
1712                return state;
1713            }
1714        }
1715        WalkState::Continue
1716    }
1717
1718    /// Decides whether to submit the given directory entry as a file to
1719    /// search.
1720    ///
1721    /// If the entry is a path that should be ignored, then this is a no-op.
1722    /// Otherwise, the entry is pushed on to the queue. (The actual execution
1723    /// of the callback happens in `run_one`.)
1724    ///
1725    /// If an error occurs while reading the entry, then it is sent to the
1726    /// caller's callback.
1727    ///
1728    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1729    /// be the depth of this entry. `result` should be the item yielded by
1730    /// a directory iterator.
1731    fn generate_work(
1732        &mut self,
1733        ig: &Ignore,
1734        depth: usize,
1735        root_device: Option<u64>,
1736        result: Result<fs::DirEntry, io::Error>,
1737    ) -> WalkState {
1738        let fs_dent = match result {
1739            Ok(fs_dent) => fs_dent,
1740            Err(err) => {
1741                return self
1742                    .visitor
1743                    .visit(Err(Error::from(err).with_depth(depth)));
1744            }
1745        };
1746        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1747            Ok(dent) => DirEntry::new_raw(dent, None),
1748            Err(err) => {
1749                return self.visitor.visit(Err(err));
1750            }
1751        };
1752        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1753        if self.follow_links && is_symlink {
1754            let path = dent.path().to_path_buf();
1755            dent = match DirEntryRaw::from_path(depth, path, true) {
1756                Ok(dent) => DirEntry::new_raw(dent, None),
1757                Err(err) => {
1758                    return self.visitor.visit(Err(err));
1759                }
1760            };
1761            if dent.is_dir() {
1762                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1763                    return self.visitor.visit(Err(err));
1764                }
1765            }
1766        }
1767        // N.B. See analogous call in the single-threaded implementation about
1768        // why it's important for this to come before the checks below.
1769        if should_skip_entry(ig, &dent) {
1770            return WalkState::Continue;
1771        }
1772        if let Some(ref stdout) = self.skip {
1773            let is_stdout = match path_equals(&dent, stdout) {
1774                Ok(is_stdout) => is_stdout,
1775                Err(err) => return self.visitor.visit(Err(err)),
1776            };
1777            if is_stdout {
1778                return WalkState::Continue;
1779            }
1780        }
1781        let should_skip_filesize =
1782            if self.max_filesize.is_some() && !dent.is_dir() {
1783                skip_filesize(
1784                    self.max_filesize.unwrap(),
1785                    dent.path(),
1786                    &dent.metadata().ok(),
1787                )
1788            } else {
1789                false
1790            };
1791        let should_skip_filtered =
1792            if let Some(Filter(predicate)) = &self.filter {
1793                !predicate(&dent)
1794            } else {
1795                false
1796            };
1797        if !should_skip_filesize && !should_skip_filtered {
1798            self.send(Work { dent, ignore: ig.clone(), root_device });
1799        }
1800        WalkState::Continue
1801    }
1802
1803    /// Returns the next directory to descend into.
1804    ///
1805    /// If all work has been exhausted, then this returns None. The worker
1806    /// should then subsequently quit.
1807    fn get_work(&mut self) -> Option<Work> {
1808        let mut value = self.recv();
1809        loop {
1810            // Simulate a priority channel: If quit_now flag is set, we can
1811            // receive only quit messages.
1812            if self.is_quit_now() {
1813                value = Some(Message::Quit)
1814            }
1815            match value {
1816                Some(Message::Work(work)) => {
1817                    return Some(work);
1818                }
1819                Some(Message::Quit) => {
1820                    // Repeat quit message to wake up sleeping threads, if
1821                    // any. The domino effect will ensure that every thread
1822                    // will quit.
1823                    self.send_quit();
1824                    return None;
1825                }
1826                None => {
1827                    if self.deactivate_worker() == 0 {
1828                        // If deactivate_worker() returns 0, every worker thread
1829                        // is currently within the critical section between the
1830                        // acquire in deactivate_worker() and the release in
1831                        // activate_worker() below.  For this to happen, every
1832                        // worker's local deque must be simultaneously empty,
1833                        // meaning there is no more work left at all.
1834                        self.send_quit();
1835                        return None;
1836                    }
1837                    // Wait for next `Work` or `Quit` message.
1838                    loop {
1839                        if let Some(v) = self.recv() {
1840                            self.activate_worker();
1841                            value = Some(v);
1842                            break;
1843                        }
1844                        // Our stack isn't blocking. Instead of burning the
1845                        // CPU waiting, we let the thread sleep for a bit. In
1846                        // general, this tends to only occur once the search is
1847                        // approaching termination.
1848                        let dur = std::time::Duration::from_millis(1);
1849                        std::thread::sleep(dur);
1850                    }
1851                }
1852            }
1853        }
1854    }
1855
1856    /// Indicates that all workers should quit immediately.
1857    fn quit_now(&self) {
1858        self.quit_now.store(true, AtomicOrdering::SeqCst);
1859    }
1860
1861    /// Returns true if this worker should quit immediately.
1862    fn is_quit_now(&self) -> bool {
1863        self.quit_now.load(AtomicOrdering::SeqCst)
1864    }
1865
1866    /// Send work.
1867    fn send(&self, work: Work) {
1868        self.stack.push(Message::Work(work));
1869    }
1870
1871    /// Send a quit message.
1872    fn send_quit(&self) {
1873        self.stack.push(Message::Quit);
1874    }
1875
1876    /// Receive work.
1877    fn recv(&self) -> Option<Message> {
1878        self.stack.pop()
1879    }
1880
1881    /// Deactivates a worker and returns the number of currently active workers.
1882    fn deactivate_worker(&self) -> usize {
1883        self.active_workers.fetch_sub(1, AtomicOrdering::Acquire) - 1
1884    }
1885
1886    /// Reactivates a worker.
1887    fn activate_worker(&self) {
1888        self.active_workers.fetch_add(1, AtomicOrdering::Release);
1889    }
1890}
1891
1892fn check_symlink_loop(
1893    ig_parent: &Ignore,
1894    child_path: &Path,
1895    child_depth: usize,
1896) -> Result<(), Error> {
1897    let hchild = Handle::from_path(child_path).map_err(|err| {
1898        Error::from(err).with_path(child_path).with_depth(child_depth)
1899    })?;
1900    for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1901        let h = Handle::from_path(ig.path()).map_err(|err| {
1902            Error::from(err).with_path(child_path).with_depth(child_depth)
1903        })?;
1904        if hchild == h {
1905            return Err(Error::Loop {
1906                ancestor: ig.path().to_path_buf(),
1907                child: child_path.to_path_buf(),
1908            }
1909            .with_depth(child_depth));
1910        }
1911    }
1912    Ok(())
1913}
1914
1915// Before calling this function, make sure that you ensure that is really
1916// necessary as the arguments imply a file stat.
1917fn skip_filesize(
1918    max_filesize: u64,
1919    path: &Path,
1920    ent: &Option<Metadata>,
1921) -> bool {
1922    let filesize = match *ent {
1923        Some(ref md) => Some(md.len()),
1924        None => None,
1925    };
1926
1927    if let Some(fs) = filesize {
1928        if fs > max_filesize {
1929            log::debug!("ignoring {}: {} bytes", path.display(), fs);
1930            true
1931        } else {
1932            false
1933        }
1934    } else {
1935        false
1936    }
1937}
1938
1939fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1940    let m = ig.matched_dir_entry(dent);
1941    if m.is_ignore() {
1942        log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1943        true
1944    } else if m.is_whitelist() {
1945        log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1946        false
1947    } else {
1948        false
1949    }
1950}
1951
1952/// Returns a handle to stdout for filtering search.
1953///
1954/// A handle is returned if and only if stdout is being redirected to a file.
1955/// The handle returned corresponds to that file.
1956///
1957/// This can be used to ensure that we do not attempt to search a file that we
1958/// may also be writing to.
1959fn stdout_handle() -> Option<Handle> {
1960    let h = match Handle::stdout() {
1961        Err(_) => return None,
1962        Ok(h) => h,
1963    };
1964    let md = match h.as_file().metadata() {
1965        Err(_) => return None,
1966        Ok(md) => md,
1967    };
1968    if !md.is_file() {
1969        return None;
1970    }
1971    Some(h)
1972}
1973
1974/// Returns true if and only if the given directory entry is believed to be
1975/// equivalent to the given handle. If there was a problem querying the path
1976/// for information to determine equality, then that error is returned.
1977fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1978    #[cfg(unix)]
1979    fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1980        dent.ino() != Some(handle.ino())
1981    }
1982
1983    #[cfg(not(unix))]
1984    fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1985        false
1986    }
1987
1988    // If we know for sure that these two things aren't equal, then avoid
1989    // the costly extra stat call to determine equality.
1990    if dent.is_stdin() || never_equal(dent, handle) {
1991        return Ok(false);
1992    }
1993    Handle::from_path(dent.path())
1994        .map(|h| &h == handle)
1995        .map_err(|err| Error::Io(err).with_path(dent.path()))
1996}
1997
1998/// Returns true if the given walkdir entry corresponds to a directory.
1999///
2000/// This is normally just `dent.file_type().is_dir()`, but when we aren't
2001/// following symlinks, the root directory entry may be a symlink to a
2002/// directory that we *do* follow---by virtue of it being specified by the user
2003/// explicitly. In that case, we need to follow the symlink and query whether
2004/// it's a directory or not. But we only do this for root entries to avoid an
2005/// additional stat check in most cases.
2006fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
2007    if dent.file_type().is_dir() {
2008        return true;
2009    }
2010    if !dent.file_type().is_symlink() || dent.depth() > 0 {
2011        return false;
2012    }
2013    dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
2014}
2015
2016/// Returns true if and only if the given path is on the same device as the
2017/// given root device.
2018fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
2019    let dent_device =
2020        device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
2021    Ok(root_device == dent_device)
2022}
2023
2024#[cfg(unix)]
2025fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
2026    use std::os::unix::fs::MetadataExt;
2027
2028    path.as_ref().metadata().map(|md| md.dev())
2029}
2030
2031#[cfg(windows)]
2032fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
2033    use winapi_util::{Handle, file};
2034
2035    let h = Handle::from_path_any(path)?;
2036    file::information(h).map(|info| info.volume_serial_number())
2037}
2038
2039#[cfg(not(any(unix, windows)))]
2040fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
2041    Err(io::Error::new(
2042        io::ErrorKind::Other,
2043        "walkdir: same_file_system option not supported on this platform",
2044    ))
2045}
2046
2047#[cfg(test)]
2048mod tests {
2049    use std::ffi::OsStr;
2050    use std::fs::{self, File};
2051    use std::io::Write;
2052    use std::path::Path;
2053    use std::sync::{Arc, Mutex};
2054
2055    use super::{DirEntry, WalkBuilder, WalkState};
2056    use crate::tests::TempDir;
2057
2058    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
2059        let mut file = File::create(path).unwrap();
2060        file.write_all(contents.as_bytes()).unwrap();
2061    }
2062
2063    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
2064        let file = File::create(path).unwrap();
2065        file.set_len(size).unwrap();
2066    }
2067
2068    #[cfg(unix)]
2069    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
2070        use std::os::unix::fs::symlink;
2071        symlink(src, dst).unwrap();
2072    }
2073
2074    fn mkdirp<P: AsRef<Path>>(path: P) {
2075        fs::create_dir_all(path).unwrap();
2076    }
2077
2078    fn normal_path(unix: &str) -> String {
2079        if cfg!(windows) { unix.replace("\\", "/") } else { unix.to_string() }
2080    }
2081
2082    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
2083        let mut paths = vec![];
2084        for result in builder.build() {
2085            let dent = match result {
2086                Err(_) => continue,
2087                Ok(dent) => dent,
2088            };
2089            let path = dent.path().strip_prefix(prefix).unwrap();
2090            if path.as_os_str().is_empty() {
2091                continue;
2092            }
2093            paths.push(normal_path(path.to_str().unwrap()));
2094        }
2095        paths.sort();
2096        paths
2097    }
2098
2099    fn walk_collect_parallel(
2100        prefix: &Path,
2101        builder: &WalkBuilder,
2102    ) -> Vec<String> {
2103        let mut paths = vec![];
2104        for dent in walk_collect_entries_parallel(builder) {
2105            let path = dent.path().strip_prefix(prefix).unwrap();
2106            if path.as_os_str().is_empty() {
2107                continue;
2108            }
2109            paths.push(normal_path(path.to_str().unwrap()));
2110        }
2111        paths.sort();
2112        paths
2113    }
2114
2115    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
2116        let dents = Arc::new(Mutex::new(vec![]));
2117        builder.build_parallel().run(|| {
2118            let dents = dents.clone();
2119            Box::new(move |result| {
2120                if let Ok(dent) = result {
2121                    dents.lock().unwrap().push(dent);
2122                }
2123                WalkState::Continue
2124            })
2125        });
2126
2127        let dents = dents.lock().unwrap();
2128        dents.to_vec()
2129    }
2130
2131    fn mkpaths(paths: &[&str]) -> Vec<String> {
2132        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
2133        paths.sort();
2134        paths
2135    }
2136
2137    fn tmpdir() -> TempDir {
2138        TempDir::new().unwrap()
2139    }
2140
2141    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
2142        let got = walk_collect(prefix, builder);
2143        assert_eq!(got, mkpaths(expected), "single threaded");
2144        let got = walk_collect_parallel(prefix, builder);
2145        assert_eq!(got, mkpaths(expected), "parallel");
2146    }
2147
2148    #[test]
2149    fn no_ignores() {
2150        let td = tmpdir();
2151        mkdirp(td.path().join("a/b/c"));
2152        mkdirp(td.path().join("x/y"));
2153        wfile(td.path().join("a/b/foo"), "");
2154        wfile(td.path().join("x/y/foo"), "");
2155
2156        assert_paths(
2157            td.path(),
2158            &WalkBuilder::new(td.path()),
2159            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2160        );
2161    }
2162
2163    #[test]
2164    fn custom_ignore() {
2165        let td = tmpdir();
2166        let custom_ignore = ".customignore";
2167        mkdirp(td.path().join("a"));
2168        wfile(td.path().join(custom_ignore), "foo");
2169        wfile(td.path().join("foo"), "");
2170        wfile(td.path().join("a/foo"), "");
2171        wfile(td.path().join("bar"), "");
2172        wfile(td.path().join("a/bar"), "");
2173
2174        let mut builder = WalkBuilder::new(td.path());
2175        builder.add_custom_ignore_filename(&custom_ignore);
2176        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2177    }
2178
2179    #[test]
2180    fn custom_ignore_exclusive_use() {
2181        let td = tmpdir();
2182        let custom_ignore = ".customignore";
2183        mkdirp(td.path().join("a"));
2184        wfile(td.path().join(custom_ignore), "foo");
2185        wfile(td.path().join("foo"), "");
2186        wfile(td.path().join("a/foo"), "");
2187        wfile(td.path().join("bar"), "");
2188        wfile(td.path().join("a/bar"), "");
2189
2190        let mut builder = WalkBuilder::new(td.path());
2191        builder.ignore(false);
2192        builder.git_ignore(false);
2193        builder.git_global(false);
2194        builder.git_exclude(false);
2195        builder.add_custom_ignore_filename(&custom_ignore);
2196        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2197    }
2198
2199    #[test]
2200    fn gitignore() {
2201        let td = tmpdir();
2202        mkdirp(td.path().join(".git"));
2203        mkdirp(td.path().join("a"));
2204        wfile(td.path().join(".gitignore"), "foo");
2205        wfile(td.path().join("foo"), "");
2206        wfile(td.path().join("a/foo"), "");
2207        wfile(td.path().join("bar"), "");
2208        wfile(td.path().join("a/bar"), "");
2209
2210        assert_paths(
2211            td.path(),
2212            &WalkBuilder::new(td.path()),
2213            &["bar", "a", "a/bar"],
2214        );
2215    }
2216
2217    #[test]
2218    fn explicit_ignore() {
2219        let td = tmpdir();
2220        let igpath = td.path().join(".not-an-ignore");
2221        mkdirp(td.path().join("a"));
2222        wfile(&igpath, "foo");
2223        wfile(td.path().join("foo"), "");
2224        wfile(td.path().join("a/foo"), "");
2225        wfile(td.path().join("bar"), "");
2226        wfile(td.path().join("a/bar"), "");
2227
2228        let mut builder = WalkBuilder::new(td.path());
2229        assert!(builder.add_ignore(&igpath).is_none());
2230        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2231    }
2232
2233    #[test]
2234    fn explicit_ignore_exclusive_use() {
2235        let td = tmpdir();
2236        let igpath = td.path().join(".not-an-ignore");
2237        mkdirp(td.path().join("a"));
2238        wfile(&igpath, "foo");
2239        wfile(td.path().join("foo"), "");
2240        wfile(td.path().join("a/foo"), "");
2241        wfile(td.path().join("bar"), "");
2242        wfile(td.path().join("a/bar"), "");
2243
2244        let mut builder = WalkBuilder::new(td.path());
2245        builder.standard_filters(false);
2246        assert!(builder.add_ignore(&igpath).is_none());
2247        assert_paths(
2248            td.path(),
2249            &builder,
2250            &[".not-an-ignore", "bar", "a", "a/bar"],
2251        );
2252    }
2253
2254    #[test]
2255    fn gitignore_parent() {
2256        let td = tmpdir();
2257        mkdirp(td.path().join(".git"));
2258        mkdirp(td.path().join("a"));
2259        wfile(td.path().join(".gitignore"), "foo");
2260        wfile(td.path().join("a/foo"), "");
2261        wfile(td.path().join("a/bar"), "");
2262
2263        let root = td.path().join("a");
2264        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
2265    }
2266
2267    #[test]
2268    fn max_depth() {
2269        let td = tmpdir();
2270        mkdirp(td.path().join("a/b/c"));
2271        wfile(td.path().join("foo"), "");
2272        wfile(td.path().join("a/foo"), "");
2273        wfile(td.path().join("a/b/foo"), "");
2274        wfile(td.path().join("a/b/c/foo"), "");
2275
2276        let mut builder = WalkBuilder::new(td.path());
2277        assert_paths(
2278            td.path(),
2279            &builder,
2280            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2281        );
2282        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
2283        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
2284        assert_paths(
2285            td.path(),
2286            builder.max_depth(Some(2)),
2287            &["a", "a/b", "foo", "a/foo"],
2288        );
2289    }
2290
2291    #[test]
2292    fn min_depth() {
2293        let td = tmpdir();
2294        mkdirp(td.path().join("a/b/c"));
2295        wfile(td.path().join("foo"), "");
2296        wfile(td.path().join("a/foo"), "");
2297        wfile(td.path().join("a/b/foo"), "");
2298        wfile(td.path().join("a/b/c/foo"), "");
2299
2300        let builder = WalkBuilder::new(td.path());
2301        assert_paths(
2302            td.path(),
2303            &builder,
2304            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2305        );
2306        let mut builder = WalkBuilder::new(td.path());
2307        assert_paths(
2308            td.path(),
2309            &builder.min_depth(Some(0)),
2310            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2311        );
2312        assert_paths(
2313            td.path(),
2314            &builder.min_depth(Some(1)),
2315            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2316        );
2317        assert_paths(
2318            td.path(),
2319            builder.min_depth(Some(2)),
2320            &["a/b", "a/b/c", "a/b/c/foo", "a/b/foo", "a/foo"],
2321        );
2322        assert_paths(
2323            td.path(),
2324            builder.min_depth(Some(3)),
2325            &["a/b/c", "a/b/c/foo", "a/b/foo"],
2326        );
2327        assert_paths(td.path(), builder.min_depth(Some(10)), &[]);
2328
2329        assert_paths(
2330            td.path(),
2331            builder.min_depth(Some(2)).max_depth(Some(1)),
2332            &["a/b", "a/foo"],
2333        );
2334    }
2335
2336    #[test]
2337    fn max_filesize() {
2338        let td = tmpdir();
2339        mkdirp(td.path().join("a/b"));
2340        wfile_size(td.path().join("foo"), 0);
2341        wfile_size(td.path().join("bar"), 400);
2342        wfile_size(td.path().join("baz"), 600);
2343        wfile_size(td.path().join("a/foo"), 600);
2344        wfile_size(td.path().join("a/bar"), 500);
2345        wfile_size(td.path().join("a/baz"), 200);
2346
2347        let mut builder = WalkBuilder::new(td.path());
2348        assert_paths(
2349            td.path(),
2350            &builder,
2351            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2352        );
2353        assert_paths(
2354            td.path(),
2355            builder.max_filesize(Some(0)),
2356            &["a", "a/b", "foo"],
2357        );
2358        assert_paths(
2359            td.path(),
2360            builder.max_filesize(Some(500)),
2361            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
2362        );
2363        assert_paths(
2364            td.path(),
2365            builder.max_filesize(Some(50000)),
2366            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2367        );
2368    }
2369
2370    #[cfg(unix)] // because symlinks on windows are weird
2371    #[test]
2372    fn symlinks() {
2373        let td = tmpdir();
2374        mkdirp(td.path().join("a/b"));
2375        symlink(td.path().join("a/b"), td.path().join("z"));
2376        wfile(td.path().join("a/b/foo"), "");
2377
2378        let mut builder = WalkBuilder::new(td.path());
2379        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
2380        assert_paths(
2381            td.path(),
2382            &builder.follow_links(true),
2383            &["a", "a/b", "a/b/foo", "z", "z/foo"],
2384        );
2385    }
2386
2387    #[cfg(unix)] // because symlinks on windows are weird
2388    #[test]
2389    fn first_path_not_symlink() {
2390        let td = tmpdir();
2391        mkdirp(td.path().join("foo"));
2392
2393        let dents = WalkBuilder::new(td.path().join("foo"))
2394            .build()
2395            .into_iter()
2396            .collect::<Result<Vec<_>, _>>()
2397            .unwrap();
2398        assert_eq!(1, dents.len());
2399        assert!(!dents[0].path_is_symlink());
2400
2401        let dents = walk_collect_entries_parallel(&WalkBuilder::new(
2402            td.path().join("foo"),
2403        ));
2404        assert_eq!(1, dents.len());
2405        assert!(!dents[0].path_is_symlink());
2406    }
2407
2408    #[cfg(unix)] // because symlinks on windows are weird
2409    #[test]
2410    fn symlink_loop() {
2411        let td = tmpdir();
2412        mkdirp(td.path().join("a/b"));
2413        symlink(td.path().join("a"), td.path().join("a/b/c"));
2414
2415        let mut builder = WalkBuilder::new(td.path());
2416        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
2417        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
2418    }
2419
2420    // It's a little tricky to test the 'same_file_system' option since
2421    // we need an environment with more than one file system. We adopt a
2422    // heuristic where /sys is typically a distinct volume on Linux and roll
2423    // with that.
2424    #[test]
2425    #[cfg(target_os = "linux")]
2426    fn same_file_system() {
2427        use super::device_num;
2428
2429        // If for some reason /sys doesn't exist or isn't a directory, just
2430        // skip this test.
2431        if !Path::new("/sys").is_dir() {
2432            return;
2433        }
2434
2435        // If our test directory actually isn't a different volume from /sys,
2436        // then this test is meaningless and we shouldn't run it.
2437        let td = tmpdir();
2438        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
2439            return;
2440        }
2441
2442        mkdirp(td.path().join("same_file"));
2443        symlink("/sys", td.path().join("same_file").join("alink"));
2444
2445        // Create a symlink to sys and enable following symlinks. If the
2446        // same_file_system option doesn't work, then this probably will hit a
2447        // permission error. Otherwise, it should just skip over the symlink
2448        // completely.
2449        let mut builder = WalkBuilder::new(td.path());
2450        builder.follow_links(true).same_file_system(true);
2451        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
2452    }
2453
2454    #[cfg(target_os = "linux")]
2455    #[test]
2456    fn no_read_permissions() {
2457        let dir_path = Path::new("/root");
2458
2459        // There's no /etc/sudoers.d, skip the test.
2460        if !dir_path.is_dir() {
2461            return;
2462        }
2463        // We're the root, so the test won't check what we want it to.
2464        if fs::read_dir(&dir_path).is_ok() {
2465            return;
2466        }
2467
2468        // Check that we can't descend but get an entry for the parent dir.
2469        let builder = WalkBuilder::new(&dir_path);
2470        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
2471    }
2472
2473    #[test]
2474    fn filter() {
2475        let td = tmpdir();
2476        mkdirp(td.path().join("a/b/c"));
2477        mkdirp(td.path().join("x/y"));
2478        wfile(td.path().join("a/b/foo"), "");
2479        wfile(td.path().join("x/y/foo"), "");
2480
2481        assert_paths(
2482            td.path(),
2483            &WalkBuilder::new(td.path()),
2484            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2485        );
2486
2487        assert_paths(
2488            td.path(),
2489            &WalkBuilder::new(td.path())
2490                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
2491            &["x", "x/y", "x/y/foo"],
2492        );
2493    }
2494}