ignore/
walk.rs

1use std::{
2    cmp::Ordering,
3    ffi::OsStr,
4    fs::{self, FileType, Metadata},
5    io,
6    path::{Path, PathBuf},
7    sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering},
8    sync::Arc,
9};
10
11use {
12    crossbeam_deque::{Stealer, Worker as Deque},
13    same_file::Handle,
14    walkdir::WalkDir,
15};
16
17use crate::{
18    dir::{Ignore, IgnoreBuilder},
19    gitignore::GitignoreBuilder,
20    overrides::Override,
21    types::Types,
22    Error, PartialErrorBuilder,
23};
24
25/// A directory entry with a possible error attached.
26///
27/// The error typically refers to a problem parsing ignore files in a
28/// particular directory.
29#[derive(Clone, Debug)]
30pub struct DirEntry {
31    dent: DirEntryInner,
32    err: Option<Error>,
33}
34
35impl DirEntry {
36    /// The full path that this entry represents.
37    pub fn path(&self) -> &Path {
38        self.dent.path()
39    }
40
41    /// The full path that this entry represents.
42    /// Analogous to [`DirEntry::path`], but moves ownership of the path.
43    pub fn into_path(self) -> PathBuf {
44        self.dent.into_path()
45    }
46
47    /// Whether this entry corresponds to a symbolic link or not.
48    pub fn path_is_symlink(&self) -> bool {
49        self.dent.path_is_symlink()
50    }
51
52    /// Returns true if and only if this entry corresponds to stdin.
53    ///
54    /// i.e., The entry has depth 0 and its file name is `-`.
55    pub fn is_stdin(&self) -> bool {
56        self.dent.is_stdin()
57    }
58
59    /// Return the metadata for the file that this entry points to.
60    pub fn metadata(&self) -> Result<Metadata, Error> {
61        self.dent.metadata()
62    }
63
64    /// Return the file type for the file that this entry points to.
65    ///
66    /// This entry doesn't have a file type if it corresponds to stdin.
67    pub fn file_type(&self) -> Option<FileType> {
68        self.dent.file_type()
69    }
70
71    /// Return the file name of this entry.
72    ///
73    /// If this entry has no file name (e.g., `/`), then the full path is
74    /// returned.
75    pub fn file_name(&self) -> &OsStr {
76        self.dent.file_name()
77    }
78
79    /// Returns the depth at which this entry was created relative to the root.
80    pub fn depth(&self) -> usize {
81        self.dent.depth()
82    }
83
84    /// Returns the underlying inode number if one exists.
85    ///
86    /// If this entry doesn't have an inode number, then `None` is returned.
87    #[cfg(unix)]
88    pub fn ino(&self) -> Option<u64> {
89        self.dent.ino()
90    }
91
92    /// Returns an error, if one exists, associated with processing this entry.
93    ///
94    /// An example of an error is one that occurred while parsing an ignore
95    /// file. Errors related to traversing a directory tree itself are reported
96    /// as part of yielding the directory entry, and not with this method.
97    pub fn error(&self) -> Option<&Error> {
98        self.err.as_ref()
99    }
100
101    /// Returns true if and only if this entry points to a directory.
102    pub(crate) fn is_dir(&self) -> bool {
103        self.dent.is_dir()
104    }
105
106    fn new_stdin() -> DirEntry {
107        DirEntry { dent: DirEntryInner::Stdin, err: None }
108    }
109
110    fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
111        DirEntry { dent: DirEntryInner::Walkdir(dent), err }
112    }
113
114    fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
115        DirEntry { dent: DirEntryInner::Raw(dent), err }
116    }
117}
118
119/// DirEntryInner is the implementation of DirEntry.
120///
121/// It specifically represents three distinct sources of directory entries:
122///
123/// 1. From the walkdir crate.
124/// 2. Special entries that represent things like stdin.
125/// 3. From a path.
126///
127/// Specifically, (3) has to essentially re-create the DirEntry implementation
128/// from WalkDir.
129#[derive(Clone, Debug)]
130enum DirEntryInner {
131    Stdin,
132    Walkdir(walkdir::DirEntry),
133    Raw(DirEntryRaw),
134}
135
136impl DirEntryInner {
137    fn path(&self) -> &Path {
138        use self::DirEntryInner::*;
139        match *self {
140            Stdin => Path::new("<stdin>"),
141            Walkdir(ref x) => x.path(),
142            Raw(ref x) => x.path(),
143        }
144    }
145
146    fn into_path(self) -> PathBuf {
147        use self::DirEntryInner::*;
148        match self {
149            Stdin => PathBuf::from("<stdin>"),
150            Walkdir(x) => x.into_path(),
151            Raw(x) => x.into_path(),
152        }
153    }
154
155    fn path_is_symlink(&self) -> bool {
156        use self::DirEntryInner::*;
157        match *self {
158            Stdin => false,
159            Walkdir(ref x) => x.path_is_symlink(),
160            Raw(ref x) => x.path_is_symlink(),
161        }
162    }
163
164    fn is_stdin(&self) -> bool {
165        match *self {
166            DirEntryInner::Stdin => true,
167            _ => false,
168        }
169    }
170
171    fn metadata(&self) -> Result<Metadata, Error> {
172        use self::DirEntryInner::*;
173        match *self {
174            Stdin => {
175                let err = Error::Io(io::Error::new(
176                    io::ErrorKind::Other,
177                    "<stdin> has no metadata",
178                ));
179                Err(err.with_path("<stdin>"))
180            }
181            Walkdir(ref x) => x.metadata().map_err(|err| {
182                Error::Io(io::Error::from(err)).with_path(x.path())
183            }),
184            Raw(ref x) => x.metadata(),
185        }
186    }
187
188    fn file_type(&self) -> Option<FileType> {
189        use self::DirEntryInner::*;
190        match *self {
191            Stdin => None,
192            Walkdir(ref x) => Some(x.file_type()),
193            Raw(ref x) => Some(x.file_type()),
194        }
195    }
196
197    fn file_name(&self) -> &OsStr {
198        use self::DirEntryInner::*;
199        match *self {
200            Stdin => OsStr::new("<stdin>"),
201            Walkdir(ref x) => x.file_name(),
202            Raw(ref x) => x.file_name(),
203        }
204    }
205
206    fn depth(&self) -> usize {
207        use self::DirEntryInner::*;
208        match *self {
209            Stdin => 0,
210            Walkdir(ref x) => x.depth(),
211            Raw(ref x) => x.depth(),
212        }
213    }
214
215    #[cfg(unix)]
216    fn ino(&self) -> Option<u64> {
217        use self::DirEntryInner::*;
218        use walkdir::DirEntryExt;
219        match *self {
220            Stdin => None,
221            Walkdir(ref x) => Some(x.ino()),
222            Raw(ref x) => Some(x.ino()),
223        }
224    }
225
226    /// Returns true if and only if this entry points to a directory.
227    fn is_dir(&self) -> bool {
228        self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
229    }
230}
231
232/// DirEntryRaw is essentially copied from the walkdir crate so that we can
233/// build `DirEntry`s from whole cloth in the parallel iterator.
234#[derive(Clone)]
235struct DirEntryRaw {
236    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
237    /// symbolic link).
238    path: PathBuf,
239    /// The file type. Necessary for recursive iteration, so store it.
240    ty: FileType,
241    /// Is set when this entry was created from a symbolic link and the user
242    /// expects the iterator to follow symbolic links.
243    follow_link: bool,
244    /// The depth at which this entry was generated relative to the root.
245    depth: usize,
246    /// The underlying inode number (Unix only).
247    #[cfg(unix)]
248    ino: u64,
249    /// The underlying metadata (Windows only). We store this on Windows
250    /// because this comes for free while reading a directory.
251    #[cfg(windows)]
252    metadata: fs::Metadata,
253}
254
255impl std::fmt::Debug for DirEntryRaw {
256    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
257        // Leaving out FileType because it doesn't have a debug impl
258        // in Rust 1.9. We could add it if we really wanted to by manually
259        // querying each possibly file type. Meh. ---AG
260        f.debug_struct("DirEntryRaw")
261            .field("path", &self.path)
262            .field("follow_link", &self.follow_link)
263            .field("depth", &self.depth)
264            .finish()
265    }
266}
267
268impl DirEntryRaw {
269    fn path(&self) -> &Path {
270        &self.path
271    }
272
273    fn into_path(self) -> PathBuf {
274        self.path
275    }
276
277    fn path_is_symlink(&self) -> bool {
278        self.ty.is_symlink() || self.follow_link
279    }
280
281    fn metadata(&self) -> Result<Metadata, Error> {
282        self.metadata_internal()
283    }
284
285    #[cfg(windows)]
286    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
287        if self.follow_link {
288            fs::metadata(&self.path)
289        } else {
290            Ok(self.metadata.clone())
291        }
292        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
293    }
294
295    #[cfg(not(windows))]
296    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
297        if self.follow_link {
298            fs::metadata(&self.path)
299        } else {
300            fs::symlink_metadata(&self.path)
301        }
302        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
303    }
304
305    fn file_type(&self) -> FileType {
306        self.ty
307    }
308
309    fn file_name(&self) -> &OsStr {
310        self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
311    }
312
313    fn depth(&self) -> usize {
314        self.depth
315    }
316
317    #[cfg(unix)]
318    fn ino(&self) -> u64 {
319        self.ino
320    }
321
322    fn from_entry(
323        depth: usize,
324        ent: &fs::DirEntry,
325    ) -> Result<DirEntryRaw, Error> {
326        let ty = ent.file_type().map_err(|err| {
327            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
328            Error::WithDepth { depth, err: Box::new(err) }
329        })?;
330        DirEntryRaw::from_entry_os(depth, ent, ty)
331    }
332
333    #[cfg(windows)]
334    fn from_entry_os(
335        depth: usize,
336        ent: &fs::DirEntry,
337        ty: fs::FileType,
338    ) -> Result<DirEntryRaw, Error> {
339        let md = ent.metadata().map_err(|err| {
340            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
341            Error::WithDepth { depth, err: Box::new(err) }
342        })?;
343        Ok(DirEntryRaw {
344            path: ent.path(),
345            ty,
346            follow_link: false,
347            depth,
348            metadata: md,
349        })
350    }
351
352    #[cfg(unix)]
353    fn from_entry_os(
354        depth: usize,
355        ent: &fs::DirEntry,
356        ty: fs::FileType,
357    ) -> Result<DirEntryRaw, Error> {
358        use std::os::unix::fs::DirEntryExt;
359
360        Ok(DirEntryRaw {
361            path: ent.path(),
362            ty,
363            follow_link: false,
364            depth,
365            ino: ent.ino(),
366        })
367    }
368
369    // Placeholder implementation to allow compiling on non-standard platforms
370    // (e.g. wasm32).
371    #[cfg(not(any(windows, unix)))]
372    fn from_entry_os(
373        depth: usize,
374        ent: &fs::DirEntry,
375        ty: fs::FileType,
376    ) -> Result<DirEntryRaw, Error> {
377        Err(Error::Io(io::Error::new(
378            io::ErrorKind::Other,
379            "unsupported platform",
380        )))
381    }
382
383    #[cfg(windows)]
384    fn from_path(
385        depth: usize,
386        pb: PathBuf,
387        link: bool,
388    ) -> Result<DirEntryRaw, Error> {
389        let md =
390            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
391        Ok(DirEntryRaw {
392            path: pb,
393            ty: md.file_type(),
394            follow_link: link,
395            depth,
396            metadata: md,
397        })
398    }
399
400    #[cfg(unix)]
401    fn from_path(
402        depth: usize,
403        pb: PathBuf,
404        link: bool,
405    ) -> Result<DirEntryRaw, Error> {
406        use std::os::unix::fs::MetadataExt;
407
408        let md =
409            fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
410        Ok(DirEntryRaw {
411            path: pb,
412            ty: md.file_type(),
413            follow_link: link,
414            depth,
415            ino: md.ino(),
416        })
417    }
418
419    // Placeholder implementation to allow compiling on non-standard platforms
420    // (e.g. wasm32).
421    #[cfg(not(any(windows, unix)))]
422    fn from_path(
423        depth: usize,
424        pb: PathBuf,
425        link: bool,
426    ) -> Result<DirEntryRaw, Error> {
427        Err(Error::Io(io::Error::new(
428            io::ErrorKind::Other,
429            "unsupported platform",
430        )))
431    }
432}
433
434/// WalkBuilder builds a recursive directory iterator.
435///
436/// The builder supports a large number of configurable options. This includes
437/// specific glob overrides, file type matching, toggling whether hidden
438/// files are ignored or not, and of course, support for respecting gitignore
439/// files.
440///
441/// By default, all ignore files found are respected. This includes `.ignore`,
442/// `.gitignore`, `.git/info/exclude` and even your global gitignore
443/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
444///
445/// Some standard recursive directory options are also supported, such as
446/// limiting the recursive depth or whether to follow symbolic links (disabled
447/// by default).
448///
449/// # Ignore rules
450///
451/// There are many rules that influence whether a particular file or directory
452/// is skipped by this iterator. Those rules are documented here. Note that
453/// the rules assume a default configuration.
454///
455/// * First, glob overrides are checked. If a path matches a glob override,
456/// then matching stops. The path is then only skipped if the glob that matched
457/// the path is an ignore glob. (An override glob is a whitelist glob unless it
458/// starts with a `!`, in which case it is an ignore glob.)
459/// * Second, ignore files are checked. Ignore files currently only come from
460/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
461/// global gitignore file), plain `.ignore` files, which have the same format
462/// as gitignore files, or explicitly added ignore files. The precedence order
463/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
464/// finally explicitly added ignore files. Note that precedence between
465/// different types of ignore files is not impacted by the directory hierarchy;
466/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
467/// level, more nested ignore files have a higher precedence than less nested
468/// ignore files.
469/// * Third, if the previous step yields an ignore match, then all matching
470/// is stopped and the path is skipped. If it yields a whitelist match, then
471/// matching continues. A whitelist match can be overridden by a later matcher.
472/// * Fourth, unless the path is a directory, the file type matcher is run on
473/// the path. As above, if it yields an ignore match, then all matching is
474/// stopped and the path is skipped. If it yields a whitelist match, then
475/// matching continues.
476/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
477/// path is skipped.
478/// * Sixth, unless the path is a directory, the size of the file is compared
479/// against the max filesize limit. If it exceeds the limit, it is skipped.
480/// * Seventh, if the path has made it this far then it is yielded in the
481/// iterator.
482#[derive(Clone)]
483pub struct WalkBuilder {
484    paths: Vec<PathBuf>,
485    ig_builder: IgnoreBuilder,
486    max_depth: Option<usize>,
487    max_filesize: Option<u64>,
488    follow_links: bool,
489    same_file_system: bool,
490    sorter: Option<Sorter>,
491    threads: usize,
492    skip: Option<Arc<Handle>>,
493    filter: Option<Filter>,
494}
495
496#[derive(Clone)]
497enum Sorter {
498    ByName(Arc<dyn Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static>),
499    ByPath(Arc<dyn Fn(&Path, &Path) -> Ordering + Send + Sync + 'static>),
500}
501
502#[derive(Clone)]
503struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
504
505impl std::fmt::Debug for WalkBuilder {
506    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
507        f.debug_struct("WalkBuilder")
508            .field("paths", &self.paths)
509            .field("ig_builder", &self.ig_builder)
510            .field("max_depth", &self.max_depth)
511            .field("max_filesize", &self.max_filesize)
512            .field("follow_links", &self.follow_links)
513            .field("threads", &self.threads)
514            .field("skip", &self.skip)
515            .finish()
516    }
517}
518
519impl WalkBuilder {
520    /// Create a new builder for a recursive directory iterator for the
521    /// directory given.
522    ///
523    /// Note that if you want to traverse multiple different directories, it
524    /// is better to call `add` on this builder than to create multiple
525    /// `Walk` values.
526    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
527        WalkBuilder {
528            paths: vec![path.as_ref().to_path_buf()],
529            ig_builder: IgnoreBuilder::new(),
530            max_depth: None,
531            max_filesize: None,
532            follow_links: false,
533            same_file_system: false,
534            sorter: None,
535            threads: 0,
536            skip: None,
537            filter: None,
538        }
539    }
540
541    /// Build a new `Walk` iterator.
542    pub fn build(&self) -> Walk {
543        let follow_links = self.follow_links;
544        let max_depth = self.max_depth;
545        let sorter = self.sorter.clone();
546        let its = self
547            .paths
548            .iter()
549            .map(move |p| {
550                if p == Path::new("-") {
551                    (p.to_path_buf(), None)
552                } else {
553                    let mut wd = WalkDir::new(p);
554                    wd = wd.follow_links(follow_links || p.is_file());
555                    wd = wd.same_file_system(self.same_file_system);
556                    if let Some(max_depth) = max_depth {
557                        wd = wd.max_depth(max_depth);
558                    }
559                    if let Some(ref sorter) = sorter {
560                        match sorter.clone() {
561                            Sorter::ByName(cmp) => {
562                                wd = wd.sort_by(move |a, b| {
563                                    cmp(a.file_name(), b.file_name())
564                                });
565                            }
566                            Sorter::ByPath(cmp) => {
567                                wd = wd.sort_by(move |a, b| {
568                                    cmp(a.path(), b.path())
569                                });
570                            }
571                        }
572                    }
573                    (p.to_path_buf(), Some(WalkEventIter::from(wd)))
574                }
575            })
576            .collect::<Vec<_>>()
577            .into_iter();
578        let ig_root = self.ig_builder.build();
579        Walk {
580            its,
581            it: None,
582            ig_root: ig_root.clone(),
583            ig: ig_root.clone(),
584            max_filesize: self.max_filesize,
585            skip: self.skip.clone(),
586            filter: self.filter.clone(),
587        }
588    }
589
590    /// Build a new `WalkParallel` iterator.
591    ///
592    /// Note that this *doesn't* return something that implements `Iterator`.
593    /// Instead, the returned value must be run with a closure. e.g.,
594    /// `builder.build_parallel().run(|| |path| { println!("{path:?}"); WalkState::Continue })`.
595    pub fn build_parallel(&self) -> WalkParallel {
596        WalkParallel {
597            paths: self.paths.clone().into_iter(),
598            ig_root: self.ig_builder.build(),
599            max_depth: self.max_depth,
600            max_filesize: self.max_filesize,
601            follow_links: self.follow_links,
602            same_file_system: self.same_file_system,
603            threads: self.threads,
604            skip: self.skip.clone(),
605            filter: self.filter.clone(),
606        }
607    }
608
609    /// Add a file path to the iterator.
610    ///
611    /// Each additional file path added is traversed recursively. This should
612    /// be preferred over building multiple `Walk` iterators since this
613    /// enables reusing resources across iteration.
614    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
615        self.paths.push(path.as_ref().to_path_buf());
616        self
617    }
618
619    /// The maximum depth to recurse.
620    ///
621    /// The default, `None`, imposes no depth restriction.
622    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
623        self.max_depth = depth;
624        self
625    }
626
627    /// Whether to follow symbolic links or not.
628    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
629        self.follow_links = yes;
630        self
631    }
632
633    /// Whether to ignore files above the specified limit.
634    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
635        self.max_filesize = filesize;
636        self
637    }
638
639    /// The number of threads to use for traversal.
640    ///
641    /// Note that this only has an effect when using `build_parallel`.
642    ///
643    /// The default setting is `0`, which chooses the number of threads
644    /// automatically using heuristics.
645    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
646        self.threads = n;
647        self
648    }
649
650    /// Add a global ignore file to the matcher.
651    ///
652    /// This has lower precedence than all other sources of ignore rules.
653    ///
654    /// If there was a problem adding the ignore file, then an error is
655    /// returned. Note that the error may indicate *partial* failure. For
656    /// example, if an ignore file contains an invalid glob, all other globs
657    /// are still applied.
658    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
659        let mut builder = GitignoreBuilder::new("");
660        let mut errs = PartialErrorBuilder::default();
661        errs.maybe_push(builder.add(path));
662        match builder.build() {
663            Ok(gi) => {
664                self.ig_builder.add_ignore(gi);
665            }
666            Err(err) => {
667                errs.push(err);
668            }
669        }
670        errs.into_error_option()
671    }
672
673    /// Add a custom ignore file name
674    ///
675    /// These ignore files have higher precedence than all other ignore files.
676    ///
677    /// When specifying multiple names, earlier names have lower precedence than
678    /// later names.
679    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
680        &mut self,
681        file_name: S,
682    ) -> &mut WalkBuilder {
683        self.ig_builder.add_custom_ignore_filename(file_name);
684        self
685    }
686
687    /// Add an override matcher.
688    ///
689    /// By default, no override matcher is used.
690    ///
691    /// This overrides any previous setting.
692    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
693        self.ig_builder.overrides(overrides);
694        self
695    }
696
697    /// Add a file type matcher.
698    ///
699    /// By default, no file type matcher is used.
700    ///
701    /// This overrides any previous setting.
702    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
703        self.ig_builder.types(types);
704        self
705    }
706
707    /// Enables all the standard ignore filters.
708    ///
709    /// This toggles, as a group, all the filters that are enabled by default:
710    ///
711    /// - [hidden()](#method.hidden)
712    /// - [parents()](#method.parents)
713    /// - [ignore()](#method.ignore)
714    /// - [git_ignore()](#method.git_ignore)
715    /// - [git_global()](#method.git_global)
716    /// - [git_exclude()](#method.git_exclude)
717    ///
718    /// They may still be toggled individually after calling this function.
719    ///
720    /// This is (by definition) enabled by default.
721    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
722        self.hidden(yes)
723            .parents(yes)
724            .ignore(yes)
725            .git_ignore(yes)
726            .git_global(yes)
727            .git_exclude(yes)
728    }
729
730    /// Enables ignoring hidden files.
731    ///
732    /// This is enabled by default.
733    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
734        self.ig_builder.hidden(yes);
735        self
736    }
737
738    /// Enables reading ignore files from parent directories.
739    ///
740    /// If this is enabled, then .gitignore files in parent directories of each
741    /// file path given are respected. Otherwise, they are ignored.
742    ///
743    /// This is enabled by default.
744    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
745        self.ig_builder.parents(yes);
746        self
747    }
748
749    /// Enables reading `.ignore` files.
750    ///
751    /// `.ignore` files have the same semantics as `gitignore` files and are
752    /// supported by search tools such as ripgrep and The Silver Searcher.
753    ///
754    /// This is enabled by default.
755    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
756        self.ig_builder.ignore(yes);
757        self
758    }
759
760    /// Enables reading a global gitignore file, whose path is specified in
761    /// git's `core.excludesFile` config option.
762    ///
763    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
764    /// does not exist or does not specify `core.excludesFile`, then
765    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
766    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
767    ///
768    /// This is enabled by default.
769    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
770        self.ig_builder.git_global(yes);
771        self
772    }
773
774    /// Enables reading `.gitignore` files.
775    ///
776    /// `.gitignore` files have match semantics as described in the `gitignore`
777    /// man page.
778    ///
779    /// This is enabled by default.
780    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
781        self.ig_builder.git_ignore(yes);
782        self
783    }
784
785    /// Enables reading `.git/info/exclude` files.
786    ///
787    /// `.git/info/exclude` files have match semantics as described in the
788    /// `gitignore` man page.
789    ///
790    /// This is enabled by default.
791    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
792        self.ig_builder.git_exclude(yes);
793        self
794    }
795
796    /// Whether a git repository is required to apply git-related ignore
797    /// rules (global rules, .gitignore and local exclude rules).
798    ///
799    /// When disabled, git-related ignore rules are applied even when searching
800    /// outside a git repository.
801    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
802        self.ig_builder.require_git(yes);
803        self
804    }
805
806    /// Process ignore files case insensitively
807    ///
808    /// This is disabled by default.
809    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
810        self.ig_builder.ignore_case_insensitive(yes);
811        self
812    }
813
814    /// Set a function for sorting directory entries by their path.
815    ///
816    /// If a compare function is set, the resulting iterator will return all
817    /// paths in sorted order. The compare function will be called to compare
818    /// entries from the same directory.
819    ///
820    /// This is like `sort_by_file_name`, except the comparator accepts
821    /// a `&Path` instead of the base file name, which permits it to sort by
822    /// more criteria.
823    ///
824    /// This method will override any previous sorter set by this method or
825    /// by `sort_by_file_name`.
826    ///
827    /// Note that this is not used in the parallel iterator.
828    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
829    where
830        F: Fn(&Path, &Path) -> Ordering + Send + Sync + 'static,
831    {
832        self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
833        self
834    }
835
836    /// Set a function for sorting directory entries by file name.
837    ///
838    /// If a compare function is set, the resulting iterator will return all
839    /// paths in sorted order. The compare function will be called to compare
840    /// names from entries from the same directory using only the name of the
841    /// entry.
842    ///
843    /// This method will override any previous sorter set by this method or
844    /// by `sort_by_file_path`.
845    ///
846    /// Note that this is not used in the parallel iterator.
847    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
848    where
849        F: Fn(&OsStr, &OsStr) -> Ordering + Send + Sync + 'static,
850    {
851        self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
852        self
853    }
854
855    /// Do not cross file system boundaries.
856    ///
857    /// When this option is enabled, directory traversal will not descend into
858    /// directories that are on a different file system from the root path.
859    ///
860    /// Currently, this option is only supported on Unix and Windows. If this
861    /// option is used on an unsupported platform, then directory traversal
862    /// will immediately return an error and will not yield any entries.
863    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
864        self.same_file_system = yes;
865        self
866    }
867
868    /// Do not yield directory entries that are believed to correspond to
869    /// stdout.
870    ///
871    /// This is useful when a command is invoked via shell redirection to a
872    /// file that is also being read. For example, `grep -r foo ./ > results`
873    /// might end up trying to search `results` even though it is also writing
874    /// to it, which could cause an unbounded feedback loop. Setting this
875    /// option prevents this from happening by skipping over the `results`
876    /// file.
877    ///
878    /// This is disabled by default.
879    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
880        if yes {
881            self.skip = stdout_handle().map(Arc::new);
882        } else {
883            self.skip = None;
884        }
885        self
886    }
887
888    /// Yields only entries which satisfy the given predicate and skips
889    /// descending into directories that do not satisfy the given predicate.
890    ///
891    /// The predicate is applied to all entries. If the predicate is
892    /// true, iteration carries on as normal. If the predicate is false, the
893    /// entry is ignored and if it is a directory, it is not descended into.
894    ///
895    /// Note that the errors for reading entries that may not satisfy the
896    /// predicate will still be yielded.
897    pub fn filter_entry<P>(&mut self, filter: P) -> &mut WalkBuilder
898    where
899        P: Fn(&DirEntry) -> bool + Send + Sync + 'static,
900    {
901        self.filter = Some(Filter(Arc::new(filter)));
902        self
903    }
904}
905
906/// Walk is a recursive directory iterator over file paths in one or more
907/// directories.
908///
909/// Only file and directory paths matching the rules are returned. By default,
910/// ignore files like `.gitignore` are respected. The precise matching rules
911/// and precedence is explained in the documentation for `WalkBuilder`.
912pub struct Walk {
913    its: std::vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
914    it: Option<WalkEventIter>,
915    ig_root: Ignore,
916    ig: Ignore,
917    max_filesize: Option<u64>,
918    skip: Option<Arc<Handle>>,
919    filter: Option<Filter>,
920}
921
922impl Walk {
923    /// Creates a new recursive directory iterator for the file path given.
924    ///
925    /// Note that this uses default settings, which include respecting
926    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
927    /// instead.
928    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
929        WalkBuilder::new(path).build()
930    }
931
932    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
933        if ent.depth() == 0 {
934            return Ok(false);
935        }
936        // We ensure that trivial skipping is done before any other potentially
937        // expensive operations (stat, filesystem other) are done. This seems
938        // like an obvious optimization but becomes critical when filesystem
939        // operations even as simple as stat can result in significant
940        // overheads; an example of this was a bespoke filesystem layer in
941        // Windows that hosted files remotely and would download them on-demand
942        // when particular filesystem operations occurred. Users of this system
943        // who ensured correct file-type filters were being used could still
944        // get unnecessary file access resulting in large downloads.
945        if should_skip_entry(&self.ig, ent) {
946            return Ok(true);
947        }
948        if let Some(ref stdout) = self.skip {
949            if path_equals(ent, stdout)? {
950                return Ok(true);
951            }
952        }
953        if self.max_filesize.is_some() && !ent.is_dir() {
954            return Ok(skip_filesize(
955                self.max_filesize.unwrap(),
956                ent.path(),
957                &ent.metadata().ok(),
958            ));
959        }
960        if let Some(Filter(filter)) = &self.filter {
961            if !filter(ent) {
962                return Ok(true);
963            }
964        }
965        Ok(false)
966    }
967}
968
969impl Iterator for Walk {
970    type Item = Result<DirEntry, Error>;
971
972    #[inline(always)]
973    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
974        loop {
975            let ev = match self.it.as_mut().and_then(|it| it.next()) {
976                Some(ev) => ev,
977                None => {
978                    match self.its.next() {
979                        None => return None,
980                        Some((_, None)) => {
981                            return Some(Ok(DirEntry::new_stdin()));
982                        }
983                        Some((path, Some(it))) => {
984                            self.it = Some(it);
985                            if path.is_dir() {
986                                let (ig, err) = self.ig_root.add_parents(path);
987                                self.ig = ig;
988                                if let Some(err) = err {
989                                    return Some(Err(err));
990                                }
991                            } else {
992                                self.ig = self.ig_root.clone();
993                            }
994                        }
995                    }
996                    continue;
997                }
998            };
999            match ev {
1000                Err(err) => {
1001                    return Some(Err(Error::from_walkdir(err)));
1002                }
1003                Ok(WalkEvent::Exit) => {
1004                    self.ig = self.ig.parent().unwrap();
1005                }
1006                Ok(WalkEvent::Dir(ent)) => {
1007                    let mut ent = DirEntry::new_walkdir(ent, None);
1008                    let should_skip = match self.skip_entry(&ent) {
1009                        Err(err) => return Some(Err(err)),
1010                        Ok(should_skip) => should_skip,
1011                    };
1012                    if should_skip {
1013                        self.it.as_mut().unwrap().it.skip_current_dir();
1014                        // Still need to push this on the stack because
1015                        // we'll get a WalkEvent::Exit event for this dir.
1016                        // We don't care if it errors though.
1017                        let (igtmp, _) = self.ig.add_child(ent.path());
1018                        self.ig = igtmp;
1019                        continue;
1020                    }
1021                    let (igtmp, err) = self.ig.add_child(ent.path());
1022                    self.ig = igtmp;
1023                    ent.err = err;
1024                    return Some(Ok(ent));
1025                }
1026                Ok(WalkEvent::File(ent)) => {
1027                    let ent = DirEntry::new_walkdir(ent, None);
1028                    let should_skip = match self.skip_entry(&ent) {
1029                        Err(err) => return Some(Err(err)),
1030                        Ok(should_skip) => should_skip,
1031                    };
1032                    if should_skip {
1033                        continue;
1034                    }
1035                    return Some(Ok(ent));
1036                }
1037            }
1038        }
1039    }
1040}
1041
1042impl std::iter::FusedIterator for Walk {}
1043
1044/// WalkEventIter transforms a WalkDir iterator into an iterator that more
1045/// accurately describes the directory tree. Namely, it emits events that are
1046/// one of three types: directory, file or "exit." An "exit" event means that
1047/// the entire contents of a directory have been enumerated.
1048struct WalkEventIter {
1049    depth: usize,
1050    it: walkdir::IntoIter,
1051    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
1052}
1053
1054#[derive(Debug)]
1055enum WalkEvent {
1056    Dir(walkdir::DirEntry),
1057    File(walkdir::DirEntry),
1058    Exit,
1059}
1060
1061impl From<WalkDir> for WalkEventIter {
1062    fn from(it: WalkDir) -> WalkEventIter {
1063        WalkEventIter { depth: 0, it: it.into_iter(), next: None }
1064    }
1065}
1066
1067impl Iterator for WalkEventIter {
1068    type Item = walkdir::Result<WalkEvent>;
1069
1070    #[inline(always)]
1071    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1072        let dent = self.next.take().or_else(|| self.it.next());
1073        let depth = match dent {
1074            None => 0,
1075            Some(Ok(ref dent)) => dent.depth(),
1076            Some(Err(ref err)) => err.depth(),
1077        };
1078        if depth < self.depth {
1079            self.depth -= 1;
1080            self.next = dent;
1081            return Some(Ok(WalkEvent::Exit));
1082        }
1083        self.depth = depth;
1084        match dent {
1085            None => None,
1086            Some(Err(err)) => Some(Err(err)),
1087            Some(Ok(dent)) => {
1088                if walkdir_is_dir(&dent) {
1089                    self.depth += 1;
1090                    Some(Ok(WalkEvent::Dir(dent)))
1091                } else {
1092                    Some(Ok(WalkEvent::File(dent)))
1093                }
1094            }
1095        }
1096    }
1097}
1098
1099/// WalkState is used in the parallel recursive directory iterator to indicate
1100/// whether walking should continue as normal, skip descending into a
1101/// particular directory or quit the walk entirely.
1102#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1103pub enum WalkState {
1104    /// Continue walking as normal.
1105    Continue,
1106    /// If the directory entry given is a directory, don't descend into it.
1107    /// In all other cases, this has no effect.
1108    Skip,
1109    /// Quit the entire iterator as soon as possible.
1110    ///
1111    /// Note that this is an inherently asynchronous action. It is possible
1112    /// for more entries to be yielded even after instructing the iterator
1113    /// to quit.
1114    Quit,
1115}
1116
1117impl WalkState {
1118    fn is_continue(&self) -> bool {
1119        *self == WalkState::Continue
1120    }
1121
1122    fn is_quit(&self) -> bool {
1123        *self == WalkState::Quit
1124    }
1125}
1126
1127/// A builder for constructing a visitor when using [`WalkParallel::visit`].
1128/// The builder will be called for each thread started by `WalkParallel`. The
1129/// visitor returned from each builder is then called for every directory
1130/// entry.
1131pub trait ParallelVisitorBuilder<'s> {
1132    /// Create per-thread `ParallelVisitor`s for `WalkParallel`.
1133    fn build(&mut self) -> Box<dyn ParallelVisitor + 's>;
1134}
1135
1136impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s>
1137    for &'a mut P
1138{
1139    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1140        (**self).build()
1141    }
1142}
1143
1144/// Receives files and directories for the current thread.
1145///
1146/// Setup for the traversal can be implemented as part of
1147/// [`ParallelVisitorBuilder::build`]. Teardown when traversal finishes can be
1148/// implemented by implementing the `Drop` trait on your traversal type.
1149pub trait ParallelVisitor: Send {
1150    /// Receives files and directories for the current thread. This is called
1151    /// once for every directory entry visited by traversal.
1152    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState;
1153}
1154
1155struct FnBuilder<F> {
1156    builder: F,
1157}
1158
1159impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s>
1160    for FnBuilder<F>
1161{
1162    fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
1163        let visitor = (self.builder)();
1164        Box::new(FnVisitorImp { visitor })
1165    }
1166}
1167
1168type FnVisitor<'s> =
1169    Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 's>;
1170
1171struct FnVisitorImp<'s> {
1172    visitor: FnVisitor<'s>,
1173}
1174
1175impl<'s> ParallelVisitor for FnVisitorImp<'s> {
1176    fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
1177        (self.visitor)(entry)
1178    }
1179}
1180
1181/// WalkParallel is a parallel recursive directory iterator over files paths
1182/// in one or more directories.
1183///
1184/// Only file and directory paths matching the rules are returned. By default,
1185/// ignore files like `.gitignore` are respected. The precise matching rules
1186/// and precedence is explained in the documentation for `WalkBuilder`.
1187///
1188/// Unlike `Walk`, this uses multiple threads for traversing a directory.
1189pub struct WalkParallel {
1190    paths: std::vec::IntoIter<PathBuf>,
1191    ig_root: Ignore,
1192    max_filesize: Option<u64>,
1193    max_depth: Option<usize>,
1194    follow_links: bool,
1195    same_file_system: bool,
1196    threads: usize,
1197    skip: Option<Arc<Handle>>,
1198    filter: Option<Filter>,
1199}
1200
1201impl WalkParallel {
1202    /// Execute the parallel recursive directory iterator. `mkf` is called
1203    /// for each thread used for iteration. The function produced by `mkf`
1204    /// is then in turn called for each visited file path.
1205    pub fn run<'s, F>(self, mkf: F)
1206    where
1207        F: FnMut() -> FnVisitor<'s>,
1208    {
1209        self.visit(&mut FnBuilder { builder: mkf })
1210    }
1211
1212    /// Execute the parallel recursive directory iterator using a custom
1213    /// visitor.
1214    ///
1215    /// The builder given is used to construct a visitor for every thread
1216    /// used by this traversal. The visitor returned from each builder is then
1217    /// called for every directory entry seen by that thread.
1218    ///
1219    /// Typically, creating a custom visitor is useful if you need to perform
1220    /// some kind of cleanup once traversal is finished. This can be achieved
1221    /// by implementing `Drop` for your builder (or for your visitor, if you
1222    /// want to execute cleanup for every thread that is launched).
1223    ///
1224    /// For example, each visitor might build up a data structure of results
1225    /// corresponding to the directory entries seen for each thread. Since each
1226    /// visitor runs on only one thread, this build-up can be done without
1227    /// synchronization. Then, once traversal is complete, all of the results
1228    /// can be merged together into a single data structure.
1229    pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
1230        let threads = self.threads();
1231        let mut stack = vec![];
1232        {
1233            let mut visitor = builder.build();
1234            let mut paths = Vec::new().into_iter();
1235            std::mem::swap(&mut paths, &mut self.paths);
1236            // Send the initial set of root paths to the pool of workers. Note
1237            // that we only send directories. For files, we send to them the
1238            // callback directly.
1239            for path in paths {
1240                let (dent, root_device) = if path == Path::new("-") {
1241                    (DirEntry::new_stdin(), None)
1242                } else {
1243                    let root_device = if !self.same_file_system {
1244                        None
1245                    } else {
1246                        match device_num(&path) {
1247                            Ok(root_device) => Some(root_device),
1248                            Err(err) => {
1249                                let err = Error::Io(err).with_path(path);
1250                                if visitor.visit(Err(err)).is_quit() {
1251                                    return;
1252                                }
1253                                continue;
1254                            }
1255                        }
1256                    };
1257                    match DirEntryRaw::from_path(0, path, false) {
1258                        Ok(dent) => {
1259                            (DirEntry::new_raw(dent, None), root_device)
1260                        }
1261                        Err(err) => {
1262                            if visitor.visit(Err(err)).is_quit() {
1263                                return;
1264                            }
1265                            continue;
1266                        }
1267                    }
1268                };
1269                stack.push(Message::Work(Work {
1270                    dent,
1271                    ignore: self.ig_root.clone(),
1272                    root_device,
1273                }));
1274            }
1275            // ... but there's no need to start workers if we don't need them.
1276            if stack.is_empty() {
1277                return;
1278            }
1279        }
1280        // Create the workers and then wait for them to finish.
1281        let quit_now = Arc::new(AtomicBool::new(false));
1282        let active_workers = Arc::new(AtomicUsize::new(threads));
1283        let stacks = Stack::new_for_each_thread(threads, stack);
1284        std::thread::scope(|s| {
1285            let handles: Vec<_> = stacks
1286                .into_iter()
1287                .map(|stack| Worker {
1288                    visitor: builder.build(),
1289                    stack,
1290                    quit_now: quit_now.clone(),
1291                    active_workers: active_workers.clone(),
1292                    max_depth: self.max_depth,
1293                    max_filesize: self.max_filesize,
1294                    follow_links: self.follow_links,
1295                    skip: self.skip.clone(),
1296                    filter: self.filter.clone(),
1297                })
1298                .map(|worker| s.spawn(|| worker.run()))
1299                .collect();
1300            for handle in handles {
1301                handle.join().unwrap();
1302            }
1303        });
1304    }
1305
1306    fn threads(&self) -> usize {
1307        if self.threads == 0 {
1308            2
1309        } else {
1310            self.threads
1311        }
1312    }
1313}
1314
1315/// Message is the set of instructions that a worker knows how to process.
1316enum Message {
1317    /// A work item corresponds to a directory that should be descended into.
1318    /// Work items for entries that should be skipped or ignored should not
1319    /// be produced.
1320    Work(Work),
1321    /// This instruction indicates that the worker should quit.
1322    Quit,
1323}
1324
1325/// A unit of work for each worker to process.
1326///
1327/// Each unit of work corresponds to a directory that should be descended
1328/// into.
1329struct Work {
1330    /// The directory entry.
1331    dent: DirEntry,
1332    /// Any ignore matchers that have been built for this directory's parents.
1333    ignore: Ignore,
1334    /// The root device number. When present, only files with the same device
1335    /// number should be considered.
1336    root_device: Option<u64>,
1337}
1338
1339impl Work {
1340    /// Returns true if and only if this work item is a directory.
1341    fn is_dir(&self) -> bool {
1342        self.dent.is_dir()
1343    }
1344
1345    /// Returns true if and only if this work item is a symlink.
1346    fn is_symlink(&self) -> bool {
1347        self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1348    }
1349
1350    /// Adds ignore rules for parent directories.
1351    ///
1352    /// Note that this only applies to entries at depth 0. On all other
1353    /// entries, this is a no-op.
1354    fn add_parents(&mut self) -> Option<Error> {
1355        if self.dent.depth() > 0 {
1356            return None;
1357        }
1358        // At depth 0, the path of this entry is a root path, so we can
1359        // use it directly to add parent ignore rules.
1360        let (ig, err) = self.ignore.add_parents(self.dent.path());
1361        self.ignore = ig;
1362        err
1363    }
1364
1365    /// Reads the directory contents of this work item and adds ignore
1366    /// rules for this directory.
1367    ///
1368    /// If there was a problem with reading the directory contents, then
1369    /// an error is returned. If there was a problem reading the ignore
1370    /// rules for this directory, then the error is attached to this
1371    /// work item's directory entry.
1372    fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1373        let readdir = match fs::read_dir(self.dent.path()) {
1374            Ok(readdir) => readdir,
1375            Err(err) => {
1376                let err = Error::from(err)
1377                    .with_path(self.dent.path())
1378                    .with_depth(self.dent.depth());
1379                return Err(err);
1380            }
1381        };
1382        let (ig, err) = self.ignore.add_child(self.dent.path());
1383        self.ignore = ig;
1384        self.dent.err = err;
1385        Ok(readdir)
1386    }
1387}
1388
1389/// A work-stealing stack.
1390#[derive(Debug)]
1391struct Stack {
1392    /// This thread's index.
1393    index: usize,
1394    /// The thread-local stack.
1395    deque: Deque<Message>,
1396    /// The work stealers.
1397    stealers: Arc<[Stealer<Message>]>,
1398}
1399
1400impl Stack {
1401    /// Create a work-stealing stack for each thread. The given messages
1402    /// correspond to the initial paths to start the search at. They will
1403    /// be distributed automatically to each stack in a round-robin fashion.
1404    fn new_for_each_thread(threads: usize, init: Vec<Message>) -> Vec<Stack> {
1405        // Using new_lifo() ensures each worker operates depth-first, not
1406        // breadth-first. We do depth-first because a breadth first traversal
1407        // on wide directories with a lot of gitignores is disastrous (for
1408        // example, searching a directory tree containing all of crates.io).
1409        let deques: Vec<Deque<Message>> =
1410            std::iter::repeat_with(Deque::new_lifo).take(threads).collect();
1411        let stealers = Arc::<[Stealer<Message>]>::from(
1412            deques.iter().map(Deque::stealer).collect::<Vec<_>>(),
1413        );
1414        let stacks: Vec<Stack> = deques
1415            .into_iter()
1416            .enumerate()
1417            .map(|(index, deque)| Stack {
1418                index,
1419                deque,
1420                stealers: stealers.clone(),
1421            })
1422            .collect();
1423        // Distribute the initial messages.
1424        init.into_iter()
1425            .zip(stacks.iter().cycle())
1426            .for_each(|(m, s)| s.push(m));
1427        stacks
1428    }
1429
1430    /// Push a message.
1431    fn push(&self, msg: Message) {
1432        self.deque.push(msg);
1433    }
1434
1435    /// Pop a message.
1436    fn pop(&self) -> Option<Message> {
1437        self.deque.pop().or_else(|| self.steal())
1438    }
1439
1440    /// Steal a message from another queue.
1441    fn steal(&self) -> Option<Message> {
1442        // For fairness, try to steal from index + 1, index + 2, ... len - 1,
1443        // then wrap around to 0, 1, ... index - 1.
1444        let (left, right) = self.stealers.split_at(self.index);
1445        // Don't steal from ourselves
1446        let right = &right[1..];
1447
1448        right
1449            .iter()
1450            .chain(left.iter())
1451            .map(|s| s.steal_batch_and_pop(&self.deque))
1452            .find_map(|s| s.success())
1453    }
1454}
1455
1456/// A worker is responsible for descending into directories, updating the
1457/// ignore matchers, producing new work and invoking the caller's callback.
1458///
1459/// Note that a worker is *both* a producer and a consumer.
1460struct Worker<'s> {
1461    /// The caller's callback.
1462    visitor: Box<dyn ParallelVisitor + 's>,
1463    /// A work-stealing stack of work to do.
1464    ///
1465    /// We use a stack instead of a channel because a stack lets us visit
1466    /// directories in depth first order. This can substantially reduce peak
1467    /// memory usage by keeping both the number of file paths and gitignore
1468    /// matchers in memory lower.
1469    stack: Stack,
1470    /// Whether all workers should terminate at the next opportunity. Note
1471    /// that we need this because we don't want other `Work` to be done after
1472    /// we quit. We wouldn't need this if have a priority channel.
1473    quit_now: Arc<AtomicBool>,
1474    /// The number of currently active workers.
1475    active_workers: Arc<AtomicUsize>,
1476    /// The maximum depth of directories to descend. A value of `0` means no
1477    /// descension at all.
1478    max_depth: Option<usize>,
1479    /// The maximum size a searched file can be (in bytes). If a file exceeds
1480    /// this size it will be skipped.
1481    max_filesize: Option<u64>,
1482    /// Whether to follow symbolic links or not. When this is enabled, loop
1483    /// detection is performed.
1484    follow_links: bool,
1485    /// A file handle to skip, currently is either `None` or stdout, if it's
1486    /// a file and it has been requested to skip files identical to stdout.
1487    skip: Option<Arc<Handle>>,
1488    /// A predicate applied to dir entries. If true, the entry and all
1489    /// children will be skipped.
1490    filter: Option<Filter>,
1491}
1492
1493impl<'s> Worker<'s> {
1494    /// Runs this worker until there is no more work left to do.
1495    ///
1496    /// The worker will call the caller's callback for all entries that aren't
1497    /// skipped by the ignore matcher.
1498    fn run(mut self) {
1499        while let Some(work) = self.get_work() {
1500            if let WalkState::Quit = self.run_one(work) {
1501                self.quit_now();
1502            }
1503        }
1504    }
1505
1506    fn run_one(&mut self, mut work: Work) -> WalkState {
1507        // If the work is not a directory, then we can just execute the
1508        // caller's callback immediately and move on.
1509        if work.is_symlink() || !work.is_dir() {
1510            return self.visitor.visit(Ok(work.dent));
1511        }
1512        if let Some(err) = work.add_parents() {
1513            let state = self.visitor.visit(Err(err));
1514            if state.is_quit() {
1515                return state;
1516            }
1517        }
1518
1519        let descend = if let Some(root_device) = work.root_device {
1520            match is_same_file_system(root_device, work.dent.path()) {
1521                Ok(true) => true,
1522                Ok(false) => false,
1523                Err(err) => {
1524                    let state = self.visitor.visit(Err(err));
1525                    if state.is_quit() {
1526                        return state;
1527                    }
1528                    false
1529                }
1530            }
1531        } else {
1532            true
1533        };
1534
1535        // Try to read the directory first before we transfer ownership
1536        // to the provided closure. Do not unwrap it immediately, though,
1537        // as we may receive an `Err` value e.g. in the case when we do not
1538        // have sufficient read permissions to list the directory.
1539        // In that case we still want to provide the closure with a valid
1540        // entry before passing the error value.
1541        let readdir = work.read_dir();
1542        let depth = work.dent.depth();
1543        let state = self.visitor.visit(Ok(work.dent));
1544        if !state.is_continue() {
1545            return state;
1546        }
1547        if !descend {
1548            return WalkState::Skip;
1549        }
1550
1551        let readdir = match readdir {
1552            Ok(readdir) => readdir,
1553            Err(err) => {
1554                return self.visitor.visit(Err(err));
1555            }
1556        };
1557
1558        if self.max_depth.map_or(false, |max| depth >= max) {
1559            return WalkState::Skip;
1560        }
1561        for result in readdir {
1562            let state = self.generate_work(
1563                &work.ignore,
1564                depth + 1,
1565                work.root_device,
1566                result,
1567            );
1568            if state.is_quit() {
1569                return state;
1570            }
1571        }
1572        WalkState::Continue
1573    }
1574
1575    /// Decides whether to submit the given directory entry as a file to
1576    /// search.
1577    ///
1578    /// If the entry is a path that should be ignored, then this is a no-op.
1579    /// Otherwise, the entry is pushed on to the queue. (The actual execution
1580    /// of the callback happens in `run_one`.)
1581    ///
1582    /// If an error occurs while reading the entry, then it is sent to the
1583    /// caller's callback.
1584    ///
1585    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1586    /// be the depth of this entry. `result` should be the item yielded by
1587    /// a directory iterator.
1588    fn generate_work(
1589        &mut self,
1590        ig: &Ignore,
1591        depth: usize,
1592        root_device: Option<u64>,
1593        result: Result<fs::DirEntry, io::Error>,
1594    ) -> WalkState {
1595        let fs_dent = match result {
1596            Ok(fs_dent) => fs_dent,
1597            Err(err) => {
1598                return self
1599                    .visitor
1600                    .visit(Err(Error::from(err).with_depth(depth)));
1601            }
1602        };
1603        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1604            Ok(dent) => DirEntry::new_raw(dent, None),
1605            Err(err) => {
1606                return self.visitor.visit(Err(err));
1607            }
1608        };
1609        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1610        if self.follow_links && is_symlink {
1611            let path = dent.path().to_path_buf();
1612            dent = match DirEntryRaw::from_path(depth, path, true) {
1613                Ok(dent) => DirEntry::new_raw(dent, None),
1614                Err(err) => {
1615                    return self.visitor.visit(Err(err));
1616                }
1617            };
1618            if dent.is_dir() {
1619                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1620                    return self.visitor.visit(Err(err));
1621                }
1622            }
1623        }
1624        // N.B. See analogous call in the single-threaded implementation about
1625        // why it's important for this to come before the checks below.
1626        if should_skip_entry(ig, &dent) {
1627            return WalkState::Continue;
1628        }
1629        if let Some(ref stdout) = self.skip {
1630            let is_stdout = match path_equals(&dent, stdout) {
1631                Ok(is_stdout) => is_stdout,
1632                Err(err) => return self.visitor.visit(Err(err)),
1633            };
1634            if is_stdout {
1635                return WalkState::Continue;
1636            }
1637        }
1638        let should_skip_filesize =
1639            if self.max_filesize.is_some() && !dent.is_dir() {
1640                skip_filesize(
1641                    self.max_filesize.unwrap(),
1642                    dent.path(),
1643                    &dent.metadata().ok(),
1644                )
1645            } else {
1646                false
1647            };
1648        let should_skip_filtered =
1649            if let Some(Filter(predicate)) = &self.filter {
1650                !predicate(&dent)
1651            } else {
1652                false
1653            };
1654        if !should_skip_filesize && !should_skip_filtered {
1655            self.send(Work { dent, ignore: ig.clone(), root_device });
1656        }
1657        WalkState::Continue
1658    }
1659
1660    /// Returns the next directory to descend into.
1661    ///
1662    /// If all work has been exhausted, then this returns None. The worker
1663    /// should then subsequently quit.
1664    fn get_work(&mut self) -> Option<Work> {
1665        let mut value = self.recv();
1666        loop {
1667            // Simulate a priority channel: If quit_now flag is set, we can
1668            // receive only quit messages.
1669            if self.is_quit_now() {
1670                value = Some(Message::Quit)
1671            }
1672            match value {
1673                Some(Message::Work(work)) => {
1674                    return Some(work);
1675                }
1676                Some(Message::Quit) => {
1677                    // Repeat quit message to wake up sleeping threads, if
1678                    // any. The domino effect will ensure that every thread
1679                    // will quit.
1680                    self.send_quit();
1681                    return None;
1682                }
1683                None => {
1684                    if self.deactivate_worker() == 0 {
1685                        // If deactivate_worker() returns 0, every worker thread
1686                        // is currently within the critical section between the
1687                        // acquire in deactivate_worker() and the release in
1688                        // activate_worker() below.  For this to happen, every
1689                        // worker's local deque must be simultaneously empty,
1690                        // meaning there is no more work left at all.
1691                        self.send_quit();
1692                        return None;
1693                    }
1694                    // Wait for next `Work` or `Quit` message.
1695                    loop {
1696                        if let Some(v) = self.recv() {
1697                            self.activate_worker();
1698                            value = Some(v);
1699                            break;
1700                        }
1701                        // Our stack isn't blocking. Instead of burning the
1702                        // CPU waiting, we let the thread sleep for a bit. In
1703                        // general, this tends to only occur once the search is
1704                        // approaching termination.
1705                        let dur = std::time::Duration::from_millis(1);
1706                        std::thread::sleep(dur);
1707                    }
1708                }
1709            }
1710        }
1711    }
1712
1713    /// Indicates that all workers should quit immediately.
1714    fn quit_now(&self) {
1715        self.quit_now.store(true, AtomicOrdering::SeqCst);
1716    }
1717
1718    /// Returns true if this worker should quit immediately.
1719    fn is_quit_now(&self) -> bool {
1720        self.quit_now.load(AtomicOrdering::SeqCst)
1721    }
1722
1723    /// Send work.
1724    fn send(&self, work: Work) {
1725        self.stack.push(Message::Work(work));
1726    }
1727
1728    /// Send a quit message.
1729    fn send_quit(&self) {
1730        self.stack.push(Message::Quit);
1731    }
1732
1733    /// Receive work.
1734    fn recv(&self) -> Option<Message> {
1735        self.stack.pop()
1736    }
1737
1738    /// Deactivates a worker and returns the number of currently active workers.
1739    fn deactivate_worker(&self) -> usize {
1740        self.active_workers.fetch_sub(1, AtomicOrdering::Acquire) - 1
1741    }
1742
1743    /// Reactivates a worker.
1744    fn activate_worker(&self) {
1745        self.active_workers.fetch_add(1, AtomicOrdering::Release);
1746    }
1747}
1748
1749fn check_symlink_loop(
1750    ig_parent: &Ignore,
1751    child_path: &Path,
1752    child_depth: usize,
1753) -> Result<(), Error> {
1754    let hchild = Handle::from_path(child_path).map_err(|err| {
1755        Error::from(err).with_path(child_path).with_depth(child_depth)
1756    })?;
1757    for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
1758        let h = Handle::from_path(ig.path()).map_err(|err| {
1759            Error::from(err).with_path(child_path).with_depth(child_depth)
1760        })?;
1761        if hchild == h {
1762            return Err(Error::Loop {
1763                ancestor: ig.path().to_path_buf(),
1764                child: child_path.to_path_buf(),
1765            }
1766            .with_depth(child_depth));
1767        }
1768    }
1769    Ok(())
1770}
1771
1772// Before calling this function, make sure that you ensure that is really
1773// necessary as the arguments imply a file stat.
1774fn skip_filesize(
1775    max_filesize: u64,
1776    path: &Path,
1777    ent: &Option<Metadata>,
1778) -> bool {
1779    let filesize = match *ent {
1780        Some(ref md) => Some(md.len()),
1781        None => None,
1782    };
1783
1784    if let Some(fs) = filesize {
1785        if fs > max_filesize {
1786            log::debug!("ignoring {}: {} bytes", path.display(), fs);
1787            true
1788        } else {
1789            false
1790        }
1791    } else {
1792        false
1793    }
1794}
1795
1796fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1797    let m = ig.matched_dir_entry(dent);
1798    if m.is_ignore() {
1799        log::debug!("ignoring {}: {:?}", dent.path().display(), m);
1800        true
1801    } else if m.is_whitelist() {
1802        log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
1803        false
1804    } else {
1805        false
1806    }
1807}
1808
1809/// Returns a handle to stdout for filtering search.
1810///
1811/// A handle is returned if and only if stdout is being redirected to a file.
1812/// The handle returned corresponds to that file.
1813///
1814/// This can be used to ensure that we do not attempt to search a file that we
1815/// may also be writing to.
1816fn stdout_handle() -> Option<Handle> {
1817    let h = match Handle::stdout() {
1818        Err(_) => return None,
1819        Ok(h) => h,
1820    };
1821    let md = match h.as_file().metadata() {
1822        Err(_) => return None,
1823        Ok(md) => md,
1824    };
1825    if !md.is_file() {
1826        return None;
1827    }
1828    Some(h)
1829}
1830
1831/// Returns true if and only if the given directory entry is believed to be
1832/// equivalent to the given handle. If there was a problem querying the path
1833/// for information to determine equality, then that error is returned.
1834fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1835    #[cfg(unix)]
1836    fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1837        dent.ino() != Some(handle.ino())
1838    }
1839
1840    #[cfg(not(unix))]
1841    fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1842        false
1843    }
1844
1845    // If we know for sure that these two things aren't equal, then avoid
1846    // the costly extra stat call to determine equality.
1847    if dent.is_stdin() || never_equal(dent, handle) {
1848        return Ok(false);
1849    }
1850    Handle::from_path(dent.path())
1851        .map(|h| &h == handle)
1852        .map_err(|err| Error::Io(err).with_path(dent.path()))
1853}
1854
1855/// Returns true if the given walkdir entry corresponds to a directory.
1856///
1857/// This is normally just `dent.file_type().is_dir()`, but when we aren't
1858/// following symlinks, the root directory entry may be a symlink to a
1859/// directory that we *do* follow---by virtue of it being specified by the user
1860/// explicitly. In that case, we need to follow the symlink and query whether
1861/// it's a directory or not. But we only do this for root entries to avoid an
1862/// additional stat check in most cases.
1863fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool {
1864    if dent.file_type().is_dir() {
1865        return true;
1866    }
1867    if !dent.file_type().is_symlink() || dent.depth() > 0 {
1868        return false;
1869    }
1870    dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir())
1871}
1872
1873/// Returns true if and only if the given path is on the same device as the
1874/// given root device.
1875fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1876    let dent_device =
1877        device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1878    Ok(root_device == dent_device)
1879}
1880
1881#[cfg(unix)]
1882fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1883    use std::os::unix::fs::MetadataExt;
1884
1885    path.as_ref().metadata().map(|md| md.dev())
1886}
1887
1888#[cfg(windows)]
1889fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1890    use winapi_util::{file, Handle};
1891
1892    let h = Handle::from_path_any(path)?;
1893    file::information(h).map(|info| info.volume_serial_number())
1894}
1895
1896#[cfg(not(any(unix, windows)))]
1897fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
1898    Err(io::Error::new(
1899        io::ErrorKind::Other,
1900        "walkdir: same_file_system option not supported on this platform",
1901    ))
1902}
1903
1904#[cfg(test)]
1905mod tests {
1906    use std::ffi::OsStr;
1907    use std::fs::{self, File};
1908    use std::io::Write;
1909    use std::path::Path;
1910    use std::sync::{Arc, Mutex};
1911
1912    use super::{DirEntry, WalkBuilder, WalkState};
1913    use crate::tests::TempDir;
1914
1915    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
1916        let mut file = File::create(path).unwrap();
1917        file.write_all(contents.as_bytes()).unwrap();
1918    }
1919
1920    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
1921        let file = File::create(path).unwrap();
1922        file.set_len(size).unwrap();
1923    }
1924
1925    #[cfg(unix)]
1926    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
1927        use std::os::unix::fs::symlink;
1928        symlink(src, dst).unwrap();
1929    }
1930
1931    fn mkdirp<P: AsRef<Path>>(path: P) {
1932        fs::create_dir_all(path).unwrap();
1933    }
1934
1935    fn normal_path(unix: &str) -> String {
1936        if cfg!(windows) {
1937            unix.replace("\\", "/")
1938        } else {
1939            unix.to_string()
1940        }
1941    }
1942
1943    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1944        let mut paths = vec![];
1945        for result in builder.build() {
1946            let dent = match result {
1947                Err(_) => continue,
1948                Ok(dent) => dent,
1949            };
1950            let path = dent.path().strip_prefix(prefix).unwrap();
1951            if path.as_os_str().is_empty() {
1952                continue;
1953            }
1954            paths.push(normal_path(path.to_str().unwrap()));
1955        }
1956        paths.sort();
1957        paths
1958    }
1959
1960    fn walk_collect_parallel(
1961        prefix: &Path,
1962        builder: &WalkBuilder,
1963    ) -> Vec<String> {
1964        let mut paths = vec![];
1965        for dent in walk_collect_entries_parallel(builder) {
1966            let path = dent.path().strip_prefix(prefix).unwrap();
1967            if path.as_os_str().is_empty() {
1968                continue;
1969            }
1970            paths.push(normal_path(path.to_str().unwrap()));
1971        }
1972        paths.sort();
1973        paths
1974    }
1975
1976    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1977        let dents = Arc::new(Mutex::new(vec![]));
1978        builder.build_parallel().run(|| {
1979            let dents = dents.clone();
1980            Box::new(move |result| {
1981                if let Ok(dent) = result {
1982                    dents.lock().unwrap().push(dent);
1983                }
1984                WalkState::Continue
1985            })
1986        });
1987
1988        let dents = dents.lock().unwrap();
1989        dents.to_vec()
1990    }
1991
1992    fn mkpaths(paths: &[&str]) -> Vec<String> {
1993        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
1994        paths.sort();
1995        paths
1996    }
1997
1998    fn tmpdir() -> TempDir {
1999        TempDir::new().unwrap()
2000    }
2001
2002    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
2003        let got = walk_collect(prefix, builder);
2004        assert_eq!(got, mkpaths(expected), "single threaded");
2005        let got = walk_collect_parallel(prefix, builder);
2006        assert_eq!(got, mkpaths(expected), "parallel");
2007    }
2008
2009    #[test]
2010    fn no_ignores() {
2011        let td = tmpdir();
2012        mkdirp(td.path().join("a/b/c"));
2013        mkdirp(td.path().join("x/y"));
2014        wfile(td.path().join("a/b/foo"), "");
2015        wfile(td.path().join("x/y/foo"), "");
2016
2017        assert_paths(
2018            td.path(),
2019            &WalkBuilder::new(td.path()),
2020            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2021        );
2022    }
2023
2024    #[test]
2025    fn custom_ignore() {
2026        let td = tmpdir();
2027        let custom_ignore = ".customignore";
2028        mkdirp(td.path().join("a"));
2029        wfile(td.path().join(custom_ignore), "foo");
2030        wfile(td.path().join("foo"), "");
2031        wfile(td.path().join("a/foo"), "");
2032        wfile(td.path().join("bar"), "");
2033        wfile(td.path().join("a/bar"), "");
2034
2035        let mut builder = WalkBuilder::new(td.path());
2036        builder.add_custom_ignore_filename(&custom_ignore);
2037        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2038    }
2039
2040    #[test]
2041    fn custom_ignore_exclusive_use() {
2042        let td = tmpdir();
2043        let custom_ignore = ".customignore";
2044        mkdirp(td.path().join("a"));
2045        wfile(td.path().join(custom_ignore), "foo");
2046        wfile(td.path().join("foo"), "");
2047        wfile(td.path().join("a/foo"), "");
2048        wfile(td.path().join("bar"), "");
2049        wfile(td.path().join("a/bar"), "");
2050
2051        let mut builder = WalkBuilder::new(td.path());
2052        builder.ignore(false);
2053        builder.git_ignore(false);
2054        builder.git_global(false);
2055        builder.git_exclude(false);
2056        builder.add_custom_ignore_filename(&custom_ignore);
2057        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2058    }
2059
2060    #[test]
2061    fn gitignore() {
2062        let td = tmpdir();
2063        mkdirp(td.path().join(".git"));
2064        mkdirp(td.path().join("a"));
2065        wfile(td.path().join(".gitignore"), "foo");
2066        wfile(td.path().join("foo"), "");
2067        wfile(td.path().join("a/foo"), "");
2068        wfile(td.path().join("bar"), "");
2069        wfile(td.path().join("a/bar"), "");
2070
2071        assert_paths(
2072            td.path(),
2073            &WalkBuilder::new(td.path()),
2074            &["bar", "a", "a/bar"],
2075        );
2076    }
2077
2078    #[test]
2079    fn explicit_ignore() {
2080        let td = tmpdir();
2081        let igpath = td.path().join(".not-an-ignore");
2082        mkdirp(td.path().join("a"));
2083        wfile(&igpath, "foo");
2084        wfile(td.path().join("foo"), "");
2085        wfile(td.path().join("a/foo"), "");
2086        wfile(td.path().join("bar"), "");
2087        wfile(td.path().join("a/bar"), "");
2088
2089        let mut builder = WalkBuilder::new(td.path());
2090        assert!(builder.add_ignore(&igpath).is_none());
2091        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
2092    }
2093
2094    #[test]
2095    fn explicit_ignore_exclusive_use() {
2096        let td = tmpdir();
2097        let igpath = td.path().join(".not-an-ignore");
2098        mkdirp(td.path().join("a"));
2099        wfile(&igpath, "foo");
2100        wfile(td.path().join("foo"), "");
2101        wfile(td.path().join("a/foo"), "");
2102        wfile(td.path().join("bar"), "");
2103        wfile(td.path().join("a/bar"), "");
2104
2105        let mut builder = WalkBuilder::new(td.path());
2106        builder.standard_filters(false);
2107        assert!(builder.add_ignore(&igpath).is_none());
2108        assert_paths(
2109            td.path(),
2110            &builder,
2111            &[".not-an-ignore", "bar", "a", "a/bar"],
2112        );
2113    }
2114
2115    #[test]
2116    fn gitignore_parent() {
2117        let td = tmpdir();
2118        mkdirp(td.path().join(".git"));
2119        mkdirp(td.path().join("a"));
2120        wfile(td.path().join(".gitignore"), "foo");
2121        wfile(td.path().join("a/foo"), "");
2122        wfile(td.path().join("a/bar"), "");
2123
2124        let root = td.path().join("a");
2125        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
2126    }
2127
2128    #[test]
2129    fn max_depth() {
2130        let td = tmpdir();
2131        mkdirp(td.path().join("a/b/c"));
2132        wfile(td.path().join("foo"), "");
2133        wfile(td.path().join("a/foo"), "");
2134        wfile(td.path().join("a/b/foo"), "");
2135        wfile(td.path().join("a/b/c/foo"), "");
2136
2137        let mut builder = WalkBuilder::new(td.path());
2138        assert_paths(
2139            td.path(),
2140            &builder,
2141            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
2142        );
2143        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
2144        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
2145        assert_paths(
2146            td.path(),
2147            builder.max_depth(Some(2)),
2148            &["a", "a/b", "foo", "a/foo"],
2149        );
2150    }
2151
2152    #[test]
2153    fn max_filesize() {
2154        let td = tmpdir();
2155        mkdirp(td.path().join("a/b"));
2156        wfile_size(td.path().join("foo"), 0);
2157        wfile_size(td.path().join("bar"), 400);
2158        wfile_size(td.path().join("baz"), 600);
2159        wfile_size(td.path().join("a/foo"), 600);
2160        wfile_size(td.path().join("a/bar"), 500);
2161        wfile_size(td.path().join("a/baz"), 200);
2162
2163        let mut builder = WalkBuilder::new(td.path());
2164        assert_paths(
2165            td.path(),
2166            &builder,
2167            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2168        );
2169        assert_paths(
2170            td.path(),
2171            builder.max_filesize(Some(0)),
2172            &["a", "a/b", "foo"],
2173        );
2174        assert_paths(
2175            td.path(),
2176            builder.max_filesize(Some(500)),
2177            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
2178        );
2179        assert_paths(
2180            td.path(),
2181            builder.max_filesize(Some(50000)),
2182            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
2183        );
2184    }
2185
2186    #[cfg(unix)] // because symlinks on windows are weird
2187    #[test]
2188    fn symlinks() {
2189        let td = tmpdir();
2190        mkdirp(td.path().join("a/b"));
2191        symlink(td.path().join("a/b"), td.path().join("z"));
2192        wfile(td.path().join("a/b/foo"), "");
2193
2194        let mut builder = WalkBuilder::new(td.path());
2195        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
2196        assert_paths(
2197            td.path(),
2198            &builder.follow_links(true),
2199            &["a", "a/b", "a/b/foo", "z", "z/foo"],
2200        );
2201    }
2202
2203    #[cfg(unix)] // because symlinks on windows are weird
2204    #[test]
2205    fn first_path_not_symlink() {
2206        let td = tmpdir();
2207        mkdirp(td.path().join("foo"));
2208
2209        let dents = WalkBuilder::new(td.path().join("foo"))
2210            .build()
2211            .into_iter()
2212            .collect::<Result<Vec<_>, _>>()
2213            .unwrap();
2214        assert_eq!(1, dents.len());
2215        assert!(!dents[0].path_is_symlink());
2216
2217        let dents = walk_collect_entries_parallel(&WalkBuilder::new(
2218            td.path().join("foo"),
2219        ));
2220        assert_eq!(1, dents.len());
2221        assert!(!dents[0].path_is_symlink());
2222    }
2223
2224    #[cfg(unix)] // because symlinks on windows are weird
2225    #[test]
2226    fn symlink_loop() {
2227        let td = tmpdir();
2228        mkdirp(td.path().join("a/b"));
2229        symlink(td.path().join("a"), td.path().join("a/b/c"));
2230
2231        let mut builder = WalkBuilder::new(td.path());
2232        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
2233        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
2234    }
2235
2236    // It's a little tricky to test the 'same_file_system' option since
2237    // we need an environment with more than one file system. We adopt a
2238    // heuristic where /sys is typically a distinct volume on Linux and roll
2239    // with that.
2240    #[test]
2241    #[cfg(target_os = "linux")]
2242    fn same_file_system() {
2243        use super::device_num;
2244
2245        // If for some reason /sys doesn't exist or isn't a directory, just
2246        // skip this test.
2247        if !Path::new("/sys").is_dir() {
2248            return;
2249        }
2250
2251        // If our test directory actually isn't a different volume from /sys,
2252        // then this test is meaningless and we shouldn't run it.
2253        let td = tmpdir();
2254        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
2255            return;
2256        }
2257
2258        mkdirp(td.path().join("same_file"));
2259        symlink("/sys", td.path().join("same_file").join("alink"));
2260
2261        // Create a symlink to sys and enable following symlinks. If the
2262        // same_file_system option doesn't work, then this probably will hit a
2263        // permission error. Otherwise, it should just skip over the symlink
2264        // completely.
2265        let mut builder = WalkBuilder::new(td.path());
2266        builder.follow_links(true).same_file_system(true);
2267        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
2268    }
2269
2270    #[cfg(target_os = "linux")]
2271    #[test]
2272    fn no_read_permissions() {
2273        let dir_path = Path::new("/root");
2274
2275        // There's no /etc/sudoers.d, skip the test.
2276        if !dir_path.is_dir() {
2277            return;
2278        }
2279        // We're the root, so the test won't check what we want it to.
2280        if fs::read_dir(&dir_path).is_ok() {
2281            return;
2282        }
2283
2284        // Check that we can't descend but get an entry for the parent dir.
2285        let builder = WalkBuilder::new(&dir_path);
2286        assert_paths(dir_path.parent().unwrap(), &builder, &["root"]);
2287    }
2288
2289    #[test]
2290    fn filter() {
2291        let td = tmpdir();
2292        mkdirp(td.path().join("a/b/c"));
2293        mkdirp(td.path().join("x/y"));
2294        wfile(td.path().join("a/b/foo"), "");
2295        wfile(td.path().join("x/y/foo"), "");
2296
2297        assert_paths(
2298            td.path(),
2299            &WalkBuilder::new(td.path()),
2300            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
2301        );
2302
2303        assert_paths(
2304            td.path(),
2305            &WalkBuilder::new(td.path())
2306                .filter_entry(|entry| entry.file_name() != OsStr::new("a")),
2307            &["x", "x/y", "x/y/foo"],
2308        );
2309    }
2310}