ignore/
lib.rs

1/*!
The ignore crate provides a fast recursive directory iterator that respects
various filters such as globs, file types and `.gitignore` files. The precise
matching rules and precedence are explained in the documentation for
`WalkBuilder`.
6
7Secondarily, this crate exposes gitignore and file type matchers for use cases
8that demand more fine-grained control.
9
10# Example
11
12This example shows the most basic usage of this crate. This code will
13recursively traverse the current directory while automatically filtering out
14files and directories according to ignore globs found in files like
15`.ignore` and `.gitignore`:
16
17
18```rust,no_run
19use ignore::Walk;
20
21for result in Walk::new("./") {
22    // Each item yielded by the iterator is either a directory entry or an
23    // error, so either print the path or the error.
24    match result {
25        Ok(entry) => println!("{}", entry.path().display()),
26        Err(err) => println!("ERROR: {}", err),
27    }
28}
29```
30
31# Example: advanced
32
33By default, the recursive directory iterator will ignore hidden files and
34directories. This can be disabled by building the iterator with `WalkBuilder`:
35
36```rust,no_run
37use ignore::WalkBuilder;
38
39for result in WalkBuilder::new("./").hidden(false).build() {
40    println!("{:?}", result);
41}
42```
43
44See the documentation for `WalkBuilder` for many other options.
45*/
46
47use std::error;
48use std::fmt;
49use std::io;
50use std::path::{Path, PathBuf};
51
52pub use crate::dir::MatchMetadata;
53pub use crate::walk::{
54    DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
55    WalkParallel, WalkState,
56};
57
58mod default_types;
59mod dir;
60pub mod gitignore;
61pub mod overrides;
62mod pathutil;
63pub mod types;
64mod walk;
65
/// Represents an error that can occur while parsing ignore files or globs,
/// selecting file types, or recursively walking a directory.
///
/// The `With*` variants wrap another error in order to attach context (a
/// line number, a file path or a directory depth), so a single error value
/// may be nested several layers deep.
#[derive(Debug)]
pub enum Error {
    /// A collection of "soft" errors. These occur when adding an ignore
    /// file partially succeeded.
    Partial(Vec<Error>),
    /// An error associated with a specific line number.
    WithLineNumber {
        /// The line number.
        line: u64,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error associated with a particular file path.
    WithPath {
        /// The file path.
        path: PathBuf,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error associated with a particular directory depth when recursively
    /// walking a directory.
    WithDepth {
        /// The directory depth.
        depth: usize,
        /// The underlying error.
        err: Box<Error>,
    },
    /// An error that occurs when a file loop is detected when traversing
    /// symbolic links.
    Loop {
        /// The ancestor file path in the loop.
        ancestor: PathBuf,
        /// The child file path in the loop.
        child: PathBuf,
    },
    /// An error that occurs when doing I/O, such as reading an ignore file.
    Io(io::Error),
    /// An error that occurs when trying to parse a glob.
    Glob {
        /// The original glob that caused this error. This glob, when
        /// available, always corresponds to the glob provided by an end user.
        /// e.g., It is the glob as written in a `.gitignore` file.
        ///
        /// (This glob may be distinct from the glob that is actually
        /// compiled, after accounting for `gitignore` semantics.)
        glob: Option<String>,
        /// The underlying glob error as a string.
        err: String,
    },
    /// A type selection for a file type that is not defined.
    UnrecognizedFileType(String),
    /// A user specified file type definition could not be parsed.
    InvalidDefinition,
}
121
122impl Clone for Error {
123    fn clone(&self) -> Error {
124        match *self {
125            Error::Partial(ref errs) => Error::Partial(errs.clone()),
126            Error::WithLineNumber { line, ref err } => {
127                Error::WithLineNumber { line: line, err: err.clone() }
128            }
129            Error::WithPath { ref path, ref err } => {
130                Error::WithPath { path: path.clone(), err: err.clone() }
131            }
132            Error::WithDepth { depth, ref err } => {
133                Error::WithDepth { depth: depth, err: err.clone() }
134            }
135            Error::Loop { ref ancestor, ref child } => Error::Loop {
136                ancestor: ancestor.clone(),
137                child: child.clone(),
138            },
139            Error::Io(ref err) => match err.raw_os_error() {
140                Some(e) => Error::Io(io::Error::from_raw_os_error(e)),
141                None => Error::Io(io::Error::new(err.kind(), err.to_string())),
142            },
143            Error::Glob { ref glob, ref err } => {
144                Error::Glob { glob: glob.clone(), err: err.clone() }
145            }
146            Error::UnrecognizedFileType(ref err) => {
147                Error::UnrecognizedFileType(err.clone())
148            }
149            Error::InvalidDefinition => Error::InvalidDefinition,
150        }
151    }
152}
153
154impl Error {
155    /// Returns true if this is a partial error.
156    ///
157    /// A partial error occurs when only some operations failed while others
158    /// may have succeeded. For example, an ignore file may contain an invalid
159    /// glob among otherwise valid globs.
160    pub fn is_partial(&self) -> bool {
161        match *self {
162            Error::Partial(_) => true,
163            Error::WithLineNumber { ref err, .. } => err.is_partial(),
164            Error::WithPath { ref err, .. } => err.is_partial(),
165            Error::WithDepth { ref err, .. } => err.is_partial(),
166            _ => false,
167        }
168    }
169
170    /// Returns true if this error is exclusively an I/O error.
171    pub fn is_io(&self) -> bool {
172        match *self {
173            Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
174            Error::WithLineNumber { ref err, .. } => err.is_io(),
175            Error::WithPath { ref err, .. } => err.is_io(),
176            Error::WithDepth { ref err, .. } => err.is_io(),
177            Error::Loop { .. } => false,
178            Error::Io(_) => true,
179            Error::Glob { .. } => false,
180            Error::UnrecognizedFileType(_) => false,
181            Error::InvalidDefinition => false,
182        }
183    }
184
185    /// Inspect the original [`io::Error`] if there is one.
186    ///
187    /// [`None`] is returned if the [`Error`] doesn't correspond to an
188    /// [`io::Error`]. This might happen, for example, when the error was
189    /// produced because a cycle was found in the directory tree while
190    /// following symbolic links.
191    ///
192    /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To
193    /// obtain an owned value, the [`into_io_error`] can be used instead.
194    ///
195    /// > This is the original [`io::Error`] and is _not_ the same as
196    /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the
197    /// error.
198    ///
199    /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
200    /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
201    /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
202    /// [`Error`]: struct.Error.html
203    /// [`into_io_error`]: struct.Error.html#method.into_io_error
204    /// [impl]: struct.Error.html#impl-From%3CError%3E
205    pub fn io_error(&self) -> Option<&std::io::Error> {
206        match *self {
207            Error::Partial(ref errs) => {
208                if errs.len() == 1 {
209                    errs[0].io_error()
210                } else {
211                    None
212                }
213            }
214            Error::WithLineNumber { ref err, .. } => err.io_error(),
215            Error::WithPath { ref err, .. } => err.io_error(),
216            Error::WithDepth { ref err, .. } => err.io_error(),
217            Error::Loop { .. } => None,
218            Error::Io(ref err) => Some(err),
219            Error::Glob { .. } => None,
220            Error::UnrecognizedFileType(_) => None,
221            Error::InvalidDefinition => None,
222        }
223    }
224
225    /// Similar to [`io_error`] except consumes self to convert to the original
226    /// [`io::Error`] if one exists.
227    ///
228    /// [`io_error`]: struct.Error.html#method.io_error
229    /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
230    pub fn into_io_error(self) -> Option<std::io::Error> {
231        match self {
232            Error::Partial(mut errs) => {
233                if errs.len() == 1 {
234                    errs.remove(0).into_io_error()
235                } else {
236                    None
237                }
238            }
239            Error::WithLineNumber { err, .. } => err.into_io_error(),
240            Error::WithPath { err, .. } => err.into_io_error(),
241            Error::WithDepth { err, .. } => err.into_io_error(),
242            Error::Loop { .. } => None,
243            Error::Io(err) => Some(err),
244            Error::Glob { .. } => None,
245            Error::UnrecognizedFileType(_) => None,
246            Error::InvalidDefinition => None,
247        }
248    }
249
250    /// Returns a depth associated with recursively walking a directory (if
251    /// this error was generated from a recursive directory iterator).
252    pub fn depth(&self) -> Option<usize> {
253        match *self {
254            Error::WithPath { ref err, .. } => err.depth(),
255            Error::WithDepth { depth, .. } => Some(depth),
256            _ => None,
257        }
258    }
259
260    /// Turn an error into a tagged error with the given file path.
261    fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
262        Error::WithPath {
263            path: path.as_ref().to_path_buf(),
264            err: Box::new(self),
265        }
266    }
267
268    /// Turn an error into a tagged error with the given depth.
269    fn with_depth(self, depth: usize) -> Error {
270        Error::WithDepth { depth: depth, err: Box::new(self) }
271    }
272
273    /// Turn an error into a tagged error with the given file path and line
274    /// number. If path is empty, then it is omitted from the error.
275    fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
276        let errline =
277            Error::WithLineNumber { line: lineno, err: Box::new(self) };
278        if path.as_ref().as_os_str().is_empty() {
279            return errline;
280        }
281        errline.with_path(path)
282    }
283
284    /// Build an error from a walkdir error.
285    fn from_walkdir(err: walkdir::Error) -> Error {
286        let depth = err.depth();
287        if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
288            return Error::WithDepth {
289                depth: depth,
290                err: Box::new(Error::Loop {
291                    ancestor: anc.to_path_buf(),
292                    child: child.to_path_buf(),
293                }),
294            };
295        }
296        let path = err.path().map(|p| p.to_path_buf());
297        let mut ig_err = Error::Io(io::Error::from(err));
298        if let Some(path) = path {
299            ig_err = Error::WithPath { path: path, err: Box::new(ig_err) };
300        }
301        ig_err
302    }
303}
304
305impl error::Error for Error {
306    #[allow(deprecated)]
307    fn description(&self) -> &str {
308        match *self {
309            Error::Partial(_) => "partial error",
310            Error::WithLineNumber { ref err, .. } => err.description(),
311            Error::WithPath { ref err, .. } => err.description(),
312            Error::WithDepth { ref err, .. } => err.description(),
313            Error::Loop { .. } => "file system loop found",
314            Error::Io(ref err) => err.description(),
315            Error::Glob { ref err, .. } => err,
316            Error::UnrecognizedFileType(_) => "unrecognized file type",
317            Error::InvalidDefinition => "invalid definition",
318        }
319    }
320}
321
322impl fmt::Display for Error {
323    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324        match *self {
325            Error::Partial(ref errs) => {
326                let msgs: Vec<String> =
327                    errs.iter().map(|err| err.to_string()).collect();
328                write!(f, "{}", msgs.join("\n"))
329            }
330            Error::WithLineNumber { line, ref err } => {
331                write!(f, "line {}: {}", line, err)
332            }
333            Error::WithPath { ref path, ref err } => {
334                write!(f, "{}: {}", path.display(), err)
335            }
336            Error::WithDepth { ref err, .. } => err.fmt(f),
337            Error::Loop { ref ancestor, ref child } => write!(
338                f,
339                "File system loop found: \
340                           {} points to an ancestor {}",
341                child.display(),
342                ancestor.display()
343            ),
344            Error::Io(ref err) => err.fmt(f),
345            Error::Glob { glob: None, ref err } => write!(f, "{}", err),
346            Error::Glob { glob: Some(ref glob), ref err } => {
347                write!(f, "error parsing glob '{}': {}", glob, err)
348            }
349            Error::UnrecognizedFileType(ref ty) => {
350                write!(f, "unrecognized file type: {}", ty)
351            }
352            Error::InvalidDefinition => write!(
353                f,
354                "invalid definition (format is type:glob, e.g., \
355                           html:*.html)"
356            ),
357        }
358    }
359}
360
361impl From<io::Error> for Error {
362    fn from(err: io::Error) -> Error {
363        Error::Io(err)
364    }
365}
366
367#[derive(Debug, Default)]
368struct PartialErrorBuilder(Vec<Error>);
369
370impl PartialErrorBuilder {
371    fn push(&mut self, err: Error) {
372        self.0.push(err);
373    }
374
375    fn push_ignore_io(&mut self, err: Error) {
376        if !err.is_io() {
377            self.push(err);
378        }
379    }
380
381    fn maybe_push(&mut self, err: Option<Error>) {
382        if let Some(err) = err {
383            self.push(err);
384        }
385    }
386
387    fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
388        if let Some(err) = err {
389            self.push_ignore_io(err);
390        }
391    }
392
393    fn into_error_option(mut self) -> Option<Error> {
394        if self.0.is_empty() {
395            None
396        } else if self.0.len() == 1 {
397            Some(self.0.pop().unwrap())
398        } else {
399            Some(Error::Partial(self.0))
400        }
401    }
402}
403
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries extra information about the
/// match, such as the gitignore file and the glob pattern responsible for
/// it.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// The path didn't match any glob.
    None,
    /// The highest precedent glob matched indicates the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedent glob matched indicates the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        matches!(self, Match::None)
    }

    /// Returns true if the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        matches!(self, Match::Ignore(_))
    }

    /// Returns true if the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        matches!(self, Match::Whitelist(_))
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the inner value. A non-match
    /// is returned as is.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if there is one.
    pub fn inner(&self) -> Option<&T> {
        match self {
            Match::Ignore(value) | Match::Whitelist(value) => Some(value),
            Match::None => None,
        }
    }

    /// Transforms the value carried by this match with the given function,
    /// keeping the kind of match.
    ///
    /// A non-match carries no value, so the function is not called for it.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }

    /// Returns this match unless it is a non-match, in which case `other`
    /// is returned.
    pub fn or(self, other: Self) -> Self {
        match self {
            Match::None => other,
            matched => matched,
        }
    }
}
487
#[cfg(test)]
mod tests {
    use std::env;
    use std::error;
    use std::fs;
    use std::io;
    use std::path::{Path, PathBuf};
    use std::result;
    use std::thread;

    /// A convenient result type alias.
    pub type Result<T> =
        result::Result<T, Box<dyn error::Error + Send + Sync>>;

    /// Build a boxed error from a format string and its arguments.
    macro_rules! err {
        ($($tt:tt)*) => {
            Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
        }
    }

    /// A simple wrapper for creating a temporary directory that is
    /// automatically deleted when it's dropped.
    ///
    /// We use this in lieu of tempfile because tempfile brings in too many
    /// dependencies.
    #[derive(Debug)]
    pub struct TempDir(PathBuf);

    impl Drop for TempDir {
        /// Removes the directory and everything inside it.
        ///
        /// # Panics
        ///
        /// Panics if removal fails, unless the current thread is already
        /// panicking.
        fn drop(&mut self) {
            if let Err(err) = fs::remove_dir_all(&self.0) {
                // A panic raised while the thread is already unwinding
                // aborts the whole process and hides the failure that
                // started the unwind, so only report the cleanup failure
                // when nothing else has gone wrong.
                if !thread::panicking() {
                    panic!("failed to remove {}: {}", self.0.display(), err);
                }
            }
        }
    }

    impl TempDir {
        /// Create a new empty temporary directory under the system's configured
        /// temporary directory.
        ///
        /// # Errors
        ///
        /// Returns an error if a directory cannot be created, or if every
        /// candidate name that was tried already exists.
        pub fn new() -> Result<TempDir> {
            use std::sync::atomic::{AtomicUsize, Ordering};

            const TRIES: usize = 100;
            static COUNTER: AtomicUsize = AtomicUsize::new(0);

            let root = env::temp_dir().join("rust-ignore");
            fs::create_dir_all(&root).map_err(|e| {
                err!("failed to create {}: {}", root.display(), e)
            })?;
            for _ in 0..TRIES {
                let count = COUNTER.fetch_add(1, Ordering::SeqCst);
                let path = root.join(count.to_string());
                // `create_dir` fails if the path already exists, so the
                // existence check and the creation are a single step. A
                // separate check followed by `create_dir_all` would let two
                // concurrent test processes claim the same directory.
                match fs::create_dir(&path) {
                    Ok(()) => return Ok(TempDir(path)),
                    Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                        continue;
                    }
                    Err(e) => {
                        return Err(err!(
                            "failed to create {}: {}",
                            path.display(),
                            e
                        ));
                    }
                }
            }
            Err(err!("failed to create temp dir after {} tries", TRIES))
        }

        /// Return the underlying path to this temporary directory.
        pub fn path(&self) -> &Path {
            &self.0
        }
    }
}