capturing_glob/
lib.rs

1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Support for matching file paths against Unix shell style patterns.
12//!
13//! The `glob` and `glob_with` functions allow querying the filesystem for all
14//! files that match a particular pattern (similar to the libc `glob` function).
15//! The methods on the `Pattern` type provide functionality for checking if
16//! individual paths match a particular pattern (similar to the libc `fnmatch`
17//! function).
18//!
19//! For consistency across platforms, and for Windows support, this module
20//! is implemented entirely in Rust rather than deferring to the libc
21//! `glob`/`fnmatch` functions.
22//!
23//! # Examples
24//!
25//! To print all jpg files in `/media/` and all of its subdirectories,
26//! extracting stem and a directory name while matching.
27//!
28//! ```rust,no_run
29//! use capturing_glob::glob;
30//!
31//! for entry in glob("/media/(**/*).jpg").expect("Failed to read glob pattern") {
32//!     match entry {
33//!         Ok(entry) => {
34//!             println!("{:?} -> {:?}", entry.path().display(),
35//!                 entry.group(1).unwrap());
36//!         }
37//!         Err(e) => eprintln!("{:?}", e),
38//!     }
39//! }
40//! ```
41//!
42//! To print all files containing the letter "a", case insensitive, in a `local`
43//! directory relative to the current working directory. This ignores errors
44//! instead of printing them.
45//!
46//! ```rust,no_run
47//! use capturing_glob::glob_with;
48//! use capturing_glob::MatchOptions;
49//!
50//! let options = MatchOptions {
51//!     case_sensitive: false,
52//!     require_literal_separator: false,
53//!     require_literal_leading_dot: false,
54//! };
55//! for entry in glob_with("local/*a*", &options).unwrap() {
56//!     if let Ok(entry) = entry {
57//!         println!("{:?}", entry.path().display())
58//!     }
59//! }
60//! ```
61//!
62//! # Substitute Names
63//!
64//! Reverse conversion where you have a name and pattern and want to get
65//! a full path is also possible:
66//!
67//! ```rust
68//! # use std::error::Error;
69//! use capturing_glob::Pattern;
70//!
71//! # fn run() -> Result<(), Box<Error>> {
72//! assert_eq!(Pattern::new("images/(*).jpg")?.substitute(&["cat"])?,
73//!            "images/cat.jpg");
74//! assert_eq!(Pattern::new("images/(*.jpg)")?.substitute(&["cat.jpg"])?,
75//!            "images/cat.jpg");
76//! # Ok(())
77//! # }
78//! # fn main() { run().unwrap() }
79//! ```
80//!
//! Note: the substituted values are not checked against the pattern. So the following is possible:
82//!
83//! ```rust
84//! # use std::error::Error;
85//! use capturing_glob::Pattern;
86//!
87//! # fn run() -> Result<(), Box<Error>> {
88//! let pattern = Pattern::new("images/(*.jpg)")?;
89//! assert_eq!(pattern.substitute(&["cat.png"])?, "images/cat.png");
90//! assert!(!pattern.matches(&pattern.substitute(&["cat.png"])?));
91//! # Ok(())
92//! # }
93//! # fn main() { run().unwrap() }
94//! ```
95//!
96
97#![deny(missing_docs)]
98#![deny(missing_debug_implementations)]
99#![cfg_attr(all(test, windows), feature(std_misc))]
100
101mod entry;
102
103pub use entry::Entry;
104
105use std::ascii::AsciiExt;
106use std::cmp;
107use std::fmt;
108use std::fs;
109use std::io;
110use std::path::{self, Path, PathBuf, Component};
111use std::str::FromStr;
112use std::error::Error;
113
114use CharSpecifier::{SingleChar, CharRange};
115use MatchResult::{Match, SubPatternDoesntMatch, EntirePatternDoesntMatch};
116
/// An iterator that yields `Entry` values that match a particular pattern.
///
/// Each entry contains the matching filename and also its capture groups.
///
/// Note that it yields `GlobResult` in order to report any `IoErrors` that may
/// arise during iteration. If a directory matches but is unreadable,
/// thereby preventing its contents from being checked for matches, a
/// `GlobError` is returned to express this.
///
/// See the `glob` function for more details.
#[derive(Debug)]
pub struct Entries {
    // Pattern compiled from the whole glob string (after trimming a trailing
    // separator and a leading `./`); used to build the `Entry` with capture
    // groups once a path matched every component pattern.
    whole_pattern: Pattern,
    // One pattern per path component of the glob, compiled with capture
    // groups skipped.
    dir_patterns: Vec<Pattern>,
    // Set when the glob ended with a path separator: only directories are
    // yielded in that case.
    require_dir: bool,
    // Match options supplied by the caller of `glob_with`.
    options: MatchOptions,
    // Pending work: either a `(path, index into dir_patterns)` pair that still
    // has to be examined, or an error to report to the caller.
    todo: Vec<Result<(PathBuf, usize), GlobError>>,
    // Directory the scan starts from; taken on the first call to `next()` to
    // seed `todo`, `None` afterwards.
    scope: Option<PathBuf>,
}
136
137/// Return an iterator that produces all the paths and capture groups that
138/// match the given pattern using default match options, which may be absolute
139/// or relative to the current working directory.
140///
141/// This may return an error if the pattern is invalid.
142///
143/// This method uses the default match options and is equivalent to calling
144/// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
145/// want to use non-default match options.
146///
147/// When iterating, each result is a `GlobResult` which expresses the
148/// possibility that there was an `IoError` when attempting to read the contents
149/// of the matched path.  In other words, each item returned by the iterator
150/// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if
151/// the path (partially) matched _but_ its contents could not be read in order
152/// to determine if its contents matched.
153///
154/// See the `Entries` documentation for more information.
155///
156/// # Examples
157///
158/// Consider a directory `/media/pictures` containing only the files
159/// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`:
160///
161/// ```rust,no_run
162/// use capturing_glob::glob;
163///
164/// for entry in glob("/media/pictures/(*).jpg").unwrap() {
165///     match entry {
166///         Ok(entry) => {
167///             println!("{:?} -> {:?}",
168///                 entry.path().display(),
169///                 entry.group(1).unwrap());
170///         }
171///
172///         // if the path matched but was unreadable,
173///         // thereby preventing its contents from matching
174///         Err(e) => println!("{:?}", e),
175///     }
176/// }
177/// ```
178///
179/// The above code will print:
180///
181/// ```ignore
182/// /media/pictures/kittens.jpg -> kittens
183/// /media/pictures/puppies.jpg -> puppies
184/// ```
185///
186/// If you want to ignore unreadable paths, you can use something like
187/// `filter_map`:
188///
189/// ```rust
190/// use capturing_glob::glob;
191/// use std::result::Result;
192///
193/// for entry in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) {
194///     println!("{}", entry.path().display());
195/// }
196/// ```
197/// Entries are yielded in alphabetical order.
198pub fn glob(pattern: &str) -> Result<Entries, PatternError> {
199    glob_with(pattern, &MatchOptions::new())
200}
201
202/// Return an iterator that produces all the paths with capture groups that
203/// match the given pattern using the specified match options, which may be
204/// absolute or relative to the current working directory.
205///
206/// This may return an error if the pattern is invalid.
207///
208/// This function accepts Unix shell style patterns as described by
209/// `Pattern::new(..)`.  The options given are passed through unchanged to
210/// `Pattern::matches_with(..)` with the exception that
211/// `require_literal_separator` is always set to `true` regardless of the value
212/// passed to this function.
213///
214/// Entries are yielded in alphabetical order.
215pub fn glob_with(pattern: &str, options: &MatchOptions)
216                 -> Result<Entries, PatternError> {
217    let last_is_separator = pattern.chars().next_back().map(path::is_separator);
218    let require_dir = last_is_separator == Some(true);
219
220    let mut txt = pattern;
221    if require_dir {
222        // Need to strip last slash.
223        // I.e. pattern `*/` means we match a directory,
224        // but the real path of a directory is `something` (without slash)
225        txt = &txt[..pattern.len()-1];
226    };
227    if txt.starts_with(".") &&
228        txt[1..].chars().next().map(path::is_separator) == Some(true)
229    {
230        // Similarly a pattern `./*` means we match at current path
231        // but the real path is `something` without dotslash
232        txt = &txt[2..];
233    }
234    // TODO(tailhook) This may mess up error offsets
235    let compiled = Pattern::new(txt)?;
236
237    #[cfg(windows)]
238    fn check_windows_verbatim(p: &Path) -> bool {
239        use std::path::Prefix;
240        match p.components().next() {
241            Some(Component::Prefix(ref p)) => p.kind().is_verbatim(),
242            _ => false,
243        }
244    }
245    #[cfg(not(windows))]
246    fn check_windows_verbatim(_: &Path) -> bool {
247        false
248    }
249
250    #[cfg(windows)]
251    fn to_scope(p: &Path) -> PathBuf {
252        // FIXME handle volume relative paths here
253        p.to_path_buf()
254    }
255    #[cfg(not(windows))]
256    fn to_scope(p: &Path) -> PathBuf {
257        p.to_path_buf()
258    }
259
260    let mut components = Path::new(pattern).components().peekable();
261    loop {
262        match components.peek() {
263            Some(&Component::Prefix(..)) |
264            Some(&Component::RootDir) => {
265                components.next();
266            }
267            _ => break,
268        }
269    }
270    let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>();
271    let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>();
272    let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len();
273    let root = if root_len > 0 {
274        Some(Path::new(&pattern[..root_len]))
275    } else {
276        None
277    };
278
279    if root_len > 0 && check_windows_verbatim(root.unwrap()) {
280        // FIXME: How do we want to handle verbatim paths? I'm inclined to
281        // return nothing, since we can't very well find all UNC shares with a
282        // 1-letter server name.
283        return Ok(Entries {
284            dir_patterns: Vec::new(),
285            whole_pattern: compiled,
286            require_dir: false,
287            options: options.clone(),
288            todo: Vec::new(),
289            scope: None,
290        });
291    }
292
293    let scope = root.map(to_scope).unwrap_or_else(|| PathBuf::from("."));
294
295    let mut dir_patterns = Vec::new();
296    let components = pattern[cmp::min(root_len, pattern.len())..]
297                         .split_terminator(path::is_separator);
298
299    for component in components {
300        let compiled = Pattern::new_options(component, true)?;
301        dir_patterns.push(compiled);
302    }
303
304    if root_len == pattern.len() {
305        dir_patterns.push(Pattern {
306            original: "".to_string(),
307            tokens: Vec::new(),
308            is_recursive: false,
309        });
310    }
311
312    let todo = Vec::new();
313
314    Ok(Entries {
315        dir_patterns: dir_patterns,
316        whole_pattern: compiled,
317        require_dir: require_dir,
318        options: options.clone(),
319        todo: todo,
320        scope: Some(scope),
321    })
322}
323
324/// A glob iteration error.
325///
326/// This is typically returned when a particular path cannot be read
327/// to determine if its contents match the glob pattern. This is possible
328/// if the program lacks the appropriate permissions, for example.
329#[derive(Debug)]
330pub struct GlobError {
331    path: PathBuf,
332    error: io::Error,
333}
334
335impl GlobError {
336    /// The Path that the error corresponds to.
337    pub fn path(&self) -> &Path {
338        &self.path
339    }
340
341    /// The error in question.
342    pub fn error(&self) -> &io::Error {
343        &self.error
344    }
345}
346
347impl Error for GlobError {
348    fn description(&self) -> &str {
349        self.error.description()
350    }
351    fn cause(&self) -> Option<&Error> {
352        Some(&self.error)
353    }
354}
355
356impl fmt::Display for GlobError {
357    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
358        write!(f,
359               "attempting to read `{}` resulted in an error: {}",
360               self.path.display(),
361               self.error)
362    }
363}
364
// Reports whether `p` refers to a directory. Any failure to read the metadata
// is treated as "not a directory".
fn is_dir(p: &Path) -> bool {
    match fs::metadata(p) {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
368
/// An alias for a glob iteration result.
///
/// This represents either a matched `Entry` (path plus capture groups) or a
/// glob iteration error, such as failing to read a particular directory's
/// contents.
pub type GlobResult = Result<Entry, GlobError>;
374
375impl Iterator for Entries {
376    type Item = GlobResult;
377
378    fn next(&mut self) -> Option<GlobResult> {
379        // the todo buffer hasn't been initialized yet, so it's done at this
380        // point rather than in glob() so that the errors are unified that is,
381        // failing to fill the buffer is an iteration error construction of the
382        // iterator (i.e. glob()) only fails if it fails to compile the Pattern
383        if let Some(scope) = self.scope.take() {
384            if self.dir_patterns.len() > 0 {
385                // Shouldn't happen, but we're using -1 as a special index.
386                assert!(self.dir_patterns.len() < !0 as usize);
387
388                fill_todo(&mut self.todo,
389                          &self.dir_patterns,
390                          0,
391                          &scope,
392                          &self.options);
393            }
394        }
395
396        loop {
397            if self.dir_patterns.is_empty() || self.todo.is_empty() {
398                return None;
399            }
400
401            let (path, mut idx) = match self.todo.pop().unwrap() {
402                Ok(pair) => pair,
403                Err(e) => return Some(Err(e)),
404            };
405
406            // idx -1: was already checked by fill_todo, maybe path was '.' or
407            // '..' that we can't match here because of normalization.
408            if idx == !0 as usize {
409                if self.require_dir && !is_dir(&path) {
410                    continue;
411                }
412                return Some(Ok(Entry::new(path)));
413            }
414
415            if self.dir_patterns[idx].is_recursive {
416                let mut next = idx;
417
418                // collapse consecutive recursive patterns
419                while (next + 1) < self.dir_patterns.len() &&
420                      self.dir_patterns[next + 1].is_recursive {
421                    next += 1;
422                }
423
424                if is_dir(&path) {
425                    // the path is a directory, so it's a match
426
427                    // push this directory's contents
428                    fill_todo(&mut self.todo,
429                              &self.dir_patterns,
430                              next,
431                              &path,
432                              &self.options);
433
434                    if next == self.dir_patterns.len() - 1 {
435                        // pattern ends in recursive pattern, so return this
436                        // directory as a result
437                        return Some(Ok(Entry::new(path)));
438                    } else {
439                        // advanced to the next pattern for this path
440                        idx = next + 1;
441                    }
442                } else if next != self.dir_patterns.len() - 1 {
443                    // advanced to the next pattern for this path
444                    idx = next + 1;
445                } else {
446                    // not a directory and it's the last pattern, meaning no
447                    // match
448                    continue;
449                }
450            }
451
452            // not recursive, so match normally
453            if self.dir_patterns[idx].matches_with({
454                match path.file_name().and_then(|s| s.to_str()) {
455                    // FIXME (#9639): How do we handle non-utf8 filenames?
456                    // Ignore them for now; ideally we'd still match them
457                    // against a *
458                    None => continue,
459                    Some(x) => x
460                }
461            }, &self.options) {
462                if idx == self.dir_patterns.len() - 1 {
463                    // it is not possible for a pattern to match a directory
464                    // *AND* its children so we don't need to check the
465                    // children
466
467                    if !self.require_dir || is_dir(&path) {
468                        let entry = self.whole_pattern
469                            .captures_path_with(&path, &self.options)
470                            .expect("dir patterns consistent with whole pat");
471                        return Some(Ok(entry));
472                    }
473                } else {
474                    fill_todo(&mut self.todo, &self.dir_patterns,
475                              idx + 1, &path, &self.options);
476                }
477            }
478        }
479    }
480}
481
/// An error found while parsing a glob pattern.
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// The approximate character index of where the error occurred.
    pub pos: usize,

    /// A message describing the error.
    pub msg: &'static str,
}

impl Error for PatternError {
    // The static message doubles as the error description.
    fn description(&self) -> &str {
        let PatternError { msg, .. } = *self;
        msg
    }
}

impl fmt::Display for PatternError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let PatternError { pos, msg } = *self;
        write!(f, "Pattern syntax error near position {}: {}", pos, msg)
    }
}
507
/// An error produced while substituting values into a pattern
#[derive(Debug)]
#[allow(missing_copy_implementations)]
pub enum SubstitutionError {
    /// No value supplied for capture group
    MissingGroup(usize),
    /// Wildcard char `*?[..]` is outside of the capture group
    UnexpectedWildcard,
}

impl Error for SubstitutionError {
    // Every variant shares the same generic description.
    fn description(&self) -> &str {
        "substitution error"
    }
}

impl fmt::Display for SubstitutionError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            SubstitutionError::MissingGroup(index) => {
                write!(f, "substitution error: missing group {}", index)
            }
            SubstitutionError::UnexpectedWildcard => f.write_str("unexpected wildcard"),
        }
    }
}
537
/// A compiled Unix shell style pattern.
///
/// - `?` matches any single character.
///
/// - `*` matches any (possibly empty) sequence of characters.
///
/// - `**` matches the current directory and arbitrary subdirectories. This
///   sequence **must** form a single path component, so both `**a` and `b**`
///   are invalid and will result in an error.  A sequence of more than two
///   consecutive `*` characters is also invalid.
///
/// - `[...]` matches any character inside the brackets.  Character sequences
///   can also specify ranges of characters, as ordered by Unicode, so e.g.
///   `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed
///   bracket is invalid.
///
/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters
///   **not** in the brackets.
///
/// - `(` and `)` delimit a capture group; the text matched inside the group
///   can be retrieved from the `Entry` returned by the `captures*` methods.
///   Parentheses must be balanced, otherwise compiling the pattern fails.
///
/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
///   (e.g. `[?]`).  When a `]` occurs immediately following `[` or `[!` then it
///   is interpreted as being part of, rather than ending, the character set, so
///   `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.  The `-`
///   character can be specified inside a character sequence pattern by placing
///   it at the start or the end, e.g. `[abc-]`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Pattern {
    // The pattern text exactly as it was passed to the constructor.
    original: String,
    // Compiled form of `original`.
    tokens: Vec<PatternToken>,
    // Set when the pattern contains a `**` recursive wildcard.
    is_recursive: bool,
}
569
570/// Show the original glob pattern.
571impl fmt::Display for Pattern {
572    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
573        self.original.fmt(f)
574    }
575}
576
577impl FromStr for Pattern {
578    type Err = PatternError;
579
580    fn from_str(s: &str) -> Result<Pattern, PatternError> {
581        Pattern::new(s)
582    }
583}
584
// One element of a compiled pattern, as produced by `Pattern::new_options`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum PatternToken {
    // A literal character.
    Char(char),
    // `?`
    AnyChar,
    // `*`
    AnySequence,
    // `**`
    AnyRecursiveSequence,
    // `[...]`
    AnyWithin(Vec<CharSpecifier>),
    // `[!...]`
    AnyExcept(Vec<CharSpecifier>),
    // `(`: the index of the capture group being opened. The flag is `true`
    // when the parenthesis was consumed right after a `**`, `false` otherwise.
    StartCapture(usize, bool),
    // `)`: the index of the capture group being closed; same flag as above.
    EndCapture(usize, bool),
}
596
// One item of a bracket expression (`[...]` / `[!...]`).
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    // A single character.
    SingleChar(char),
    // A range of characters such as `a-z` -- presumably (start, end), both
    // inclusive; confirm against `parse_char_specifiers`.
    CharRange(char, char),
}
602
// Outcome of matching a string against a pattern; `matches_with` reports
// success only for `Match`.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    Match,
    // NOTE(review): the two failure variants presumably let the matcher
    // distinguish "try another split" from "give up" -- confirm against
    // `matches_from`.
    SubPatternDoesntMatch,
    EntirePatternDoesntMatch,
}
609
// Outcome of matching a string against a pattern while recording capture
// groups; mirrors `MatchResult`, except that `Match` carries a (currently
// unit) payload.
#[derive(Clone, PartialEq)]
enum CaptureResult {
    Match(()),
    SubPatternDoesntMatch,
    EntirePatternDoesntMatch,
}
616
// Reported for a run of more than two consecutive `*`.
const ERROR_WILDCARDS: &'static str = "wildcards are either regular `*` or recursive `**`";
// Reported when `**` is not a path component on its own.
const ERROR_RECURSIVE_WILDCARDS: &'static str = "recursive wildcards must form a single path \
                                                 component";
// Reported for a `[` that does not start a valid bracket expression.
const ERROR_INVALID_RANGE: &'static str = "invalid range pattern";
621
// Reports whether `s`, ignoring any trailing parentheses, ends with a path
// separator. A slice that is empty or consists only of parentheses counts as
// ending with a separator.
fn ends_with_sep(s: &[char]) -> bool {
    s.iter()
        .rev()
        .find(|&&c| c != '(' && c != ')')
        .map_or(true, |&c| path::is_separator(c))
}
634
635impl Pattern {
636    /// This function compiles Unix shell style patterns.
637    ///
638    /// An invalid glob pattern will yield a `PatternError`.
639    pub fn new(pattern: &str) -> Result<Pattern, PatternError> {
640        Pattern::new_options(pattern, false)
641    }
642    /// The `skip_groups` of `true` is needed to compile partial patterns in
643    /// glob directory scanner
644    fn new_options(pattern: &str, skip_groups: bool)
645        -> Result<Pattern, PatternError>
646    {
647        use self::PatternToken::*;
648
649        let chars = pattern.chars().collect::<Vec<_>>();
650        let mut tokens = Vec::new();
651        let mut is_recursive = false;
652        let mut i = 0;
653        let mut last_capture = 0;
654        let mut captures_stack = Vec::new();
655
656        while i < chars.len() {
657            match chars[i] {
658                '?' => {
659                    tokens.push(AnyChar);
660                    i += 1;
661                }
662                '*' => {
663                    let old = i;
664
665                    while i < chars.len() && chars[i] == '*' {
666                        i += 1;
667                    }
668
669                    let count = i - old;
670
671                    if count > 2 {
672                        return Err(PatternError {
673                            pos: old + 2,
674                            msg: ERROR_WILDCARDS,
675                        });
676                    } else if count == 2 {
677                        // collapse consecutive AnyRecursiveSequence to a
678                        // single one
679                        let tokens_len = tokens.len();
680                        if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) {
681                            is_recursive = true;
682                            tokens.push(AnyRecursiveSequence);
683                        }
684                        // ** can only be an entire path component
685                        // i.e. a/**/b is valid, but a**/b or a/**b is not
686                        // invalid matches are treated literally
687                        if ends_with_sep(&chars[..i - count]) {
688                            // it ends in a '/' sans parenthesis
689                            while i < chars.len() &&
690                                (chars[i] == '(' || chars[i] == ')')
691                            {
692                                if !skip_groups {
693                                    if chars[i] == '(' {
694                                        captures_stack.push((last_capture, i));
695                                        tokens.push(StartCapture(last_capture, true));
696                                        last_capture += 1;
697                                    } else if chars[i] == ')' {
698                                        if let Some((c, _)) = captures_stack.pop()
699                                        {
700                                            tokens.push(EndCapture(c, true));
701                                        } else {
702                                            return Err(PatternError {
703                                                pos: i,
704                                                msg: "Unmatched closing paren",
705                                            });
706                                        }
707                                    }
708                                }
709                                i += 1;
710                            }
711                            if i < chars.len() && path::is_separator(chars[i]) {
712                                i += 1;
713                                // or the pattern ends here
714                                // this enables the existing globbing mechanism
715                            } else if i == chars.len() {
716                                // `**` ends in non-separator
717                            } else {
718                                return Err(PatternError {
719                                    pos: i,
720                                    msg: ERROR_RECURSIVE_WILDCARDS,
721                                });
722                            }
723                            // `**` begins with non-separator
724                        } else {
725                            return Err(PatternError {
726                                pos: old - 1,
727                                msg: ERROR_RECURSIVE_WILDCARDS,
728                            });
729                        }
730                    } else {
731                        tokens.push(AnySequence);
732                    }
733                }
734                '[' => {
735
736                    if i + 4 <= chars.len() && chars[i + 1] == '!' {
737                        match chars[i + 3..].iter().position(|x| *x == ']') {
738                            None => (),
739                            Some(j) => {
740                                let chars = &chars[i + 2..i + 3 + j];
741                                let cs = parse_char_specifiers(chars);
742                                tokens.push(AnyExcept(cs));
743                                i += j + 4;
744                                continue;
745                            }
746                        }
747                    } else if i + 3 <= chars.len() && chars[i + 1] != '!' {
748                        match chars[i + 2..].iter().position(|x| *x == ']') {
749                            None => (),
750                            Some(j) => {
751                                let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]);
752                                tokens.push(AnyWithin(cs));
753                                i += j + 3;
754                                continue;
755                            }
756                        }
757                    }
758
759                    // if we get here then this is not a valid range pattern
760                    return Err(PatternError {
761                        pos: i,
762                        msg: ERROR_INVALID_RANGE,
763                    });
764                }
765                '(' => {
766                    if !skip_groups {
767                        captures_stack.push((last_capture, i));
768                        tokens.push(StartCapture(last_capture, false));
769                        last_capture += 1;
770                    }
771                    i += 1;
772                }
773                ')' => {
774                    if !skip_groups {
775                        if let Some((c, _)) = captures_stack.pop() {
776                            tokens.push(EndCapture(c, false));
777                        } else {
778                            return Err(PatternError {
779                                pos: i,
780                                msg: "Unmatched closing paren",
781                            });
782                        }
783                    }
784                    i += 1;
785                }
786                c => {
787                    tokens.push(Char(c));
788                    i += 1;
789                }
790            }
791        }
792
793        for (_, i) in captures_stack {
794            return Err(PatternError {
795                pos: i,
796                msg: "Unmatched opening paren",
797            })
798        }
799
800        Ok(Pattern {
801            tokens: tokens,
802            original: pattern.to_string(),
803            is_recursive: is_recursive,
804        })
805    }
806
807    /// Escape metacharacters within the given string by surrounding them in
808    /// brackets. The resulting string will, when compiled into a `Pattern`,
809    /// match the input string and nothing else.
810    pub fn escape(s: &str) -> String {
811        let mut escaped = String::new();
812        for c in s.chars() {
813            match c {
814                // note that ! does not need escaping because it is only special
815                // inside brackets
816                '?' | '*' | '[' | ']' => {
817                    escaped.push('[');
818                    escaped.push(c);
819                    escaped.push(']');
820                }
821                c => {
822                    escaped.push(c);
823                }
824            }
825        }
826        escaped
827    }
828
829    /// Return if the given `str` matches this `Pattern` using the default
830    /// match options (i.e. `MatchOptions::new()`).
831    ///
832    /// # Examples
833    ///
834    /// ```rust
835    /// use capturing_glob::Pattern;
836    ///
837    /// assert!(Pattern::new("c?t").unwrap().matches("cat"));
838    /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh"));
839    /// assert!(Pattern::new("d*g").unwrap().matches("doog"));
840    /// ```
841    pub fn matches(&self, str: &str) -> bool {
842        self.matches_with(str, &MatchOptions::new())
843    }
844
845    /// Return if the given `Path`, when converted to a `str`, matches this
846    /// `Pattern` using the default match options (i.e. `MatchOptions::new()`).
847    pub fn matches_path(&self, path: &Path) -> bool {
848        // FIXME (#9639): This needs to handle non-utf8 paths
849        path.to_str().map_or(false, |s| self.matches(s))
850    }
851
852    /// Return if the given `str` matches this `Pattern` using the specified
853    /// match options.
854    pub fn matches_with(&self, str: &str, options: &MatchOptions) -> bool {
855        self.matches_from(true, str.chars(), 0, options) == Match
856    }
857
858    /// Return if the given `Path`, when converted to a `str`, matches this
859    /// `Pattern` using the specified match options.
860    pub fn matches_path_with(&self, path: &Path, options: &MatchOptions) -> bool {
861        // FIXME (#9639): This needs to handle non-utf8 paths
862        path.to_str().map_or(false, |s| self.matches_with(s, options))
863    }
864
865    /// Access the original glob pattern.
866    pub fn as_str<'a>(&'a self) -> &'a str {
867        &self.original
868    }
869
870    /// Return entry if filename matches pattern
871    ///
872    /// Then you can extract capture groups from entry
873    ///
874    /// # Examples
875    ///
876    /// ```rust
877    /// use capturing_glob::Pattern;
878    ///
879    /// assert_eq!(Pattern::new("(*).txt").unwrap()
880    ///     .captures("some.txt").unwrap()
881    ///     .group(1).unwrap(),
882    ///     "some");
883    /// ```
884    pub fn captures(&self, str: &str) -> Option<Entry> {
885        self.captures_with(str, &MatchOptions::new())
886    }
887
888    /// Return an entry if filename converted to str matches pattern
889    pub fn captures_path(&self, path: &Path)
890        -> Option<Entry>
891    {
892        self.captures_path_with(path, &MatchOptions::new())
893    }
894
895    /// Return an entry if filename converted to str matches pattern
896    pub fn captures_path_with(&self, path: &Path, options: &MatchOptions)
897        -> Option<Entry>
898    {
899        // FIXME (#9639): This needs to handle non-utf8 paths
900        path.to_str().map_or(None, |s| self.captures_with(s, options))
901    }
902
903    /// Return entry if filename matches pattern
904    pub fn captures_with(&self, str: &str, options: &MatchOptions)
905        -> Option<Entry>
906    {
907        use self::CaptureResult::Match;
908        let mut buf = Vec::new();
909        let iter = str.chars();
910        match self.captures_from(true, iter, 0, str, &mut buf, options) {
911            Match(()) => {
912                Some(Entry::with_captures(str, buf))
913            }
914            _ => None,
915        }
916    }
917
918    fn matches_from(&self,
919                    mut follows_separator: bool,
920                    mut file: std::str::Chars,
921                    i: usize,
922                    options: &MatchOptions)
923                    -> MatchResult
924    {
925        use self::PatternToken::*;
926
927        for (ti, token) in self.tokens[i..].iter().enumerate() {
928            match *token {
929                AnySequence | AnyRecursiveSequence => {
930                    // ** must be at the start.
931                    debug_assert!(match *token {
932                        AnyRecursiveSequence => follows_separator,
933                        _ => true,
934                    });
935
936                    // Empty match
937                    match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) {
938                        SubPatternDoesntMatch => (), // keep trying
939                        m => return m,
940                    };
941
942                    while let Some(c) = file.next() {
943                        if follows_separator && options.require_literal_leading_dot && c == '.' {
944                            return SubPatternDoesntMatch;
945                        }
946                        follows_separator = path::is_separator(c);
947                        match *token {
948                            AnyRecursiveSequence if !follows_separator => continue,
949                            AnySequence if options.require_literal_separator &&
950                                           follows_separator => return SubPatternDoesntMatch,
951                            _ => (),
952                        }
953                        match self.matches_from(follows_separator,
954                                                file.clone(),
955                                                i + ti + 1,
956                                                options) {
957                            SubPatternDoesntMatch => (), // keep trying
958                            m => return m,
959                        }
960                    }
961                }
962                StartCapture(..) | EndCapture(..) => {}
963                _ => {
964                    let c = match file.next() {
965                        Some(c) => c,
966                        None => return EntirePatternDoesntMatch,
967                    };
968
969                    let is_sep = path::is_separator(c);
970
971                    if !match *token {
972                        AnyChar | AnyWithin(..) | AnyExcept(..)
973                            if (options.require_literal_separator && is_sep) ||
974                            (follows_separator && options.require_literal_leading_dot &&
975                             c == '.') => false,
976                        AnyChar => true,
977                        AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options),
978                        AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options),
979                        Char(c2) => chars_eq(c, c2, options.case_sensitive),
980                        AnySequence | AnyRecursiveSequence => unreachable!(),
981                        StartCapture(..) | EndCapture(..) => unreachable!(),
982                    } {
983                        return SubPatternDoesntMatch;
984                    }
985                    follows_separator = is_sep;
986                }
987            }
988        }
989
990        // Iter is fused.
991        if file.next().is_none() {
992            Match
993        } else {
994            SubPatternDoesntMatch
995        }
996    }
997
998    fn captures_from(&self,
999                    mut follows_separator: bool,
1000                    mut file: std::str::Chars,
1001                    i: usize, fname: &str,
1002                    captures: &mut Vec<(usize, usize)>,
1003                    options: &MatchOptions)
1004        -> CaptureResult
1005    {
1006        use self::PatternToken::*;
1007        use self::CaptureResult::*;
1008
1009        for (ti, token) in self.tokens[i..].iter().enumerate() {
1010            match *token {
1011                AnySequence | AnyRecursiveSequence => {
1012                    // ** must be at the start.
1013                    debug_assert!(match *token {
1014                        AnyRecursiveSequence => follows_separator,
1015                        _ => true,
1016                    });
1017
1018                    // Empty match
1019                    match self.captures_from(follows_separator, file.clone(),
1020                        i + ti + 1, fname, captures, options)
1021                    {
1022                        SubPatternDoesntMatch => (), // keep trying
1023                        m => return m,
1024                    };
1025
1026                    while let Some(c) = file.next() {
1027                        if follows_separator && options.require_literal_leading_dot && c == '.' {
1028                            return SubPatternDoesntMatch;
1029                        }
1030                        follows_separator = path::is_separator(c);
1031                        match *token {
1032                            AnyRecursiveSequence if !follows_separator => continue,
1033                            AnySequence if options.require_literal_separator &&
1034                                           follows_separator => return SubPatternDoesntMatch,
1035                            _ => (),
1036                        }
1037                        match self.captures_from(follows_separator,
1038                                                file.clone(),
1039                                                i + ti + 1,
1040                                                fname, captures,
1041                                                options) {
1042                            SubPatternDoesntMatch => (), // keep trying
1043                            m => return m,
1044                        }
1045                    }
1046                }
1047                StartCapture(n, flag) => {
1048                    let mut off = fname.len() - file.as_str().len();
1049                    if flag && fname[..off].ends_with('/') {
1050                        off -= 1;
1051                    }
1052                    while captures.len() < n+1 {
1053                        captures.push((0, 0));
1054                    }
1055                    captures[n] = (off, off);
1056                }
1057                EndCapture(n, flag) => {
1058                    let mut off = fname.len() - file.as_str().len();
1059                    if flag && fname[..off].ends_with('/') {
1060                        off -= 1;
1061                    }
1062                    if off < captures[n].0 {
1063                        // if "a/**/b" matches "a/b"
1064                        off = captures[n].0;
1065                    }
1066                    captures[n].1 = off;
1067                }
1068                _ => {
1069                    let c = match file.next() {
1070                        Some(pair) => pair,
1071                        None => return EntirePatternDoesntMatch,
1072                    };
1073
1074                    let is_sep = path::is_separator(c);
1075
1076                    if !match *token {
1077                        AnyChar | AnyWithin(..) | AnyExcept(..)
1078                            if (options.require_literal_separator && is_sep) ||
1079                            (follows_separator && options.require_literal_leading_dot &&
1080                             c == '.') => false,
1081                        AnyChar => true,
1082                        AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options),
1083                        AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options),
1084                        Char(c2) => chars_eq(c, c2, options.case_sensitive),
1085                        AnySequence | AnyRecursiveSequence => unreachable!(),
1086                        StartCapture(..) | EndCapture(..) => unreachable!(),
1087                    } {
1088                        return SubPatternDoesntMatch;
1089                    }
1090                    follows_separator = is_sep;
1091                }
1092            }
1093        }
1094
1095        // Iter is fused.
1096        if file.next().is_none() {
1097            Match(())
1098        } else {
1099            SubPatternDoesntMatch
1100        }
1101    }
1102    /// Substitute values back into patterns replacing capture groups
1103    ///
1104    /// ```rust
1105    /// # use std::error::Error;
1106    /// use capturing_glob::Pattern;
1107    ///
1108    /// # fn run() -> Result<(), Box<Error>> {
1109    /// assert_eq!(Pattern::new("images/(*).jpg")?.substitute(&["cat"])?,
1110    ///            "images/cat.jpg");
1111    /// # Ok(())
1112    /// # }
1113    /// # fn main() { run().unwrap() }
1114    /// ```
1115    ///
1116    /// Note: we check neither result so it matches pattern.
1117    pub fn substitute(&self, capture_groups: &[&str])
1118        -> Result<String, SubstitutionError>
1119    {
1120        use self::PatternToken::*;
1121
1122        let mut result = String::with_capacity(self.original.len());
1123        let mut iter = self.tokens.iter();
1124        while let Some(tok) = iter.next() {
1125            match *tok {
1126                Char(c) => result.push(c),
1127                AnyChar | AnySequence | AnyRecursiveSequence |
1128                AnyWithin(..) | AnyExcept(..)
1129                => {
1130                    return Err(SubstitutionError::UnexpectedWildcard);
1131                }
1132                StartCapture(idx, _) => {
1133                    if let Some(val) = capture_groups.get(idx) {
1134                        result.push_str(val);
1135                    } else {
1136                        return Err(SubstitutionError::MissingGroup(idx));
1137                    }
1138                    for tok in iter.by_ref() {
1139                        match *tok {
1140                            EndCapture(i, _) if idx == i => break,
1141                            _ => {}
1142                        }
1143                    }
1144                }
1145                EndCapture(_, _) => unreachable!(),
1146            }
1147        }
1148        return Ok(result)
1149    }
1150}
1151
1152// Fills `todo` with paths under `path` to be matched by `patterns[idx]`,
1153// special-casing patterns to match `.` and `..`, and avoiding `readdir()`
1154// calls when there are no metacharacters in the pattern.
1155fn fill_todo(todo: &mut Vec<Result<(PathBuf, usize), GlobError>>,
1156             patterns: &[Pattern],
1157             idx: usize,
1158             path: &Path,
1159             options: &MatchOptions) {
1160    // convert a pattern that's just many Char(_) to a string
1161    fn pattern_as_str(pattern: &Pattern) -> Option<String> {
1162        let mut s = String::new();
1163        for token in pattern.tokens.iter() {
1164            match *token {
1165                PatternToken::Char(c) => s.push(c),
1166                _ => return None,
1167            }
1168        }
1169        return Some(s);
1170    }
1171
1172    let add = |todo: &mut Vec<_>, next_path: PathBuf| {
1173        if idx + 1 == patterns.len() {
1174            // We know it's good, so don't make the iterator match this path
1175            // against the pattern again. In particular, it can't match
1176            // . or .. globs since these never show up as path components.
1177            todo.push(Ok((next_path, !0 as usize)));
1178        } else {
1179            fill_todo(todo, patterns, idx + 1, &next_path, options);
1180        }
1181    };
1182
1183    let pattern = &patterns[idx];
1184    let is_dir = is_dir(path);
1185    let curdir = path == Path::new(".");
1186    match pattern_as_str(pattern) {
1187        Some(s) => {
1188            // This pattern component doesn't have any metacharacters, so we
1189            // don't need to read the current directory to know where to
1190            // continue. So instead of passing control back to the iterator,
1191            // we can just check for that one entry and potentially recurse
1192            // right away.
1193            let special = "." == s || ".." == s;
1194            let next_path = if curdir {
1195                PathBuf::from(s)
1196            } else {
1197                path.join(&s)
1198            };
1199            if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) {
1200                add(todo, next_path);
1201            }
1202        }
1203        None if is_dir => {
1204            let dirs = fs::read_dir(path).and_then(|d| {
1205                d.map(|e| {
1206                     e.map(|e| {
1207                         if curdir {
1208                             PathBuf::from(e.path().file_name().unwrap())
1209                         } else {
1210                             e.path()
1211                         }
1212                     })
1213                 })
1214                 .collect::<Result<Vec<_>, _>>()
1215            });
1216            match dirs {
1217                Ok(mut children) => {
1218                    children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name()));
1219                    todo.extend(children.into_iter().map(|x| Ok((x, idx))));
1220
1221                    // Matching the special directory entries . and .. that
1222                    // refer to the current and parent directory respectively
1223                    // requires that the pattern has a leading dot, even if the
1224                    // `MatchOptions` field `require_literal_leading_dot` is not
1225                    // set.
1226                    if pattern.tokens.len() > 0 && pattern.tokens[0] == PatternToken::Char('.') {
1227                        for &special in [".", ".."].iter() {
1228                            if pattern.matches_with(special, options) {
1229                                add(todo, path.join(special));
1230                            }
1231                        }
1232                    }
1233                }
1234                Err(e) => {
1235                    todo.push(Err(GlobError {
1236                        path: path.to_path_buf(),
1237                        error: e,
1238                    }));
1239                }
1240            }
1241        }
1242        None => {
1243            // not a directory, nothing more to find
1244        }
1245    }
1246}
1247
1248fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> {
1249    let mut cs = Vec::new();
1250    let mut i = 0;
1251    while i < s.len() {
1252        if i + 3 <= s.len() && s[i + 1] == '-' {
1253            cs.push(CharRange(s[i], s[i + 2]));
1254            i += 3;
1255        } else {
1256            cs.push(SingleChar(s[i]));
1257            i += 1;
1258        }
1259    }
1260    cs
1261}
1262
1263fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: &MatchOptions) -> bool {
1264
1265    for &specifier in specifiers.iter() {
1266        match specifier {
1267            SingleChar(sc) => {
1268                if chars_eq(c, sc, options.case_sensitive) {
1269                    return true;
1270                }
1271            }
1272            CharRange(start, end) => {
1273
1274                // FIXME: work with non-ascii chars properly (issue #1347)
1275                if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
1276
1277                    let start = start.to_ascii_lowercase();
1278                    let end = end.to_ascii_lowercase();
1279
1280                    let start_up = start.to_uppercase().next().unwrap();
1281                    let end_up = end.to_uppercase().next().unwrap();
1282
1283                    // only allow case insensitive matching when
1284                    // both start and end are within a-z or A-Z
1285                    if start != start_up && end != end_up {
1286                        let c = c.to_ascii_lowercase();
1287                        if c >= start && c <= end {
1288                            return true;
1289                        }
1290                    }
1291                }
1292
1293                if c >= start && c <= end {
1294                    return true;
1295                }
1296            }
1297        }
1298    }
1299
1300    false
1301}
1302
/// Compare two chars for equality, optionally ignoring ASCII case.
///
/// On Windows any two path-separator characters compare equal.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    if cfg!(windows) && path::is_separator(a) && path::is_separator(b) {
        return true;
    }
    if case_sensitive || !a.is_ascii() || !b.is_ascii() {
        // FIXME: work with non-ascii chars properly (issue #9084)
        return a == b;
    }
    a.eq_ignore_ascii_case(&b)
}
1314
1315
/// Options that tune how `Pattern::matches_with(..)` and related functions
/// compare a pattern with a path.
///
/// Note that the derived `Default` sets every field to `false`, whereas
/// `MatchOptions::new()` sets `case_sensitive` to `true`.
#[allow(missing_copy_implementations)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct MatchOptions {
    /// Whether or not patterns should be matched in a case-sensitive manner.
    /// This currently only considers upper/lower case relationships between
    /// ASCII characters, but in future this might be extended to work with
    /// Unicode.
    pub case_sensitive: bool,

    /// Whether or not path-component separator characters (e.g. `/` on
    /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or
    /// `[...]`.
    pub require_literal_separator: bool,

    /// Whether or not paths that contain components that start with a `.`
    /// will require that `.` appears literally in the pattern; `*`, `?`, `**`,
    /// or `[...]` will not match. This is useful because such files are
    /// conventionally considered hidden on Unix systems and it might be
    /// desirable to skip them when listing files.
    pub require_literal_leading_dot: bool,
}

impl MatchOptions {
    /// Build the `MatchOptions` used by the functions that do not take an
    /// explicit `MatchOptions` argument.
    ///
    /// The returned value is always:
    ///
    /// ```rust,ignore
    /// MatchOptions {
    ///     case_sensitive: true,
    ///     require_literal_separator: false,
    ///     require_literal_leading_dot: false
    /// }
    /// ```
    ///
    /// This is not the same as `MatchOptions::default()`, whose
    /// `case_sensitive` field is `false`.
    pub fn new() -> MatchOptions {
        let case_sensitive = true;
        let require_literal_separator = false;
        let require_literal_leading_dot = false;
        MatchOptions {
            case_sensitive: case_sensitive,
            require_literal_separator: require_literal_separator,
            require_literal_leading_dot: require_literal_leading_dot,
        }
    }
}
1361
1362#[cfg(test)]
1363mod test {
1364    use std::path::Path;
1365    use super::{glob, Pattern, MatchOptions};
1366
1367    #[test]
1368    fn test_pattern_from_str() {
1369        assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
1370        assert!("a/**b".parse::<Pattern>().unwrap_err().pos == 4);
1371    }
1372
1373    #[test]
1374    fn test_wildcard_errors() {
1375        assert!(Pattern::new("a/**b").unwrap_err().pos == 4);
1376        assert!(Pattern::new("a/bc**").unwrap_err().pos == 3);
1377        assert!(Pattern::new("a/*****").unwrap_err().pos == 4);
1378        assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2);
1379        assert!(Pattern::new("a**b").unwrap_err().pos == 0);
1380    }
1381
1382    #[test]
1383    fn test_unclosed_bracket_errors() {
1384        assert!(Pattern::new("abc[def").unwrap_err().pos == 3);
1385        assert!(Pattern::new("abc[!def").unwrap_err().pos == 3);
1386        assert!(Pattern::new("abc[").unwrap_err().pos == 3);
1387        assert!(Pattern::new("abc[!").unwrap_err().pos == 3);
1388        assert!(Pattern::new("abc[d").unwrap_err().pos == 3);
1389        assert!(Pattern::new("abc[!d").unwrap_err().pos == 3);
1390        assert!(Pattern::new("abc[]").unwrap_err().pos == 3);
1391        assert!(Pattern::new("abc[!]").unwrap_err().pos == 3);
1392    }
1393
1394    #[test]
1395    fn test_glob_errors() {
1396        assert!(glob("a/**b").err().unwrap().pos == 4);
1397        assert!(glob("abc[def").err().unwrap().pos == 3);
1398    }
1399
1400    // this test assumes that there is a /root directory and that
1401    // the user running this test is not root or otherwise doesn't
1402    // have permission to read its contents
1403    #[cfg(unix)]
1404    #[test]
1405    fn test_iteration_errors() {
1406        use std::io;
1407        let mut iter = glob("/root/*").unwrap();
1408
1409        // GlobErrors shouldn't halt iteration
1410        let next = iter.next();
1411        assert!(next.is_some());
1412
1413        let err = next.unwrap();
1414        assert!(err.is_err());
1415
1416        let err = err.err().unwrap();
1417        assert!(err.path() == Path::new("/root"));
1418        assert!(err.error().kind() == io::ErrorKind::PermissionDenied);
1419    }
1420
1421    #[test]
1422    fn test_absolute_pattern() {
1423        assert!(glob("/").unwrap().next().is_some());
1424        assert!(glob("//").unwrap().next().is_some());
1425
1426        // assume that the filesystem is not empty!
1427        assert!(glob("/*").unwrap().next().is_some());
1428
1429        #[cfg(not(windows))]
1430        fn win() {}
1431
1432        #[cfg(windows)]
1433        fn win() {
1434            use std::env::current_dir;
1435            use std::ffi::AsOsStr;
1436
1437            // check windows absolute paths with host/device components
1438            let root_with_device = current_dir()
1439                                       .ok()
1440                                       .and_then(|p| p.prefix().map(|p| p.join("*")))
1441                                       .unwrap();
1442            // FIXME (#9639): This needs to handle non-utf8 paths
1443            assert!(glob(root_with_device.as_os_str().to_str().unwrap()).unwrap().next().is_some());
1444        }
1445        win()
1446    }
1447
1448    #[test]
1449    fn test_wildcards() {
1450        assert!(Pattern::new("a*b").unwrap().matches("a_b"));
1451        assert!(Pattern::new("a*b*c").unwrap().matches("abc"));
1452        assert!(!Pattern::new("a*b*c").unwrap().matches("abcd"));
1453        assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c"));
1454        assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c"));
1455        assert!(Pattern::new("abc*abc*abc").unwrap().matches("abcabcabcabcabcabcabc"));
1456        assert!(!Pattern::new("abc*abc*abc").unwrap().matches("abcabcabcabcabcabcabca"));
1457        assert!(Pattern::new("a*a*a*a*a*a*a*a*a")
1458                    .unwrap()
1459                    .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
1460        assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd"));
1461        assert!(Pattern::new("some/only-(*).txt").unwrap().matches("some/only-file1.txt"));
1462    }
1463
1464    #[test]
1465    fn test_recursive_wildcards() {
1466        let pat = Pattern::new("some/**/needle.txt").unwrap();
1467        assert!(pat.matches("some/needle.txt"));
1468        assert!(pat.matches("some/one/needle.txt"));
1469        assert!(pat.matches("some/one/two/needle.txt"));
1470        assert!(pat.matches("some/other/needle.txt"));
1471        assert!(!pat.matches("some/other/notthis.txt"));
1472
1473        // a single ** should be valid, for globs
1474        // Should accept anything
1475        let pat = Pattern::new("**").unwrap();
1476        assert!(pat.is_recursive);
1477        assert!(pat.matches("abcde"));
1478        assert!(pat.matches(""));
1479        assert!(pat.matches(".asdf"));
1480        assert!(pat.matches("/x/.asdf"));
1481
1482
1483        // collapse consecutive wildcards
1484        let pat = Pattern::new("some/**/**/needle.txt").unwrap();
1485        assert!(pat.matches("some/needle.txt"));
1486        assert!(pat.matches("some/one/needle.txt"));
1487        assert!(pat.matches("some/one/two/needle.txt"));
1488        assert!(pat.matches("some/other/needle.txt"));
1489        assert!(!pat.matches("some/other/notthis.txt"));
1490
1491        // ** can begin the pattern
1492        let pat = Pattern::new("**/test").unwrap();
1493        assert!(pat.matches("one/two/test"));
1494        assert!(pat.matches("one/test"));
1495        assert!(pat.matches("test"));
1496
1497        // /** can begin the pattern
1498        let pat = Pattern::new("/**/test").unwrap();
1499        assert!(pat.matches("/one/two/test"));
1500        assert!(pat.matches("/one/test"));
1501        assert!(pat.matches("/test"));
1502        assert!(!pat.matches("/one/notthis"));
1503        assert!(!pat.matches("/notthis"));
1504
1505        // Only start sub-patterns on start of path segment.
1506        let pat = Pattern::new("**/.*").unwrap();
1507        assert!(pat.matches(".abc"));
1508        assert!(pat.matches("abc/.abc"));
1509        assert!(!pat.matches("ab.c"));
1510        assert!(!pat.matches("abc/ab.c"));
1511    }
1512
1513    #[test]
1514    fn test_lots_of_files() {
1515        // this is a good test because it touches lots of differently named files
1516        glob("/*/*/*/*").unwrap().skip(10000).next();
1517    }
1518
1519    #[test]
1520    fn test_range_pattern() {
1521
1522        let pat = Pattern::new("a[0-9]b").unwrap();
1523        for i in 0..10 {
1524            assert!(pat.matches(&format!("a{}b", i)));
1525        }
1526        assert!(!pat.matches("a_b"));
1527
1528        let pat = Pattern::new("a[!0-9]b").unwrap();
1529        for i in 0..10 {
1530            assert!(!pat.matches(&format!("a{}b", i)));
1531        }
1532        assert!(pat.matches("a_b"));
1533
1534        let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
1535        for &p in pats.iter() {
1536            let pat = Pattern::new(p).unwrap();
1537            for c in "abcdefghijklmnopqrstuvwxyz".chars() {
1538                assert!(pat.matches(&c.to_string()));
1539            }
1540            for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
1541                let options = MatchOptions { case_sensitive: false, ..MatchOptions::new() };
1542                assert!(pat.matches_with(&c.to_string(), &options));
1543            }
1544            assert!(pat.matches("1"));
1545            assert!(pat.matches("2"));
1546            assert!(pat.matches("3"));
1547        }
1548
1549        let pats = ["[abc-]", "[-abc]", "[a-c-]"];
1550        for &p in pats.iter() {
1551            let pat = Pattern::new(p).unwrap();
1552            assert!(pat.matches("a"));
1553            assert!(pat.matches("b"));
1554            assert!(pat.matches("c"));
1555            assert!(pat.matches("-"));
1556            assert!(!pat.matches("d"));
1557        }
1558
1559        let pat = Pattern::new("[2-1]").unwrap();
1560        assert!(!pat.matches("1"));
1561        assert!(!pat.matches("2"));
1562
1563        assert!(Pattern::new("[-]").unwrap().matches("-"));
1564        assert!(!Pattern::new("[!-]").unwrap().matches("-"));
1565    }
1566
1567    #[test]
1568    fn test_pattern_matches() {
1569        let txt_pat = Pattern::new("*hello.txt").unwrap();
1570        assert!(txt_pat.matches("hello.txt"));
1571        assert!(txt_pat.matches("gareth_says_hello.txt"));
1572        assert!(txt_pat.matches("some/path/to/hello.txt"));
1573        assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
1574        assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
1575        assert!(!txt_pat.matches("hello.txt-and-then-some"));
1576        assert!(!txt_pat.matches("goodbye.txt"));
1577
1578        let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
1579        assert!(dir_pat.matches("some/path/to/hello.txt"));
1580        assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
1581        assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
1582        assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
1583    }
1584
1585    #[test]
1586    fn test_pattern_escape() {
1587        let s = "_[_]_?_*_!_";
1588        assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string());
1589        assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s));
1590    }
1591
1592    #[test]
1593    fn test_pattern_matches_case_insensitive() {
1594
1595        let pat = Pattern::new("aBcDeFg").unwrap();
1596        let options = MatchOptions {
1597            case_sensitive: false,
1598            require_literal_separator: false,
1599            require_literal_leading_dot: false,
1600        };
1601
1602        assert!(pat.matches_with("aBcDeFg", &options));
1603        assert!(pat.matches_with("abcdefg", &options));
1604        assert!(pat.matches_with("ABCDEFG", &options));
1605        assert!(pat.matches_with("AbCdEfG", &options));
1606    }
1607
1608    #[test]
1609    fn test_pattern_matches_case_insensitive_range() {
1610
1611        let pat_within = Pattern::new("[a]").unwrap();
1612        let pat_except = Pattern::new("[!a]").unwrap();
1613
1614        let options_case_insensitive = MatchOptions {
1615            case_sensitive: false,
1616            require_literal_separator: false,
1617            require_literal_leading_dot: false,
1618        };
1619        let options_case_sensitive = MatchOptions {
1620            case_sensitive: true,
1621            require_literal_separator: false,
1622            require_literal_leading_dot: false,
1623        };
1624
1625        assert!(pat_within.matches_with("a", &options_case_insensitive));
1626        assert!(pat_within.matches_with("A", &options_case_insensitive));
1627        assert!(!pat_within.matches_with("A", &options_case_sensitive));
1628
1629        assert!(!pat_except.matches_with("a", &options_case_insensitive));
1630        assert!(!pat_except.matches_with("A", &options_case_insensitive));
1631        assert!(pat_except.matches_with("A", &options_case_sensitive));
1632    }
1633
1634    #[test]
1635    fn test_pattern_matches_require_literal_separator() {
1636
1637        let options_require_literal = MatchOptions {
1638            case_sensitive: true,
1639            require_literal_separator: true,
1640            require_literal_leading_dot: false,
1641        };
1642        let options_not_require_literal = MatchOptions {
1643            case_sensitive: true,
1644            require_literal_separator: false,
1645            require_literal_leading_dot: false,
1646        };
1647
1648        assert!(Pattern::new("abc/def").unwrap().matches_with("abc/def", &options_require_literal));
1649        assert!(!Pattern::new("abc?def")
1650                     .unwrap()
1651                     .matches_with("abc/def", &options_require_literal));
1652        assert!(!Pattern::new("abc*def")
1653                     .unwrap()
1654                     .matches_with("abc/def", &options_require_literal));
1655        assert!(!Pattern::new("abc[/]def")
1656                     .unwrap()
1657                     .matches_with("abc/def", &options_require_literal));
1658
1659        assert!(Pattern::new("abc/def")
1660                    .unwrap()
1661                    .matches_with("abc/def", &options_not_require_literal));
1662        assert!(Pattern::new("abc?def")
1663                    .unwrap()
1664                    .matches_with("abc/def", &options_not_require_literal));
1665        assert!(Pattern::new("abc*def")
1666                    .unwrap()
1667                    .matches_with("abc/def", &options_not_require_literal));
1668        assert!(Pattern::new("abc[/]def")
1669                    .unwrap()
1670                    .matches_with("abc/def", &options_not_require_literal));
1671    }
1672
1673    #[test]
1674    fn test_pattern_matches_require_literal_leading_dot() {
1675
1676        let options_require_literal_leading_dot = MatchOptions {
1677            case_sensitive: true,
1678            require_literal_separator: false,
1679            require_literal_leading_dot: true,
1680        };
1681        let options_not_require_literal_leading_dot = MatchOptions {
1682            case_sensitive: true,
1683            require_literal_separator: false,
1684            require_literal_leading_dot: false,
1685        };
1686
1687        let f = |options| Pattern::new("*.txt").unwrap().matches_with(".hello.txt", options);
1688        assert!(f(&options_not_require_literal_leading_dot));
1689        assert!(!f(&options_require_literal_leading_dot));
1690
1691        let f = |options| Pattern::new(".*.*").unwrap().matches_with(".hello.txt", options);
1692        assert!(f(&options_not_require_literal_leading_dot));
1693        assert!(f(&options_require_literal_leading_dot));
1694
1695        let f = |options| Pattern::new("aaa/bbb/*").unwrap().matches_with("aaa/bbb/.ccc", options);
1696        assert!(f(&options_not_require_literal_leading_dot));
1697        assert!(!f(&options_require_literal_leading_dot));
1698
1699        let f = |options| {
1700            Pattern::new("aaa/bbb/*").unwrap().matches_with("aaa/bbb/c.c.c.", options)
1701        };
1702        assert!(f(&options_not_require_literal_leading_dot));
1703        assert!(f(&options_require_literal_leading_dot));
1704
1705        let f = |options| Pattern::new("aaa/bbb/.*").unwrap().matches_with("aaa/bbb/.ccc", options);
1706        assert!(f(&options_not_require_literal_leading_dot));
1707        assert!(f(&options_require_literal_leading_dot));
1708
1709        let f = |options| Pattern::new("aaa/?bbb").unwrap().matches_with("aaa/.bbb", options);
1710        assert!(f(&options_not_require_literal_leading_dot));
1711        assert!(!f(&options_require_literal_leading_dot));
1712
1713        let f = |options| Pattern::new("aaa/[.]bbb").unwrap().matches_with("aaa/.bbb", options);
1714        assert!(f(&options_not_require_literal_leading_dot));
1715        assert!(!f(&options_require_literal_leading_dot));
1716
1717        let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options);
1718        assert!(f(&options_not_require_literal_leading_dot));
1719        assert!(!f(&options_require_literal_leading_dot));
1720    }
1721
1722    #[test]
1723    fn test_matches_path() {
1724        // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
1725        // tests that / and \ are considered equivalent on windows
1726        assert!(Pattern::new("a/b").unwrap().matches_path(&Path::new("a/b")));
1727    }
1728
1729    #[test]
1730    fn test_path_join() {
1731        let pattern = Path::new("one").join(&Path::new("**/*.rs"));
1732        assert!(Pattern::new(pattern.to_str().unwrap()).is_ok());
1733    }
1734
1735    #[test]
1736    fn test_capture_two_stars() {
1737        let pat = Pattern::new("some/(**)/needle.txt").unwrap();
1738        assert_eq!(pat.captures("some/one/two/needle.txt").unwrap()
1739            .group(1).unwrap(), "one/two");
1740        assert_eq!(pat.captures("some/other/needle.txt").unwrap()
1741            .group(1).unwrap(), "other");
1742        assert!(pat.captures("some/other/not_this.txt").is_none());
1743        assert_eq!(pat.captures("some/needle.txt").unwrap().group(1).unwrap(), "");
1744        assert_eq!(pat.captures("some/one/needle.txt").unwrap()
1745            .group(1).unwrap(), "one");
1746    }
1747
1748    #[test]
1749    fn test_capture_star() {
1750        let opt = MatchOptions {
1751            require_literal_separator: true,
1752            .. MatchOptions::new()
1753        };
1754        let pat = Pattern::new("some/(*)/needle.txt").unwrap();
1755        assert!(pat.captures("some/needle.txt").is_none());
1756        assert_eq!(pat.captures("some/one/needle.txt").unwrap()
1757            .group(1).unwrap(), "one");
1758        assert!(pat.captures_with("some/one/two/needle.txt", &opt).is_none());
1759        assert_eq!(pat.captures("some/other/needle.txt").unwrap()
1760            .group(1).unwrap(), "other");
1761        assert!(pat.captures("some/other/not_this.txt").is_none());
1762    }
1763
1764    #[test]
1765    fn test_capture_name_start() {
1766        let opt = MatchOptions {
1767            require_literal_separator: true,
1768            .. MatchOptions::new()
1769        };
1770        let pat = Pattern::new("some/only-(*).txt").unwrap();
1771        assert!(pat.captures("some/needle.txt").is_none());
1772        assert!(pat.captures("some/one/only-x.txt").is_none());
1773        assert_eq!(pat.captures("some/only-file1.txt").unwrap()
1774            .group(1).unwrap(), "file1");
1775        assert_eq!(pat.captures("some/only-file2.txt").unwrap()
1776            .group(1).unwrap(), "file2");
1777        assert!(pat.captures_with("some/only-dir1/some.txt", &opt).is_none());
1778    }
1779
1780    #[test]
1781    fn test_capture_end() {
1782        let pat = Pattern::new("some/only-(*)").unwrap();
1783        assert!(pat.captures("some/needle.txt").is_none());
1784        assert_eq!(pat.captures("some/only-file1.txt").unwrap()
1785            .group(1).unwrap(), "file1.txt");
1786        assert_eq!(pat.captures("some/only-").unwrap()
1787            .group(1).unwrap(), "");
1788    }
1789
1790    #[test]
1791    fn test_capture_char() {
1792        let pat = Pattern::new("some/file(?).txt").unwrap();
1793        assert_eq!(pat.captures("some/file1.txt").unwrap()
1794            .group(1).unwrap(), "1");
1795        assert_eq!(pat.captures("some/file2.txt").unwrap()
1796            .group(1).unwrap(), "2");
1797        assert!(pat.captures("some/file12.txt").is_none());
1798        assert!(pat.captures("some/file.txt").is_none());
1799    }
1800
1801    #[test]
1802    fn test_paren_two_stars() {
1803        let pat = Pattern::new("some/(**)/needle.txt").unwrap();
1804        assert!(pat.matches("some/one/needle.txt"));
1805        assert!(pat.matches("some/one/two/needle.txt"));
1806        assert!(pat.matches("some/other/needle.txt"));
1807        assert!(!pat.matches("some/other/not_this.txt"));
1808        assert!(pat.matches("some/needle.txt"));
1809    }
1810
1811    #[test]
1812    fn test_paren_star() {
1813        let opt = MatchOptions {
1814            require_literal_separator: true,
1815            .. MatchOptions::new()
1816        };
1817        let pat = Pattern::new("some/(*)/needle.txt").unwrap();
1818        assert!(!pat.matches("some/needle.txt"));
1819        assert!(pat.matches("some/one/needle.txt"));
1820        assert!(!pat.matches_with("some/one/two/needle.txt", &opt));
1821        assert!(pat.matches("some/other/needle.txt"));
1822        assert!(!pat.matches("some/other/not_this.txt"));
1823    }
1824
1825    #[test]
1826    fn test_paren_name_start() {
1827        let opt = MatchOptions {
1828            require_literal_separator: true,
1829            .. MatchOptions::new()
1830        };
1831        let pat = Pattern::new("some/only-(*).txt").unwrap();
1832        assert!(!pat.matches("some/needle.txt"));
1833        assert!(!pat.matches("some/one/only-x.txt"));
1834        assert!(pat.matches("some/only-file1.txt"));
1835        assert!(pat.matches("some/only-file2.txt"));
1836        assert!(!pat.matches_with("some/only-dir1/some.txt", &opt));
1837    }
1838
1839    #[test]
1840    fn test_paren_end() {
1841        let pat = Pattern::new("some/only-(*)").unwrap();
1842        assert!(!pat.matches("some/needle.txt"));
1843        assert!(pat.matches("some/only-file1.txt"));
1844        assert!(pat.matches("some/only-"));
1845    }
1846
1847    #[test]
1848    fn test_paren_char() {
1849        let pat = Pattern::new("some/file(?).txt").unwrap();
1850        assert!(pat.matches("some/file1.txt"));
1851        assert!(pat.matches("some/file2.txt"));
1852        assert!(!pat.matches("some/file12.txt"));
1853        assert!(!pat.matches("some/file.txt"));
1854    }
1855}