globset/
glob.rs

1use std::fmt;
2use std::hash;
3use std::iter;
4use std::ops::{Deref, DerefMut};
5use std::path::{Path, is_separator};
6use std::str;
7
8use regex;
9use regex::bytes::Regex;
10
11use super::{Candidate, Error, ErrorKind, new_regex};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    ///
    /// The extension stored here includes its leading `.`.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` begins with a `/`
    /// and the pattern additionally matches a file path that is equal to
    /// `suffix` with that leading `/` removed. In other words, the text after
    /// the `/` must either make up the entire file path or immediately follow
    /// a `/` in it.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54impl MatchStrategy {
55    /// Returns a matching strategy for the given pattern.
56    pub fn new(pat: &Glob) -> MatchStrategy {
57        if let Some(lit) = pat.basename_literal() {
58            MatchStrategy::BasenameLiteral(lit)
59        } else if let Some(lit) = pat.literal() {
60            MatchStrategy::Literal(lit)
61        } else if let Some(ext) = pat.ext() {
62            MatchStrategy::Extension(ext)
63        } else if let Some(prefix) = pat.prefix() {
64            MatchStrategy::Prefix(prefix)
65        } else if let Some((suffix, component)) = pat.suffix() {
66            MatchStrategy::Suffix { suffix: suffix, component: component }
67        } else if let Some(ext) = pat.required_ext() {
68            MatchStrategy::RequiredExtension(ext)
69        } else {
70            MatchStrategy::Regex
71        }
72    }
73}
74
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
///
/// Equality and hashing are implemented by hand and only take the original
/// pattern string and the options into account; `re` and `tokens` are
/// ignored by both.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The glob pattern string this value was built from.
    glob: String,
    /// The regular expression string generated from `tokens`.
    re: String,
    /// The options this pattern was built with.
    opts: GlobOptions,
    /// The parsed token sequence of the pattern.
    tokens: Tokens,
}
86
87impl PartialEq for Glob {
88  fn eq(&self, other: &Glob) -> bool {
89    self.glob == other.glob && self.opts == other.opts
90  }
91}
92
93impl hash::Hash for Glob {
94  fn hash<H: hash::Hasher>(&self, state: &mut H) {
95    self.glob.hash(state);
96    self.opts.hash(state);
97  }
98}
99
100impl fmt::Display for Glob {
101    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
102        self.glob.fmt(f)
103    }
104}
105
106/// A matcher for a single pattern.
107#[derive(Clone, Debug)]
108pub struct GlobMatcher {
109    /// The underlying pattern.
110    pat: Glob,
111    /// The pattern, as a compiled regex.
112    re: Regex,
113}
114
115impl GlobMatcher {
116    /// Tests whether the given path matches this pattern or not.
117    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
118        self.is_match_candidate(&Candidate::new(path.as_ref()))
119    }
120
121    /// Tests whether the given path matches this pattern or not.
122    pub fn is_match_candidate(&self, path: &Candidate) -> bool {
123        self.re.is_match(&path.path)
124    }
125}
126
/// A matcher for a single pattern that uses the pattern's `MatchStrategy`
/// instead of always running a regex.
///
/// This type only exists in test builds.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The strategy chosen for `pat`.
    strategy: MatchStrategy,
    /// The pattern this matcher was compiled from.
    pat: Glob,
    /// The compiled regex for `pat`, used by the strategies that still need
    /// a regex search.
    re: Regex,
}

#[cfg(test)]
impl GlobStrategic {
    /// Returns true if and only if the given path matches this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Returns true if and only if the given candidate path matches this
    /// pattern, as decided by the stored strategy.
    fn is_match_candidate(&self, candidate: &Candidate) -> bool {
        let haystack = &*candidate.path;

        match self.strategy {
            MatchStrategy::Regex => self.re.is_match(haystack),
            MatchStrategy::RequiredExtension(ref ext) => {
                // The extension is only a pre-filter; the regex has the
                // final say.
                let wanted = ext.as_bytes();
                &*candidate.ext == wanted && self.re.is_match(haystack)
            }
            MatchStrategy::Suffix { ref suffix, component } => {
                let suffix = suffix.as_bytes();
                // When `component` is set the suffix starts with `/`, and a
                // path equal to everything after that `/` matches as well.
                (component && haystack == &suffix[1..])
                    || ends_with(suffix, haystack)
            }
            MatchStrategy::Prefix(ref pre) => {
                starts_with(pre.as_bytes(), haystack)
            }
            MatchStrategy::Extension(ref ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::BasenameLiteral(ref lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Literal(ref lit) => lit.as_bytes() == haystack,
        }
    }
}
175
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern. These are copied into the `Glob` produced by
    /// `build`.
    opts: GlobOptions,
}
189
/// The options that control how a glob pattern is parsed and how it is
/// translated to a regular expression.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to enable Unicode support.
    ///
    /// When disabled, the generated regex starts with `(?-u)` and literal
    /// characters are escaped byte by byte.
    unicode: bool,
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

impl Default for GlobOptions {
    /// Returns the default options.
    ///
    /// Unicode support, case insensitive matching and literal separators
    /// are all disabled. Backslash escaping is enabled exactly when `\` is
    /// not a path separator on the current platform.
    ///
    /// This is the standard `Default` trait rather than an inherent
    /// function, so both `GlobOptions::default()` and `Default::default()`
    /// work.
    fn default() -> GlobOptions {
        GlobOptions {
            unicode: false,
            case_insensitive: false,
            literal_separator: false,
            backslash_escape: !is_separator('\\'),
        }
    }
}
214
/// A sequence of parsed glob tokens.
///
/// This is a thin wrapper around `Vec<Token>` that dereferences to the
/// vector, so all `Vec` and slice methods are available on it directly.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);

impl Deref for Tokens {
    type Target = Vec<Token>;

    /// Borrows the wrapped vector.
    fn deref(&self) -> &Vec<Token> {
        let Tokens(ref inner) = *self;
        inner
    }
}

impl DerefMut for Tokens {
    /// Mutably borrows the wrapped vector.
    fn deref_mut(&mut self) -> &mut Vec<Token> {
        let Tokens(ref mut inner) = *self;
        inner
    }
}

/// A single element of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A single literal character.
    Literal(char),
    /// `?`: translated to a regex matching exactly one character.
    Any,
    /// `*`: translated to a regex matching zero or more characters.
    ZeroOrMore,
    /// Translated to the regex `(?:/?|.*/)`.
    RecursivePrefix,
    /// Translated to the regex `(?:/?|/.*)`.
    RecursiveSuffix,
    /// Translated to the regex `(?:/|/.*/)`.
    RecursiveZeroOrMore,
    /// A character class, translated to a regex bracket expression.
    Class {
        /// Whether the class is negated.
        negated: bool,
        /// The inclusive `(start, end)` ranges of the class. A single
        /// character is stored as a range whose ends are equal.
        ranges: Vec<(char, char)>,
    },
    /// A group of alternatives, each of which is its own token sequence.
    Alternates(Vec<Tokens>),
}
241
242impl Glob {
243    /// Builds a new pattern with default options.
244    pub fn new(glob: &str) -> Result<Glob, Error> {
245        GlobBuilder::new(glob).build()
246    }
247
248    /// Returns a matcher for this pattern.
249    pub fn compile_matcher(&self) -> GlobMatcher {
250        let re = new_regex(&self.re)
251            .expect("regex compilation shouldn't fail");
252        GlobMatcher {
253            pat: self.clone(),
254            re: re,
255        }
256    }
257
258    /// Returns a strategic matcher.
259    ///
260    /// This isn't exposed because it's not clear whether it's actually
261    /// faster than just running a regex for a *single* pattern. If it
262    /// is faster, then GlobMatcher should do it automatically.
263    #[cfg(test)]
264    fn compile_strategic_matcher(&self) -> GlobStrategic {
265        let strategy = MatchStrategy::new(self);
266        let re = new_regex(&self.re)
267            .expect("regex compilation shouldn't fail");
268        GlobStrategic {
269            strategy: strategy,
270            pat: self.clone(),
271            re: re,
272        }
273    }
274
275    /// Returns the original glob pattern used to build this pattern.
276    pub fn glob(&self) -> &str {
277        &self.glob
278    }
279
280    /// Returns the regular expression string for this glob.
281    ///
282    /// Note that regular expressions for globs are intended to be matched on
283    /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
284    /// particular, globs are frequently used on file paths, where there is no
285    /// general guarantee that file paths are themselves valid UTF-8. As a
286    /// result, callers will need to ensure that they are using a regex API
287    /// that can match on arbitrary bytes. For example, the
288    /// [`regex`](https://crates.io/regex)
289    /// crate's
290    /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
291    /// API is not suitable for this since it matches on `&str`, but its
292    /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
293    /// API is suitable for this.
294    pub fn regex(&self) -> &str {
295        &self.re
296    }
297
298    /// Returns the pattern as a literal if and only if the pattern must match
299    /// an entire path exactly.
300    ///
301    /// The basic format of these patterns is `{literal}`.
302    fn literal(&self) -> Option<String> {
303        if self.opts.case_insensitive {
304            return None;
305        }
306        let mut lit = String::new();
307        for t in &*self.tokens {
308            match *t {
309                Token::Literal(c) => lit.push(c),
310                _ => return None,
311            }
312        }
313        if lit.is_empty() {
314            None
315        } else {
316            Some(lit)
317        }
318    }
319
320    /// Returns an extension if this pattern matches a file path if and only
321    /// if the file path has the extension returned.
322    ///
323    /// Note that this extension returned differs from the extension that
324    /// std::path::Path::extension returns. Namely, this extension includes
325    /// the '.'. Also, paths like `.rs` are considered to have an extension
326    /// of `.rs`.
327    fn ext(&self) -> Option<String> {
328        if self.opts.case_insensitive {
329            return None;
330        }
331        let start = match self.tokens.get(0) {
332            Some(&Token::RecursivePrefix) => 1,
333            Some(_) => 0,
334            _ => return None,
335        };
336        match self.tokens.get(start) {
337            Some(&Token::ZeroOrMore) => {
338                // If there was no recursive prefix, then we only permit
339                // `*` if `*` can match a `/`. For example, if `*` can't
340                // match `/`, then `*.c` doesn't match `foo/bar.c`.
341                if start == 0 && self.opts.literal_separator {
342                    return None;
343                }
344            }
345            _ => return None,
346        }
347        match self.tokens.get(start + 1) {
348            Some(&Token::Literal('.')) => {}
349            _ => return None,
350        }
351        let mut lit = ".".to_string();
352        for t in self.tokens[start + 2..].iter() {
353            match *t {
354                Token::Literal('.') | Token::Literal('/') => return None,
355                Token::Literal(c) => lit.push(c),
356                _ => return None,
357            }
358        }
359        if lit.is_empty() {
360            None
361        } else {
362            Some(lit)
363        }
364    }
365
366    /// This is like `ext`, but returns an extension even if it isn't sufficent
367    /// to imply a match. Namely, if an extension is returned, then it is
368    /// necessary but not sufficient for a match.
369    fn required_ext(&self) -> Option<String> {
370        if self.opts.case_insensitive {
371            return None;
372        }
373        // We don't care at all about the beginning of this pattern. All we
374        // need to check for is if it ends with a literal of the form `.ext`.
375        let mut ext: Vec<char> = vec![]; // built in reverse
376        for t in self.tokens.iter().rev() {
377            match *t {
378                Token::Literal('/') => return None,
379                Token::Literal(c) => {
380                    ext.push(c);
381                    if c == '.' {
382                        break;
383                    }
384                }
385                _ => return None,
386            }
387        }
388        if ext.last() != Some(&'.') {
389            None
390        } else {
391            ext.reverse();
392            Some(ext.into_iter().collect())
393        }
394    }
395
396    /// Returns a literal prefix of this pattern if the entire pattern matches
397    /// if the literal prefix matches.
398    fn prefix(&self) -> Option<String> {
399        if self.opts.case_insensitive {
400            return None;
401        }
402        let end = match self.tokens.last() {
403            Some(&Token::ZeroOrMore) => {
404                if self.opts.literal_separator {
405                    // If a trailing `*` can't match a `/`, then we can't
406                    // assume a match of the prefix corresponds to a match
407                    // of the overall pattern. e.g., `foo/*` with
408                    // `literal_separator` enabled matches `foo/bar` but not
409                    // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
410                    // literal prefix.
411                    return None;
412                }
413                self.tokens.len() - 1
414            }
415            _ => self.tokens.len(),
416        };
417        let mut lit = String::new();
418        for t in &self.tokens[0..end] {
419            match *t {
420                Token::Literal(c) => lit.push(c),
421                _ => return None,
422            }
423        }
424        if lit.is_empty() {
425            None
426        } else {
427            Some(lit)
428        }
429    }
430
431    /// Returns a literal suffix of this pattern if the entire pattern matches
432    /// if the literal suffix matches.
433    ///
434    /// If a literal suffix is returned and it must match either the entire
435    /// file path or be preceded by a `/`, then also return true. This happens
436    /// with a pattern like `**/foo/bar`. Namely, this pattern matches
437    /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
438    /// suffix returned is `/foo/bar` (but should match the entire path
439    /// `foo/bar`).
440    ///
441    /// When this returns true, the suffix literal is guaranteed to start with
442    /// a `/`.
443    fn suffix(&self) -> Option<(String, bool)> {
444        if self.opts.case_insensitive {
445            return None;
446        }
447        let mut lit = String::new();
448        let (start, entire) = match self.tokens.get(0) {
449            Some(&Token::RecursivePrefix) => {
450                // We only care if this follows a path component if the next
451                // token is a literal.
452                if let Some(&Token::Literal(_)) = self.tokens.get(1) {
453                    lit.push('/');
454                    (1, true)
455                } else {
456                    (1, false)
457                }
458            }
459            _ => (0, false),
460        };
461        let start = match self.tokens.get(start) {
462            Some(&Token::ZeroOrMore) => {
463                // If literal_separator is enabled, then a `*` can't
464                // necessarily match everything, so reporting a suffix match
465                // as a match of the pattern would be a false positive.
466                if self.opts.literal_separator {
467                    return None;
468                }
469                start + 1
470            }
471            _ => start,
472        };
473        for t in &self.tokens[start..] {
474            match *t {
475                Token::Literal(c) => lit.push(c),
476                _ => return None,
477            }
478        }
479        if lit.is_empty() || lit == "/" {
480            None
481        } else {
482            Some((lit, entire))
483        }
484    }
485
486    /// If this pattern only needs to inspect the basename of a file path,
487    /// then the tokens corresponding to only the basename match are returned.
488    ///
489    /// For example, given a pattern of `**/*.foo`, only the tokens
490    /// corresponding to `*.foo` are returned.
491    ///
492    /// Note that this will return None if any match of the basename tokens
493    /// doesn't correspond to a match of the entire pattern. For example, the
494    /// glob `foo` only matches when a file path has a basename of `foo`, but
495    /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
496    /// `foo` doesn't match `abc/foo`.
497    fn basename_tokens(&self) -> Option<&[Token]> {
498        if self.opts.case_insensitive {
499            return None;
500        }
501        let start = match self.tokens.get(0) {
502            Some(&Token::RecursivePrefix) => 1,
503            _ => {
504                // With nothing to gobble up the parent portion of a path,
505                // we can't assume that matching on only the basename is
506                // correct.
507                return None;
508            }
509        };
510        if self.tokens[start..].is_empty() {
511            return None;
512        }
513        for t in &self.tokens[start..] {
514            match *t {
515                Token::Literal('/') => return None,
516                Token::Literal(_) => {} // OK
517                Token::Any | Token::ZeroOrMore => {
518                    if !self.opts.literal_separator {
519                        // In this case, `*` and `?` can match a path
520                        // separator, which means this could reach outside
521                        // the basename.
522                        return None;
523                    }
524                }
525                Token::RecursivePrefix
526                | Token::RecursiveSuffix
527                | Token::RecursiveZeroOrMore => {
528                    return None;
529                }
530                Token::Class{..} | Token::Alternates(..) => {
531                    // We *could* be a little smarter here, but either one
532                    // of these is going to prevent our literal optimizations
533                    // anyway, so give up.
534                    return None;
535                }
536            }
537        }
538        Some(&self.tokens[start..])
539    }
540
541    /// Returns the pattern as a literal if and only if the pattern exclusively
542    /// matches the basename of a file path *and* is a literal.
543    ///
544    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
545    /// does not contain a path separator.
546    fn basename_literal(&self) -> Option<String> {
547        let tokens = match self.basename_tokens() {
548            None => return None,
549            Some(tokens) => tokens,
550        };
551        let mut lit = String::new();
552        for t in tokens {
553            match *t {
554                Token::Literal(c) => lit.push(c),
555                _ => return None,
556            }
557        }
558        Some(lit)
559    }
560}
561
562impl<'a> GlobBuilder<'a> {
563    /// Create a new builder for the pattern given.
564    ///
565    /// The pattern is not compiled until `build` is called.
566    pub fn new(glob: &'a str) -> GlobBuilder<'a> {
567        GlobBuilder {
568            glob: glob,
569            opts: GlobOptions::default(),
570        }
571    }
572
573    /// Parses and builds the pattern.
574    pub fn build(&self) -> Result<Glob, Error> {
575        let mut p = Parser {
576            glob: &self.glob,
577            stack: vec![Tokens::default()],
578            chars: self.glob.chars().peekable(),
579            prev: None,
580            cur: None,
581            opts: &self.opts,
582        };
583        p.parse()?;
584        if p.stack.is_empty() {
585            Err(Error {
586                glob: Some(self.glob.to_string()),
587                kind: ErrorKind::UnopenedAlternates,
588            })
589        } else if p.stack.len() > 1 {
590            Err(Error {
591                glob: Some(self.glob.to_string()),
592                kind: ErrorKind::UnclosedAlternates,
593            })
594        } else {
595            let tokens = p.stack.pop().unwrap();
596            Ok(Glob {
597                glob: self.glob.to_string(),
598                re: tokens.to_regex_with(&self.opts),
599                opts: self.opts,
600                tokens: tokens,
601            })
602        }
603    }
604
605    /// Toggle whether the pattern matches Unicode scalar values.
606    ///
607    /// This is disabled by default.
608    pub fn unicode(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
609        self.opts.unicode = yes;
610        self
611    }
612
613    /// Toggle whether the pattern matches case insensitively or not.
614    ///
615    /// This is disabled by default.
616    pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
617        self.opts.case_insensitive = yes;
618        self
619    }
620
621    /// Toggle whether a literal `/` is required to match a path separator.
622    pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
623        self.opts.literal_separator = yes;
624        self
625    }
626
627    /// When enabled, a back slash (`\`) may be used to escape
628    /// special characters in a glob pattern. Additionally, this will
629    /// prevent `\` from being interpreted as a path separator on all
630    /// platforms.
631    ///
632    /// This is enabled by default on platforms where `\` is not a
633    /// path separator and disabled by default on platforms where `\`
634    /// is a path separator.
635    pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
636        self.opts.backslash_escape = yes;
637        self
638    }
639}
640
641impl Tokens {
642    /// Convert this pattern to a string that is guaranteed to be a valid
643    /// regular expression and will represent the matching semantics of this
644    /// glob pattern and the options given.
645    fn to_regex_with(&self, options: &GlobOptions) -> String {
646        let mut re = String::new();
647        if !options.unicode {
648            re.push_str("(?-u)");
649        }
650        if options.case_insensitive {
651            re.push_str("(?i)");
652        }
653        re.push('^');
654        // Special case. If the entire glob is just `**`, then it should match
655        // everything.
656        if self.len() == 1 && self[0] == Token::RecursivePrefix {
657            re.push_str(".*");
658            re.push('$');
659            return re;
660        }
661        self.tokens_to_regex(options, &self, &mut re);
662        re.push('$');
663        re
664    }
665
666    fn tokens_to_regex(
667        &self,
668        options: &GlobOptions,
669        tokens: &[Token],
670        re: &mut String,
671    ) {
672        for tok in tokens {
673            match *tok {
674                Token::Literal(c) => {
675                    if !options.unicode {
676                        re.push_str(&char_to_escaped_literal(c));
677                    } else {
678                        re.push_str(&regex::escape(&c.to_string()));
679                    }
680                }
681                Token::Any => {
682                    if options.literal_separator {
683                        re.push_str("[^/]");
684                    } else {
685                        re.push_str(".");
686                    }
687                }
688                Token::ZeroOrMore => {
689                    if options.literal_separator {
690                        re.push_str("[^/]*");
691                    } else {
692                        re.push_str(".*");
693                    }
694                }
695                Token::RecursivePrefix => {
696                    re.push_str("(?:/?|.*/)");
697                }
698                Token::RecursiveSuffix => {
699                    re.push_str("(?:/?|/.*)");
700                }
701                Token::RecursiveZeroOrMore => {
702                    re.push_str("(?:/|/.*/)");
703                }
704                Token::Class { negated, ref ranges } => {
705                    re.push('[');
706                    if negated {
707                        re.push('^');
708                    }
709                    for r in ranges {
710                        if !options.unicode {
711                            if r.0 == r.1 {
712                                // Not strictly necessary, but nicer to look at.
713                                re.push_str(&char_to_escaped_literal(r.0));
714                            } else {
715                                re.push_str(&char_to_escaped_literal(r.0));
716                                re.push('-');
717                                re.push_str(&char_to_escaped_literal(r.1));
718                            }
719                        } else {
720                            if r.0 == r.1 {
721                                re.push_str(&regex::escape(&r.0.to_string()));
722                            } else {
723                                re.push_str(&regex::escape(&r.0.to_string()));
724                                re.push('-');
725                                re.push_str(&regex::escape(&r.1.to_string()));
726                            }
727                        }
728                    }
729                    re.push(']');
730                }
731                Token::Alternates(ref patterns) => {
732                    let mut parts = vec![];
733                    for pat in patterns {
734                        let mut altre = String::new();
735                        self.tokens_to_regex(options, &pat, &mut altre);
736                        if !altre.is_empty() {
737                            parts.push(altre);
738                        }
739                    }
740
741                    // It is possible to have an empty set in which case the
742                    // resulting alternation '()' would be an error.
743                    if !parts.is_empty() {
744                        re.push('(');
745                        re.push_str(&parts.join("|"));
746                        re.push(')');
747                    }
748                }
749            }
750        }
751    }
752}
753
754/// Convert a Unicode scalar value to an escaped string suitable for use as
755/// a literal in a non-Unicode regex.
756fn char_to_escaped_literal(c: char) -> String {
757    bytes_to_escaped_literal(&c.to_string().into_bytes())
758}
759
760/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
761/// code units are converted to their escaped form.
762fn bytes_to_escaped_literal(bs: &[u8]) -> String {
763    let mut s = String::with_capacity(bs.len());
764    for &b in bs {
765        if b <= 0x7F {
766            s.push_str(&regex::escape(&(b as char).to_string()));
767        } else {
768            s.push_str(&format!("\\x{:02x}", b));
769        }
770    }
771    s
772}
773
/// The state of the glob parser.
///
/// The lifetime `'a` ties the parser to the pattern string and the options
/// it borrows.
struct Parser<'a> {
    /// The glob pattern being parsed. It is copied into every error that
    /// the parser creates.
    glob: &'a str,
    /// A stack of token lists. It starts out with a single list; further
    /// lists are pushed when an alternation group is opened and when a `,`
    /// is seen inside of one.
    stack: Vec<Tokens>,
    /// The remaining characters of the pattern.
    chars: iter::Peekable<str::Chars<'a>>,
    /// Initially `None`. Presumably the character consumed before `cur`,
    /// maintained by `bump` (not shown in this part of the file).
    prev: Option<char>,
    /// Initially `None`. Presumably the most recently consumed character,
    /// maintained by `bump` (not shown in this part of the file).
    cur: Option<char>,
    /// The options to parse the pattern with.
    opts: &'a GlobOptions,
}
782
783impl<'a> Parser<'a> {
784    fn error(&self, kind: ErrorKind) -> Error {
785        Error { glob: Some(self.glob.to_string()), kind: kind }
786    }
787
788    fn parse(&mut self) -> Result<(), Error> {
789        while let Some(c) = self.bump() {
790            match c {
791                '?' => self.push_token(Token::Any)?,
792                '*' => self.parse_star()?,
793                '[' => self.parse_class()?,
794                '{' => self.push_alternate()?,
795                '}' => self.pop_alternate()?,
796                ',' => self.parse_comma()?,
797                '\\' => self.parse_backslash()?,
798                c => self.push_token(Token::Literal(c))?,
799            }
800        }
801        Ok(())
802    }
803
804    fn push_alternate(&mut self) -> Result<(), Error> {
805        if self.stack.len() > 1 {
806            return Err(self.error(ErrorKind::NestedAlternates));
807        }
808        Ok(self.stack.push(Tokens::default()))
809    }
810
811    fn pop_alternate(&mut self) -> Result<(), Error> {
812        let mut alts = vec![];
813        while self.stack.len() >= 2 {
814            alts.push(self.stack.pop().unwrap());
815        }
816        self.push_token(Token::Alternates(alts))
817    }
818
819    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
820        if let Some(ref mut pat) = self.stack.last_mut() {
821            return Ok(pat.push(tok));
822        }
823        Err(self.error(ErrorKind::UnopenedAlternates))
824    }
825
826    fn pop_token(&mut self) -> Result<Token, Error> {
827        if let Some(ref mut pat) = self.stack.last_mut() {
828            return Ok(pat.pop().unwrap());
829        }
830        Err(self.error(ErrorKind::UnopenedAlternates))
831    }
832
833    fn have_tokens(&self) -> Result<bool, Error> {
834        match self.stack.last() {
835            None => Err(self.error(ErrorKind::UnopenedAlternates)),
836            Some(ref pat) => Ok(!pat.is_empty()),
837        }
838    }
839
840    fn parse_comma(&mut self) -> Result<(), Error> {
841        // If we aren't inside a group alternation, then don't
842        // treat commas specially. Otherwise, we need to start
843        // a new alternate.
844        if self.stack.len() <= 1 {
845            self.push_token(Token::Literal(','))
846        } else {
847            Ok(self.stack.push(Tokens::default()))
848        }
849    }
850
851    fn parse_backslash(&mut self) -> Result<(), Error> {
852        match self.apply_backslash() {
853            None => Err(self.error(ErrorKind::DanglingEscape)),
854            Some(c) => self.push_token(Token::Literal(c)),
855        }
856    }
857
858    fn apply_backslash(&mut self) -> Option<char> {
859        if self.opts.backslash_escape {
860            self.bump()
861        } else if is_separator('\\') {
862            // Normalize all patterns to use / as a separator.
863            Some('/')
864        } else {
865            Some('\\')
866        }
867    }
868
    /// Parses a `*` or `**` and pushes the matching wildcard token(s).
    ///
    /// NOTE(review): this reads `self.prev` as the character preceding the
    /// first `*`, which assumes the caller has already consumed that `*` —
    /// confirm at the call site.
    ///
    /// Outcomes, as exercised by the tests in this file:
    ///
    /// * a single `*` becomes `ZeroOrMore`;
    /// * `**` with no tokens before it, followed by a separator or the end of
    ///   the pattern, becomes `RecursivePrefix`;
    /// * `<sep>**` at the end of the pattern (or of an alternate) becomes
    ///   `RecursiveSuffix`;
    /// * `<sep>**<sep>` becomes `RecursiveZeroOrMore`;
    /// * any other `**` is treated as two consecutive `ZeroOrMore` tokens.
    ///
    /// # Errors
    ///
    /// Forwards errors from `push_token`, `pop_token` and `have_tokens`.
    ///
    /// # Panics
    ///
    /// The `assert!`s below both consume input and check that the consumed
    /// character is the one that was just peeked.
    fn parse_star(&mut self) -> Result<(), Error> {
        // Snapshot before any `bump` below overwrites it.
        let prev = self.prev;
        // A lone `*`: plain wildcard.
        if self.peek() != Some('*') {
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
        // Consume the second `*`.
        assert!(self.bump() == Some('*'));
        // `**` is the first thing in the current token list.
        if !self.have_tokens()? {
            if !self.peek().map_or(true, is_separator) {
                // Followed by an ordinary character: not a recursive form.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
            } else {
                // Followed by a separator or the end of input. The bump
                // swallows the separator, if there is one.
                self.push_token(Token::RecursivePrefix)?;
                assert!(self.bump().map_or(true, is_separator));
            }
            return Ok(());
        }

        // There are tokens before `**`. Unless it directly follows a
        // separator (or a `,`/`{` while more than one token list is on the
        // stack), it is just two plain wildcards.
        if !prev.map(is_separator).unwrap_or(false) {
            if self.stack.len() <= 1
                || (prev != Some(',') && prev != Some('{'))
            {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        }
        // Decide by what follows `**` whether it ends a path (suffix) or sits
        // between two separators.
        let is_suffix =
            match self.peek() {
                None => {
                    // End of pattern.
                    assert!(self.bump().is_none());
                    true
                }
                Some(',') | Some('}') if self.stack.len() >= 2 => {
                    // End of an alternate; the delimiter is left unconsumed.
                    true
                }
                Some(c) if is_separator(c) => {
                    // Swallow the trailing separator.
                    assert!(self.bump().map(is_separator).unwrap_or(false));
                    false
                }
                _ => {
                    // Followed by an ordinary character: two plain wildcards.
                    self.push_token(Token::ZeroOrMore)?;
                    self.push_token(Token::ZeroOrMore)?;
                    return Ok(());
                }
            };
        // Replace the token that came right before `**`. If it was already a
        // recursive prefix or suffix it is put back unchanged, so repeated
        // `**/**` sequences collapse into one token.
        match self.pop_token()? {
            Token::RecursivePrefix => {
                self.push_token(Token::RecursivePrefix)?;
            }
            Token::RecursiveSuffix => {
                self.push_token(Token::RecursiveSuffix)?;
            }
            _ => {
                if is_suffix {
                    self.push_token(Token::RecursiveSuffix)?;
                } else {
                    self.push_token(Token::RecursiveZeroOrMore)?;
                }
            }
        }
        Ok(())
    }
932
    /// Parses the body of a character class such as `[a-z]` or `[!0-9]` and
    /// pushes a single `Token::Class`.
    ///
    /// NOTE(review): the first thing examined is an optional `!`/`^`, so this
    /// assumes the caller has already consumed the opening `[` — confirm at
    /// the call site.
    ///
    /// A leading `!` or `^` negates the class. A `]` or `-` appearing as the
    /// first character after the optional negation is taken literally, and a
    /// `-` immediately before the closing `]` is a literal as well.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::UnclosedClass` if the input ends before a closing `]`.
    /// * `ErrorKind::DanglingEscape` if `apply_backslash` yields no character
    ///   for a `\` inside the class.
    /// * `ErrorKind::InvalidRange` if the end of a range is smaller than its
    ///   start.
    /// * Any error from `push_token`.
    fn parse_class(&mut self) -> Result<(), Error> {
        // Sets `add` as the upper bound of the range `r` and rejects the
        // range if that makes it run backwards.
        fn add_to_last_range(
            glob: &str,
            r: &mut (char, char),
            add: char,
        ) -> Result<(), Error> {
            r.1 = add;
            if r.1 < r.0 {
                Err(Error {
                    glob: Some(glob.to_string()),
                    kind: ErrorKind::InvalidRange(r.0, r.1),
                })
            } else {
                Ok(())
            }
        }
        // Inclusive (start, end) pairs; a single character `c` is (c, c).
        let mut ranges = vec![];
        let negated = match self.chars.peek() {
            Some(&'!') | Some(&'^') => {
                let bump = self.bump();
                assert!(bump == Some('!') || bump == Some('^'));
                true
            }
            _ => false,
        };
        // True only while looking at the first character of the class body.
        let mut first = true;
        // True after a `-` that may turn the previous entry into a range.
        let mut in_range = false;
        loop {
            let c = match self.bump() {
                Some(c) => c,
                // The only way to successfully break this loop is to observe
                // a ']'.
                None => return Err(self.error(ErrorKind::UnclosedClass)),
            };
            match c {
                ']' => {
                    if first {
                        // `[]...]`: a leading `]` is a literal member.
                        ranges.push((']', ']'));
                    } else {
                        break;
                    }
                }
                '-' => {
                    if first {
                        // `[-...]`: a leading `-` is a literal member.
                        ranges.push(('-', '-'));
                    } else if in_range {
                        // invariant: in_range is only set when there is
                        // already at least one character seen.
                        // A second `-` is the upper bound of the range, as in
                        // `[ --]`.
                        let r = ranges.last_mut().unwrap();
                        add_to_last_range(&self.glob, r, '-')?;
                        in_range = false;
                    } else {
                        // Might start a range; resolved by the next character
                        // or by the closing `]`.
                        assert!(!ranges.is_empty());
                        in_range = true;
                    }
                }
                c => {
                    // Resolve `\` the same way as outside a class.
                    let c = if c == '\\' {
                        match self.apply_backslash() {
                            None => return Err(self.error(ErrorKind::DanglingEscape)),
                            Some(c) => c,
                        }
                    } else {
                        c
                    };
                    if in_range {
                        // invariant: in_range is only set when there is
                        // already at least one character seen.
                        add_to_last_range(
                            &self.glob, ranges.last_mut().unwrap(), c)?;
                    } else {
                        ranges.push((c, c));
                    }
                    in_range = false;
                }
            }
            first = false;
        }
        if in_range {
            // Means that the last character in the class was a '-', so add
            // it as a literal.
            ranges.push(('-', '-'));
        }
        self.push_token(Token::Class {
            negated: negated,
            ranges: ranges,
        })
    }
1021
1022    fn bump(&mut self) -> Option<char> {
1023        self.prev = self.cur;
1024        self.cur = self.chars.next();
1025        self.cur
1026    }
1027
1028    fn peek(&mut self) -> Option<char> {
1029        self.chars.peek().map(|&ch| ch)
1030    }
1031}
1032
/// Returns true if and only if `haystack` begins with the bytes in `needle`.
///
/// Note the argument order: the needle comes first. An empty needle is a
/// prefix of every haystack, including an empty one.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    // The slice method performs the same length check and comparison that
    // was previously spelled out by hand.
    haystack.starts_with(needle)
}
1037
/// Returns true if and only if `haystack` ends with the bytes in `needle`.
///
/// Note the argument order: the needle comes first. An empty needle is a
/// suffix of every haystack, including an empty one.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    // The slice method performs the same length check and comparison that
    // was previously spelled out by hand.
    haystack.ends_with(needle)
}
1045
// Unit tests for glob parsing, regex translation, matching and the literal
// extraction accessors. Almost every test is generated by one of the
// declarative macros defined below, so each macro invocation is one `#[test]`
// function named after its first argument.
#[cfg(test)]
mod tests {
    use super::super::GlobSetBuilder;
    use super::{ErrorKind, Glob, GlobBuilder, Token};
    use super::Token::*;

    // Optional overrides for the `GlobBuilder` settings used by a test.
    // A `None` field leaves the corresponding builder setting untouched.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        // Forwarded to `GlobBuilder::unicode`.
        unicode: Option<bool>,
        // Forwarded to `GlobBuilder::case_insensitive`.
        casei: Option<bool>,
        // Forwarded to `GlobBuilder::literal_separator`.
        litsep: Option<bool>,
        // Forwarded to `GlobBuilder::backslash_escape`.
        bsesc: Option<bool>,
    }

    // Asserts that `$pat` parses successfully into exactly `$tokens`.
    macro_rules! syntax {
        ($name:ident, $pat:expr, $tokens:expr) => {
            #[test]
            fn $name() {
                let pat = Glob::new($pat).unwrap();
                assert_eq!($tokens, pat.tokens.0);
            }
        }
    }

    // Asserts that parsing `$pat` fails with the error kind `$err`.
    macro_rules! syntaxerr {
        ($name:ident, $pat:expr, $err:expr) => {
            #[test]
            fn $name() {
                let err = Glob::new($pat).unwrap_err();
                assert_eq!(&$err, err.kind());
            }
        }
    }

    // Asserts that `$pat`, built with `$options`, translates to the regex
    // `$re`. Unless the options explicitly enable Unicode, the expected regex
    // is prefixed with `(?-u)` before comparing.
    macro_rules! toregex {
        ($name:ident, $pat:expr, $re:expr) => {
            toregex!($name, $pat, $re, Options::default());
        };
        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
            #[test]
            fn $name() {
                let mut builder = GlobBuilder::new($pat);
                if let Some(unicode) = $options.unicode {
                    builder.unicode(unicode);
                }
                if let Some(casei) = $options.casei {
                    builder.case_insensitive(casei);
                }
                if let Some(litsep) = $options.litsep {
                    builder.literal_separator(litsep);
                }
                if let Some(bsesc) = $options.bsesc {
                    builder.backslash_escape(bsesc);
                }
                let pat = builder.build().unwrap();
                if !$options.unicode.unwrap_or(false) {
                    assert_eq!(format!("(?-u){}", $re), pat.regex());
                } else {
                    assert_eq!($re, pat.regex());
                }
            }
        };
    }

    // Asserts that `$pat`, built with `$options`, matches `$path`. The check
    // is made three ways: with the plain matcher, with the strategic matcher
    // and with a glob set containing only this pattern.
    macro_rules! matches {
        ($name:ident, $pat:expr, $path:expr) => {
            matches!($name, $pat, $path, Options::default());
        };
        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
            #[test]
            fn $name() {
                let mut builder = GlobBuilder::new($pat);
                if let Some(unicode) = $options.unicode {
                    builder.unicode(unicode);
                }
                if let Some(casei) = $options.casei {
                    builder.case_insensitive(casei);
                }
                if let Some(litsep) = $options.litsep {
                    builder.literal_separator(litsep);
                }
                if let Some(bsesc) = $options.bsesc {
                    builder.backslash_escape(bsesc);
                }
                let pat = builder.build().unwrap();
                let matcher = pat.compile_matcher();
                let strategic = pat.compile_strategic_matcher();
                let set = GlobSetBuilder::new().add(pat).build().unwrap();
                assert!(matcher.is_match($path));
                assert!(strategic.is_match($path));
                assert!(set.is_match($path));
            }
        };
    }

    // The negation of `matches!`: asserts that none of the three matchers
    // accepts `$path`.
    macro_rules! nmatches {
        ($name:ident, $pat:expr, $path:expr) => {
            nmatches!($name, $pat, $path, Options::default());
        };
        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
            #[test]
            fn $name() {
                let mut builder = GlobBuilder::new($pat);
                if let Some(unicode) = $options.unicode {
                    builder.unicode(unicode);
                }
                if let Some(casei) = $options.casei {
                    builder.case_insensitive(casei);
                }
                if let Some(litsep) = $options.litsep {
                    builder.literal_separator(litsep);
                }
                if let Some(bsesc) = $options.bsesc {
                    builder.backslash_escape(bsesc);
                }
                let pat = builder.build().unwrap();
                let matcher = pat.compile_matcher();
                let strategic = pat.compile_strategic_matcher();
                let set = GlobSetBuilder::new().add(pat).build().unwrap();
                assert!(!matcher.is_match($path));
                assert!(!strategic.is_match($path));
                assert!(!set.is_match($path));
            }
        };
    }

    // Shorthand for building an owned `String` in expected values.
    fn s(string: &str) -> String { string.to_string() }

    // A non-negated class containing the single range `s..=e`.
    fn class(s: char, e: char) -> Token {
        Class { negated: false, ranges: vec![(s, e)] }
    }

    // A negated class containing the single range `s..=e`.
    fn classn(s: char, e: char) -> Token {
        Class { negated: true, ranges: vec![(s, e)] }
    }

    // A non-negated class containing all of `ranges`, in order.
    fn rclass(ranges: &[(char, char)]) -> Token {
        Class { negated: false, ranges: ranges.to_vec() }
    }

    // A negated class containing all of `ranges`, in order.
    fn rclassn(ranges: &[(char, char)]) -> Token {
        Class { negated: true, ranges: ranges.to_vec() }
    }

    // Token-level expectations for patterns that parse successfully.
    syntax!(literal1, "a", vec![Literal('a')]);
    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
    syntax!(any1, "?", vec![Any]);
    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
    syntax!(seq1, "*", vec![ZeroOrMore]);
    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
    syntax!(seq3, "*a*b*", vec![
        ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,
    ]);
    syntax!(rseq1, "**", vec![RecursivePrefix]);
    syntax!(rseq2, "**/", vec![RecursivePrefix]);
    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
    syntax!(rseq5, "a/**/b", vec![
        Literal('a'), RecursiveZeroOrMore, Literal('b'),
    ]);
    syntax!(cls1, "[a]", vec![class('a', 'a')]);
    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
    syntax!(cls5, "[-]", vec![class('-', '-')]);
    syntax!(cls6, "[]]", vec![class(']', ']')]);
    syntax!(cls7, "[*]", vec![class('*', '*')]);
    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
    syntax!(cls12, "[-a-z-]", vec![
        rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),
    ]);
    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
    syntax!(cls_bsesc_1, r"\\", vec![Literal('\\')]);
    syntax!(cls_bsesc_2, r"[\\]", vec![class('\\', '\\')]);
    syntax!(cls_bsesc_3, r"[\-]", vec![class('-', '-')]);
    syntax!(cls_unicode_1, "☃", vec![Literal('☃')]);
    syntax!(cls_unicode_2, "[☃]", vec![class('☃', '☃')]);

    // Patterns that must be rejected, with the expected error kind.
    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
    syntaxerr!(err_bsesc1, r"\", ErrorKind::DanglingEscape);
    syntaxerr!(err_bsesc2, r"[\]", ErrorKind::UnclosedClass);

    // Named option presets; each one overrides exactly one builder setting.
    const CASEI: Options = Options {
        unicode: None,
        casei: Some(true),
        litsep: None,
        bsesc: None,
    };
    const SLASHLIT: Options = Options {
        unicode: None,
        casei: None,
        litsep: Some(true),
        bsesc: None,
    };
    const NOBSESC: Options = Options {
        unicode: None,
        casei: None,
        litsep: None,
        bsesc: Some(false),
    };
    const BSESC: Options = Options {
        unicode: None,
        casei: None,
        litsep: None,
        bsesc: Some(true),
    };
    const UNICODE: Options = Options {
        unicode: Some(true),
        casei: None,
        litsep: None,
        bsesc: None,
    };

    // Backslash-escape and Unicode handling, both as regexes and as matches.
    toregex!(re_bsesc1, r"\\", r"^\\$", BSESC);
    toregex!(re_bsesc2, r"[\\]", r"^[\\]$", BSESC);
    toregex!(re_bsesc3, r"[\-]", r"^[\-]$", BSESC);
    toregex!(re_unicode1, "☃", "^☃$", UNICODE);
    toregex!(re_unicode2, "[☃]", "^[☃]$", UNICODE);
    matches!(match_bsesc1, r"\\", r"\", BSESC);
    matches!(match_bsesc2, r"[\\]", r"\", BSESC);
    matches!(match_bsesc3, r"[\-]", r"-", BSESC);
    matches!(match_bsesc4, r"{\\}", r"\", BSESC);
    matches!(match_bsesc5, r"{\,}", r",", BSESC);
    matches!(match_bsesc6, r"{\{}", r"{", BSESC);
    matches!(match_bsesc7, r"{\}}", r"}", BSESC);
    matches!(match_unicode1, "☃", "☃", UNICODE);
    matches!(match_unicode2, "[☃]", "☃", UNICODE);
    matches!(match_unicode3, "^[☃]$", "^☃$", UNICODE);

    // NOTE(review): this is the only invocation that passes the options by
    // reference (`&CASEI`); every other one passes the constant by value.
    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

    // With literal separators, `?` and `*` must not match `/`.
    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

    // Regex translation with default options.
    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
    toregex!(re3, "*", "^.*$");
    toregex!(re4, "a?", "^a.$");
    toregex!(re5, "?a", "^.a$");
    toregex!(re6, "a*", "^a.*$");
    toregex!(re7, "*a", "^.*a$");
    toregex!(re8, "[*]", r"^[\*]$");
    toregex!(re9, "[+]", r"^[\+]$");
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "☃", r"^\xe2\x98\x83$");
    toregex!(re12, "**", r"^.*$");
    toregex!(re13, "**/", r"^.*$");
    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
    toregex!(re15, "**/**", r"^.*$");
    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re17, "**/**/**", r"^.*$");
    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
    toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
    toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re28, "a**", r"^a.*.*$");
    toregex!(re29, "**a", r"^.*.*a$");
    toregex!(re30, "a**b", r"^a.*.*b$");
    toregex!(re31, "***", r"^.*.*.*$");
    toregex!(re32, "/a**", r"^/a.*.*$");
    toregex!(re33, "/**a", r"^/.*.*a$");
    toregex!(re34, "/a**b", r"^/a.*.*b$");

    // Positive matches: literals and single `*` wildcards.
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
    matches!(match4, "a*b*c", "a_b_c");
    matches!(match5, "a*b*c", "a___b___c");
    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
    matches!(match9, "*.rs", ".rs");
    matches!(match10, "☃", "☃");

    // Positive matches: recursive `**` forms.
    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec5, "**", "abcde");
    matches!(matchrec6, "**", "");
    matches!(matchrec7, "**", ".asdf");
    matches!(matchrec8, "**", "/x/.asdf");
    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec13, "**/test", "one/two/test");
    matches!(matchrec14, "**/test", "one/test");
    matches!(matchrec15, "**/test", "test");
    matches!(matchrec16, "/**/test", "/one/two/test");
    matches!(matchrec17, "/**/test", "/one/test");
    matches!(matchrec18, "/**/test", "/test");
    matches!(matchrec19, "**/.*", ".abc");
    matches!(matchrec20, "**/.*", "abc/.abc");
    matches!(matchrec21, ".*/**", ".abc");
    matches!(matchrec22, ".*/**", ".abc/abc");
    matches!(matchrec23, "foo/**", "foo");
    matches!(matchrec24, "**/foo/bar", "foo/bar");
    matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");

    // Positive matches: character classes and ranges.
    matches!(matchrange1, "a[0-9]b", "a0b");
    matches!(matchrange2, "a[0-9]b", "a9b");
    matches!(matchrange3, "a[!0-9]b", "a_b");
    matches!(matchrange4, "[a-z123]", "1");
    matches!(matchrange5, "[1a-z23]", "1");
    matches!(matchrange6, "[123a-z]", "1");
    matches!(matchrange7, "[abc-]", "-");
    matches!(matchrange8, "[-abc]", "-");
    matches!(matchrange9, "[-a-c]", "b");
    matches!(matchrange10, "[a-c-]", "b");
    matches!(matchrange11, "[-]", "-");
    matches!(matchrange12, "a[^0-9]b", "a_b");

    // Positive matches: a leading `*` crossing path separators (default
    // options, i.e. separators are not literal).
    matches!(matchpat1, "*hello.txt", "hello.txt");
    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
    matches!(matchpat7, "*some/path/to/hello.txt",
             "a/bigger/some/path/to/hello.txt");

    // Meta characters written inside classes match themselves.
    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

    // Case-insensitive matching.
    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);

    // Commas and `{...}` alternation groups.
    matches!(matchalt1, "a,b", "a,b");
    matches!(matchalt2, ",", ",");
    matches!(matchalt3, "{a,b}", "a");
    matches!(matchalt4, "{a,b}", "b");
    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
    matches!(matchalt6, "{**/src/**,foo}", "foo");
    matches!(matchalt7, "{[}],foo}", "}");
    matches!(matchalt8, "{foo}", "foo");
    matches!(matchalt9, "{}", "");
    matches!(matchalt10, "{,}", "");
    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");

    // Literal-separator mode. Some expectations are platform specific
    // because `\` is only a separator on non-Unix targets.
    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
    #[cfg(unix)]
    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
    #[cfg(unix)]
    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

    // Backslash handling with escaping explicitly on and off.
    matches!(matchbackslash1, "\\[", "[", BSESC);
    matches!(matchbackslash2, "\\?", "?", BSESC);
    matches!(matchbackslash3, "\\*", "*", BSESC);
    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
    // NOTE(review): unlike `matchslash2`/`matchslash5`, the two platform
    // variants below use different test names (7 and 8).
    #[cfg(unix)]
    matches!(matchbackslash7, "\\a", "a");
    #[cfg(not(unix))]
    matches!(matchbackslash8, "\\a", "/a");

    // Negative matches.
    nmatches!(matchnot1, "a*b*c", "abcd");
    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot5, "/**/test", "test");
    nmatches!(matchnot6, "/**/test", "/one/notthis");
    nmatches!(matchnot7, "/**/test", "/notthis");
    nmatches!(matchnot8, "**/.*", "ab.c");
    nmatches!(matchnot9, "**/.*", "abc/ab.c");
    nmatches!(matchnot10, ".*/**", "a.bc");
    nmatches!(matchnot11, ".*/**", "abc/a.bc");
    nmatches!(matchnot12, "a[0-9]b", "a_b");
    nmatches!(matchnot13, "a[!0-9]b", "a0b");
    nmatches!(matchnot14, "a[!0-9]b", "a9b");
    nmatches!(matchnot15, "[!-]", "-");
    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
    nmatches!(matchnot18, "*some/path/to/hello.txt",
              "some/path/to/hello.txt-and-then-some");
    nmatches!(matchnot19, "*some/path/to/hello.txt",
              "some/other/path/to/hello.txt");
    nmatches!(matchnot20, "a", "foo/a");
    nmatches!(matchnot21, "./foo", "foo");
    nmatches!(matchnot22, "**/foo", "foofoo");
    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
    nmatches!(matchnot26, "**/m4/ltoptions.m4",
              "csharp/src/packages/repositories.config", SLASHLIT);
    nmatches!(matchnot27, "a[^0-9]b", "a0b");
    nmatches!(matchnot28, "a[^0-9]b", "a9b");
    nmatches!(matchnot29, "[^-]", "-");
    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
    // NOTE(review): the next two are negative tests but carry `matchrec`
    // names rather than continuing the `matchnot` series.
    nmatches!(
        matchrec31,
        "some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
    nmatches!(
        matchrec32,
        "some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);

    // Builds `$pat` with `$options`, calls the accessor method named `$which`
    // on the resulting glob and compares its return value with `$expect`.
    macro_rules! extract {
        ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
            extract!($which, $name, $pat, $expect, Options::default());
        };
        ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
            #[test]
            fn $name() {
                let mut builder = GlobBuilder::new($pat);
                if let Some(unicode) = $options.unicode {
                    builder.unicode(unicode);
                }
                if let Some(casei) = $options.casei {
                    builder.case_insensitive(casei);
                }
                if let Some(litsep) = $options.litsep {
                    builder.literal_separator(litsep);
                }
                if let Some(bsesc) = $options.bsesc {
                    builder.backslash_escape(bsesc);
                }
                let pat = builder.build().unwrap();
                assert_eq!($expect, pat.$which());
            }
        };
    }

    // Thin wrappers that fix the accessor passed to `extract!`.
    macro_rules! literal {
        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
    }

    macro_rules! basetokens {
        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
    }

    macro_rules! ext {
        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
    }

    macro_rules! required_ext {
        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
    }

    macro_rules! prefix {
        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
    }

    macro_rules! suffix {
        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
    }

    macro_rules! baseliteral {
        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
    }

    // Expectations for `Glob::literal`.
    literal!(extract_lit1, "foo", Some(s("foo")));
    literal!(extract_lit2, "foo", None, CASEI);
    literal!(extract_lit3, "/foo", Some(s("/foo")));
    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
    literal!(extract_lit6, "*.foo", None);
    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
    literal!(extract_lit8, "**/foo/bar", None);

    // Expectations for `Glob::basename_tokens`.
    basetokens!(extract_basetoks1, "**/foo", Some(&*vec![
        Literal('f'), Literal('o'), Literal('o'),
    ]));
    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
    basetokens!(extract_basetoks3, "**/foo", Some(&*vec![
        Literal('f'), Literal('o'), Literal('o'),
    ]), SLASHLIT);
    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
    basetokens!(extract_basetoks5, "*foo", None);
    basetokens!(extract_basetoks6, "**/fo*o", None);
    basetokens!(extract_basetoks7, "**/fo*o", Some(&*vec![
        Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
    ]), SLASHLIT);

    // Expectations for `Glob::ext`.
    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
    ext!(extract_ext2, "**/*.rs.bak", None);
    ext!(extract_ext3, "*.rs", Some(s(".rs")));
    ext!(extract_ext4, "a*.rs", None);
    ext!(extract_ext5, "/*.c", None);
    ext!(extract_ext6, "*.c", None, SLASHLIT);
    ext!(extract_ext7, "*.c", Some(s(".c")));

    // Expectations for `Glob::required_ext`.
    // NOTE(review): `extract_req_ext3` repeats the pattern and expectation of
    // `extract_req_ext2` — possibly a different pattern was intended.
    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
    required_ext!(extract_req_ext6, "./rs", None);
    required_ext!(extract_req_ext7, "foo", None);
    required_ext!(extract_req_ext8, ".foo/", None);
    required_ext!(extract_req_ext9, "foo/", None);

    // Expectations for `Glob::prefix`.
    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
    prefix!(extract_prefix3, "**/foo", None);
    prefix!(extract_prefix4, "foo/**", None);

    // Expectations for `Glob::suffix`; the expected value is a pair of the
    // suffix string and a boolean.
    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

    // Expectations for `Glob::basename_literal`.
    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
    baseliteral!(extract_baselit2, "foo", None);
    baseliteral!(extract_baselit3, "*foo", None);
    baseliteral!(extract_baselit4, "*/foo", None);
}
globset/glob.rs

globset/
glob.rs