globset/
lib.rs

1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8```ignore
9*.rs
10src/lib.rs
11src/**/foo.rs
12```
13
14and a path `src/bar/baz/foo.rs`, then the set would report the first and third
15globs as matching.
16
17# Example: one glob
18
19This example shows how to match a single glob against a single file path.
20
21```
22# fn example() -> Result<(), globset::Error> {
23use globset::Glob;
24
25let glob = Glob::new("*.rs")?.compile_matcher();
26
27assert!(glob.is_match("foo.rs"));
28assert!(glob.is_match("foo/bar.rs"));
29assert!(!glob.is_match("Cargo.toml"));
30# Ok(()) } example().unwrap();
31```
32
33# Example: configuring a glob matcher
34
35This example shows how to use a `GlobBuilder` to configure aspects of match
36semantics. In this example, we prevent wildcards from matching path separators.
37
38```
39# fn example() -> Result<(), globset::Error> {
40use globset::GlobBuilder;
41
42let glob = GlobBuilder::new("*.rs")
43    .literal_separator(true).build()?.compile_matcher();
44
45assert!(glob.is_match("foo.rs"));
46assert!(!glob.is_match("foo/bar.rs")); // no longer matches
47assert!(!glob.is_match("Cargo.toml"));
48# Ok(()) } example().unwrap();
49```
50
51# Example: match multiple globs at once
52
53This example shows how to match multiple glob patterns at once.
54
55```
56# fn example() -> Result<(), globset::Error> {
57use globset::{Glob, GlobSetBuilder};
58
59let mut builder = GlobSetBuilder::new();
60// A GlobBuilder can be used to configure each glob's match semantics
61// independently.
62builder.add(Glob::new("*.rs")?);
63builder.add(Glob::new("src/lib.rs")?);
64builder.add(Glob::new("src/**/foo.rs")?);
65let set = builder.build()?;
66
67assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
68# Ok(()) } example().unwrap();
69```
70
71# Syntax
72
73Standard Unix-style glob syntax is supported:
74
75* `?` matches any single character. (If the `literal_separator` option is
76  enabled, then `?` can never match a path separator.)
77* `*` matches zero or more characters. (If the `literal_separator` option is
78  enabled, then `*` can never match a path separator.)
79* `**` recursively matches directories but are only legal in three situations.
80  First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
81  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
82  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
83  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
84  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
85  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
86  the pattern, then it matches zero or more directories. Using `**` anywhere
87  else is illegal (N.B. the glob `**` is allowed and means "match everything").
88* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
89  (N.B. Nesting `{...}` is not currently allowed.)
90* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
91  `[!ab]` to match any character except for `a` and `b`.
92* Metacharacters such as `*` and `?` can be escaped with character class
93  notation. e.g., `[*]` matches `*`.
94* When backslash escapes are enabled, a backslash (`\`) will escape all meta
95  characters in a glob. If it precedes a non-meta character, then the slash is
96  ignored. A `\\` will match a literal `\\`. Note that this mode is only
97  enabled on Unix platforms by default, but can be enabled on any platform
98  via the `backslash_escape` setting on `Glob`.
99
100A `GlobBuilder` can be used to prevent wildcards from matching path separators,
101or to enable case insensitive matching.
102*/
103
104#![deny(missing_docs)]
105
106extern crate aho_corasick;
107extern crate fnv;
108#[macro_use]
109extern crate log;
110extern crate memchr;
111extern crate regex;
112
113use std::borrow::Cow;
114use std::collections::{BTreeMap, HashMap};
115use std::error::Error as StdError;
116use std::ffi::OsStr;
117use std::fmt;
118use std::hash;
119use std::path::Path;
120use std::str;
121
122use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
123use regex::bytes::{Regex, RegexBuilder, RegexSet};
124
125use pathutil::{
126    file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
127};
128use self::glob::MatchStrategy;
129pub use self::glob::{Glob, GlobBuilder, GlobMatcher};
130
131mod glob;
132mod pathutil;
133
134/// Represents an error that can occur when parsing a glob pattern.
135#[derive(Clone, Debug, Eq, PartialEq)]
136pub struct Error {
137    /// The original glob provided by the caller.
138    glob: Option<String>,
139    /// The kind of error.
140    kind: ErrorKind,
141}
142
143/// The kind of error that can occur when parsing a glob pattern.
144#[derive(Clone, Debug, Eq, PartialEq)]
145pub enum ErrorKind {
146    /// **DEPRECATED**.
147    ///
148    /// This error used to occur for consistency with git's glob specification,
149    /// but the specification now accepts all uses of `**`. When `**` does not
150    /// appear adjacent to a path separator or at the beginning/end of a glob,
151    /// it is now treated as two consecutive `*` patterns. As such, this error
152    /// is no longer used.
153    InvalidRecursive,
154    /// Occurs when a character class (e.g., `[abc]`) is not closed.
155    UnclosedClass,
156    /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
157    /// example, if the range starts with a lexicographically larger character
158    /// than it ends with.
159    InvalidRange(char, char),
160    /// Occurs when a `}` is found without a matching `{`.
161    UnopenedAlternates,
162    /// Occurs when a `{` is found without a matching `}`.
163    UnclosedAlternates,
164    /// Occurs when an alternating group is nested inside another alternating
165    /// group, e.g., `{{a,b},{c,d}}`.
166    NestedAlternates,
167    /// Occurs when an unescaped '\' is found at the end of a glob.
168    DanglingEscape,
169    /// An error associated with parsing or compiling a regex.
170    Regex(String),
171    /// Hints that destructuring should not be exhaustive.
172    ///
173    /// This enum may grow additional variants, so this makes sure clients
174    /// don't count on exhaustive matching. (Otherwise, adding a new variant
175    /// could break existing code.)
176    #[doc(hidden)]
177    __Nonexhaustive,
178}
179
180impl StdError for Error {
181    fn description(&self) -> &str {
182        self.kind.description()
183    }
184}
185
186impl Error {
187    /// Return the glob that caused this error, if one exists.
188    pub fn glob(&self) -> Option<&str> {
189        self.glob.as_ref().map(|s| &**s)
190    }
191
192    /// Return the kind of this error.
193    pub fn kind(&self) -> &ErrorKind {
194        &self.kind
195    }
196}
197
198impl ErrorKind {
199    fn description(&self) -> &str {
200        match *self {
201            ErrorKind::InvalidRecursive => {
202                "invalid use of **; must be one path component"
203            }
204            ErrorKind::UnclosedClass => {
205                "unclosed character class; missing ']'"
206            }
207            ErrorKind::InvalidRange(_, _) => {
208                "invalid character range"
209            }
210            ErrorKind::UnopenedAlternates => {
211                "unopened alternate group; missing '{' \
212                (maybe escape '}' with '[}]'?)"
213            }
214            ErrorKind::UnclosedAlternates => {
215                "unclosed alternate group; missing '}' \
216                (maybe escape '{' with '[{]'?)"
217            }
218            ErrorKind::NestedAlternates => {
219                "nested alternate groups are not allowed"
220            }
221            ErrorKind::DanglingEscape => {
222                "dangling '\\'"
223            }
224            ErrorKind::Regex(ref err) => err,
225            ErrorKind::__Nonexhaustive => unreachable!(),
226        }
227    }
228}
229
230impl fmt::Display for Error {
231    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232        match self.glob {
233            None => self.kind.fmt(f),
234            Some(ref glob) => {
235                write!(f, "error parsing glob '{}': {}", glob, self.kind)
236            }
237        }
238    }
239}
240
241impl fmt::Display for ErrorKind {
242    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243        match *self {
244            ErrorKind::InvalidRecursive
245            | ErrorKind::UnclosedClass
246            | ErrorKind::UnopenedAlternates
247            | ErrorKind::UnclosedAlternates
248            | ErrorKind::NestedAlternates
249            | ErrorKind::DanglingEscape
250            | ErrorKind::Regex(_) => {
251                write!(f, "{}", self.description())
252            }
253            ErrorKind::InvalidRange(s, e) => {
254                write!(f, "invalid range; '{}' > '{}'", s, e)
255            }
256            ErrorKind::__Nonexhaustive => unreachable!(),
257        }
258    }
259}
260
261fn new_regex(pat: &str) -> Result<Regex, Error> {
262    RegexBuilder::new(pat)
263        .dot_matches_new_line(true)
264        .size_limit(10 * (1 << 20))
265        .dfa_size_limit(10 * (1 << 20))
266        .build()
267        .map_err(|err| {
268            Error {
269                glob: Some(pat.to_string()),
270                kind: ErrorKind::Regex(err.to_string()),
271            }
272        })
273}
274
275fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
276        where S: AsRef<str>, I: IntoIterator<Item=S> {
277    RegexSet::new(pats).map_err(|err| {
278        Error {
279            glob: None,
280            kind: ErrorKind::Regex(err.to_string()),
281        }
282    })
283}
284
285type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
286
287/// GlobSet represents a group of globs that can be matched together in a
288/// single pass.
289#[derive(Clone, Debug)]
290pub struct GlobSet {
291    len: usize,
292    strats: Vec<GlobSetMatchStrategy>,
293}
294
295impl GlobSet {
296    /// Create an empty `GlobSet`. An empty set matches nothing.
297    pub fn empty() -> GlobSet {
298        GlobSet {
299            len: 0,
300            strats: vec![],
301        }
302    }
303
304    /// Returns true if this set is empty, and therefore matches nothing.
305    pub fn is_empty(&self) -> bool {
306        self.len == 0
307    }
308
309    /// Returns the number of globs in this set.
310    pub fn len(&self) -> usize {
311        self.len
312    }
313
314    /// Returns true if any glob in this set matches the path given.
315    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
316        self.is_match_candidate(&Candidate::new(path.as_ref()))
317    }
318
319    /// Returns true if any glob in this set matches the path given.
320    ///
321    /// This takes a Candidate as input, which can be used to amortize the
322    /// cost of preparing a path for matching.
323    pub fn is_match_candidate(&self, path: &Candidate) -> bool {
324        if self.is_empty() {
325            return false;
326        }
327        for strat in &self.strats {
328            if strat.is_match(path) {
329                return true;
330            }
331        }
332        false
333    }
334
335    /// Returns the sequence number of every glob pattern that matches the
336    /// given path.
337    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
338        self.matches_candidate(&Candidate::new(path.as_ref()))
339    }
340
341    /// Returns the sequence number of every glob pattern that matches the
342    /// given path.
343    ///
344    /// This takes a Candidate as input, which can be used to amortize the
345    /// cost of preparing a path for matching.
346    pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
347        let mut into = vec![];
348        if self.is_empty() {
349            return into;
350        }
351        self.matches_candidate_into(path, &mut into);
352        into
353    }
354
355    /// Adds the sequence number of every glob pattern that matches the given
356    /// path to the vec given.
357    ///
358    /// `into` is is cleared before matching begins, and contains the set of
359    /// sequence numbers (in ascending order) after matching ends. If no globs
360    /// were matched, then `into` will be empty.
361    pub fn matches_into<P: AsRef<Path>>(
362        &self,
363        path: P,
364        into: &mut Vec<usize>,
365    ) {
366        self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
367    }
368
369    /// Adds the sequence number of every glob pattern that matches the given
370    /// path to the vec given.
371    ///
372    /// `into` is is cleared before matching begins, and contains the set of
373    /// sequence numbers (in ascending order) after matching ends. If no globs
374    /// were matched, then `into` will be empty.
375    ///
376    /// This takes a Candidate as input, which can be used to amortize the
377    /// cost of preparing a path for matching.
378    pub fn matches_candidate_into(
379        &self,
380        path: &Candidate,
381        into: &mut Vec<usize>,
382    ) {
383        into.clear();
384        if self.is_empty() {
385            return;
386        }
387        for strat in &self.strats {
388            strat.matches_into(path, into);
389        }
390        into.sort();
391        into.dedup();
392    }
393
394    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
395        if pats.is_empty() {
396            return Ok(GlobSet { len: 0, strats: vec![] });
397        }
398        let mut lits = LiteralStrategy::new();
399        let mut base_lits = BasenameLiteralStrategy::new();
400        let mut exts = ExtensionStrategy::new();
401        let mut prefixes = MultiStrategyBuilder::new();
402        let mut suffixes = MultiStrategyBuilder::new();
403        let mut required_exts = RequiredExtensionStrategyBuilder::new();
404        let mut regexes = MultiStrategyBuilder::new();
405        for (i, p) in pats.iter().enumerate() {
406            match MatchStrategy::new(p) {
407                MatchStrategy::Literal(lit) => {
408                    lits.add(i, lit);
409                }
410                MatchStrategy::BasenameLiteral(lit) => {
411                    base_lits.add(i, lit);
412                }
413                MatchStrategy::Extension(ext) => {
414                    exts.add(i, ext);
415                }
416                MatchStrategy::Prefix(prefix) => {
417                    prefixes.add(i, prefix);
418                }
419                MatchStrategy::Suffix { suffix, component } => {
420                    if component {
421                        lits.add(i, suffix[1..].to_string());
422                    }
423                    suffixes.add(i, suffix);
424                }
425                MatchStrategy::RequiredExtension(ext) => {
426                    required_exts.add(i, ext, p.regex().to_owned());
427                }
428                MatchStrategy::Regex => {
429                    debug!("glob converted to regex: {:?}", p);
430                    regexes.add(i, p.regex().to_owned());
431                }
432            }
433        }
434        debug!("built glob set; {} literals, {} basenames, {} extensions, \
435                {} prefixes, {} suffixes, {} required extensions, {} regexes",
436                lits.0.len(), base_lits.0.len(), exts.0.len(),
437                prefixes.literals.len(), suffixes.literals.len(),
438                required_exts.0.len(), regexes.literals.len());
439        Ok(GlobSet {
440            len: pats.len(),
441            strats: vec![
442                GlobSetMatchStrategy::Extension(exts),
443                GlobSetMatchStrategy::BasenameLiteral(base_lits),
444                GlobSetMatchStrategy::Literal(lits),
445                GlobSetMatchStrategy::Suffix(suffixes.suffix()),
446                GlobSetMatchStrategy::Prefix(prefixes.prefix()),
447                GlobSetMatchStrategy::RequiredExtension(
448                    required_exts.build()?),
449                GlobSetMatchStrategy::Regex(regexes.regex_set()?),
450            ],
451        })
452    }
453}
454
455/// GlobSetBuilder builds a group of patterns that can be used to
456/// simultaneously match a file path.
457#[derive(Clone, Debug)]
458pub struct GlobSetBuilder {
459    pats: Vec<Glob>,
460}
461
462impl GlobSetBuilder {
463    /// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
464    /// patterns. Once all patterns have been added, `build` should be called
465    /// to produce a `GlobSet`, which can then be used for matching.
466    pub fn new() -> GlobSetBuilder {
467        GlobSetBuilder { pats: vec![] }
468    }
469
470    /// Builds a new matcher from all of the glob patterns added so far.
471    ///
472    /// Once a matcher is built, no new patterns can be added to it.
473    pub fn build(&self) -> Result<GlobSet, Error> {
474        GlobSet::new(&self.pats)
475    }
476
477    /// Add a new pattern to this set.
478    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
479        self.pats.push(pat);
480        self
481    }
482}
483
484/// A candidate path for matching.
485///
486/// All glob matching in this crate operates on `Candidate` values.
487/// Constructing candidates has a very small cost associated with it, so
488/// callers may find it beneficial to amortize that cost when matching a single
489/// path against multiple globs or sets of globs.
490#[derive(Clone, Debug)]
491pub struct Candidate<'a> {
492    path: Cow<'a, [u8]>,
493    basename: Cow<'a, [u8]>,
494    ext: Cow<'a, [u8]>,
495}
496
497impl<'a> Candidate<'a> {
498    /// Create a new candidate for matching from the given path.
499    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
500        let path = path.as_ref();
501        let basename = file_name(path).unwrap_or(OsStr::new(""));
502        Candidate {
503            path: normalize_path(path_bytes(path)),
504            basename: os_str_bytes(basename),
505            ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
506        }
507    }
508
509    fn path_prefix(&self, max: usize) -> &[u8] {
510        if self.path.len() <= max {
511            &*self.path
512        } else {
513            &self.path[..max]
514        }
515    }
516
517    fn path_suffix(&self, max: usize) -> &[u8] {
518        if self.path.len() <= max {
519            &*self.path
520        } else {
521            &self.path[self.path.len() - max..]
522        }
523    }
524}
525
526#[derive(Clone, Debug)]
527enum GlobSetMatchStrategy {
528    Literal(LiteralStrategy),
529    BasenameLiteral(BasenameLiteralStrategy),
530    Extension(ExtensionStrategy),
531    Prefix(PrefixStrategy),
532    Suffix(SuffixStrategy),
533    RequiredExtension(RequiredExtensionStrategy),
534    Regex(RegexSetStrategy),
535}
536
537impl GlobSetMatchStrategy {
538    fn is_match(&self, candidate: &Candidate) -> bool {
539        use self::GlobSetMatchStrategy::*;
540        match *self {
541            Literal(ref s) => s.is_match(candidate),
542            BasenameLiteral(ref s) => s.is_match(candidate),
543            Extension(ref s) => s.is_match(candidate),
544            Prefix(ref s) => s.is_match(candidate),
545            Suffix(ref s) => s.is_match(candidate),
546            RequiredExtension(ref s) => s.is_match(candidate),
547            Regex(ref s) => s.is_match(candidate),
548        }
549    }
550
551    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
552        use self::GlobSetMatchStrategy::*;
553        match *self {
554            Literal(ref s) => s.matches_into(candidate, matches),
555            BasenameLiteral(ref s) => s.matches_into(candidate, matches),
556            Extension(ref s) => s.matches_into(candidate, matches),
557            Prefix(ref s) => s.matches_into(candidate, matches),
558            Suffix(ref s) => s.matches_into(candidate, matches),
559            RequiredExtension(ref s) => s.matches_into(candidate, matches),
560            Regex(ref s) => s.matches_into(candidate, matches),
561        }
562    }
563}
564
565#[derive(Clone, Debug)]
566struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
567
568impl LiteralStrategy {
569    fn new() -> LiteralStrategy {
570        LiteralStrategy(BTreeMap::new())
571    }
572
573    fn add(&mut self, global_index: usize, lit: String) {
574        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
575    }
576
577    fn is_match(&self, candidate: &Candidate) -> bool {
578        self.0.contains_key(&*candidate.path)
579    }
580
581    #[inline(never)]
582    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
583        if let Some(hits) = self.0.get(&*candidate.path) {
584            matches.extend(hits);
585        }
586    }
587}
588
589#[derive(Clone, Debug)]
590struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
591
592impl BasenameLiteralStrategy {
593    fn new() -> BasenameLiteralStrategy {
594        BasenameLiteralStrategy(BTreeMap::new())
595    }
596
597    fn add(&mut self, global_index: usize, lit: String) {
598        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
599    }
600
601    fn is_match(&self, candidate: &Candidate) -> bool {
602        if candidate.basename.is_empty() {
603            return false;
604        }
605        self.0.contains_key(&*candidate.basename)
606    }
607
608    #[inline(never)]
609    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
610        if candidate.basename.is_empty() {
611            return;
612        }
613        if let Some(hits) = self.0.get(&*candidate.basename) {
614            matches.extend(hits);
615        }
616    }
617}
618
619#[derive(Clone, Debug)]
620struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
621
622impl ExtensionStrategy {
623    fn new() -> ExtensionStrategy {
624        ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
625    }
626
627    fn add(&mut self, global_index: usize, ext: String) {
628        self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
629    }
630
631    fn is_match(&self, candidate: &Candidate) -> bool {
632        if candidate.ext.is_empty() {
633            return false;
634        }
635        self.0.contains_key(&*candidate.ext)
636    }
637
638    #[inline(never)]
639    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
640        if candidate.ext.is_empty() {
641            return;
642        }
643        if let Some(hits) = self.0.get(&*candidate.ext) {
644            matches.extend(hits);
645        }
646    }
647}
648
649#[derive(Clone, Debug)]
650struct PrefixStrategy {
651    matcher: FullAcAutomaton<Vec<u8>>,
652    map: Vec<usize>,
653    longest: usize,
654}
655
656impl PrefixStrategy {
657    fn is_match(&self, candidate: &Candidate) -> bool {
658        let path = candidate.path_prefix(self.longest);
659        for m in self.matcher.find_overlapping(path) {
660            if m.start == 0 {
661                return true;
662            }
663        }
664        false
665    }
666
667    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
668        let path = candidate.path_prefix(self.longest);
669        for m in self.matcher.find_overlapping(path) {
670            if m.start == 0 {
671                matches.push(self.map[m.pati]);
672            }
673        }
674    }
675}
676
677#[derive(Clone, Debug)]
678struct SuffixStrategy {
679    matcher: FullAcAutomaton<Vec<u8>>,
680    map: Vec<usize>,
681    longest: usize,
682}
683
684impl SuffixStrategy {
685    fn is_match(&self, candidate: &Candidate) -> bool {
686        let path = candidate.path_suffix(self.longest);
687        for m in self.matcher.find_overlapping(path) {
688            if m.end == path.len() {
689                return true;
690            }
691        }
692        false
693    }
694
695    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
696        let path = candidate.path_suffix(self.longest);
697        for m in self.matcher.find_overlapping(path) {
698            if m.end == path.len() {
699                matches.push(self.map[m.pati]);
700            }
701        }
702    }
703}
704
705#[derive(Clone, Debug)]
706struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
707
708impl RequiredExtensionStrategy {
709    fn is_match(&self, candidate: &Candidate) -> bool {
710        if candidate.ext.is_empty() {
711            return false;
712        }
713        match self.0.get(&*candidate.ext) {
714            None => false,
715            Some(regexes) => {
716                for &(_, ref re) in regexes {
717                    if re.is_match(&*candidate.path) {
718                        return true;
719                    }
720                }
721                false
722            }
723        }
724    }
725
726    #[inline(never)]
727    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
728        if candidate.ext.is_empty() {
729            return;
730        }
731        if let Some(regexes) = self.0.get(&*candidate.ext) {
732            for &(global_index, ref re) in regexes {
733                if re.is_match(&*candidate.path) {
734                    matches.push(global_index);
735                }
736            }
737        }
738    }
739}
740
741#[derive(Clone, Debug)]
742struct RegexSetStrategy {
743    matcher: RegexSet,
744    map: Vec<usize>,
745}
746
747impl RegexSetStrategy {
748    fn is_match(&self, candidate: &Candidate) -> bool {
749        self.matcher.is_match(&*candidate.path)
750    }
751
752    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
753        for i in self.matcher.matches(&*candidate.path) {
754            matches.push(self.map[i]);
755        }
756    }
757}
758
759#[derive(Clone, Debug)]
760struct MultiStrategyBuilder {
761    literals: Vec<String>,
762    map: Vec<usize>,
763    longest: usize,
764}
765
766impl MultiStrategyBuilder {
767    fn new() -> MultiStrategyBuilder {
768        MultiStrategyBuilder {
769            literals: vec![],
770            map: vec![],
771            longest: 0,
772        }
773    }
774
775    fn add(&mut self, global_index: usize, literal: String) {
776        if literal.len() > self.longest {
777            self.longest = literal.len();
778        }
779        self.map.push(global_index);
780        self.literals.push(literal);
781    }
782
783    fn prefix(self) -> PrefixStrategy {
784        let it = self.literals.into_iter().map(|s| s.into_bytes());
785        PrefixStrategy {
786            matcher: AcAutomaton::new(it).into_full(),
787            map: self.map,
788            longest: self.longest,
789        }
790    }
791
792    fn suffix(self) -> SuffixStrategy {
793        let it = self.literals.into_iter().map(|s| s.into_bytes());
794        SuffixStrategy {
795            matcher: AcAutomaton::new(it).into_full(),
796            map: self.map,
797            longest: self.longest,
798        }
799    }
800
801    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
802        Ok(RegexSetStrategy {
803            matcher: new_regex_set(self.literals)?,
804            map: self.map,
805        })
806    }
807}
808
809#[derive(Clone, Debug)]
810struct RequiredExtensionStrategyBuilder(
811    HashMap<Vec<u8>, Vec<(usize, String)>>,
812);
813
814impl RequiredExtensionStrategyBuilder {
815    fn new() -> RequiredExtensionStrategyBuilder {
816        RequiredExtensionStrategyBuilder(HashMap::new())
817    }
818
819    fn add(&mut self, global_index: usize, ext: String, regex: String) {
820        self.0
821            .entry(ext.into_bytes())
822            .or_insert(vec![])
823            .push((global_index, regex));
824    }
825
826    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
827        let mut exts = HashMap::with_hasher(Fnv::default());
828        for (ext, regexes) in self.0.into_iter() {
829            exts.insert(ext.clone(), vec![]);
830            for (global_index, regex) in regexes {
831                let compiled = new_regex(&regex)?;
832                exts.get_mut(&ext).unwrap().push((global_index, compiled));
833            }
834        }
835        Ok(RequiredExtensionStrategy(exts))
836    }
837}
838
839#[cfg(test)]
840mod tests {
841    use super::GlobSetBuilder;
842    use super::glob::Glob;
843
844    #[test]
845    fn set_works() {
846        let mut builder = GlobSetBuilder::new();
847        builder.add(Glob::new("src/**/*.rs").unwrap());
848        builder.add(Glob::new("*.c").unwrap());
849        builder.add(Glob::new("src/lib.rs").unwrap());
850        let set = builder.build().unwrap();
851
852        assert!(set.is_match("foo.c"));
853        assert!(set.is_match("src/foo.c"));
854        assert!(!set.is_match("foo.rs"));
855        assert!(!set.is_match("tests/foo.rs"));
856        assert!(set.is_match("src/foo.rs"));
857        assert!(set.is_match("src/grep/src/main.rs"));
858
859        let matches = set.matches("src/lib.rs");
860        assert_eq!(2, matches.len());
861        assert_eq!(0, matches[0]);
862        assert_eq!(2, matches[1]);
863    }
864
865    #[test]
866    fn empty_set_works() {
867        let set = GlobSetBuilder::new().build().unwrap();
868        assert!(!set.is_match(""));
869        assert!(!set.is_match("a"));
870    }
871}