1#![deny(missing_docs)]
105
106extern crate aho_corasick;
107extern crate fnv;
108#[macro_use]
109extern crate log;
110extern crate memchr;
111extern crate regex;
112
113use std::borrow::Cow;
114use std::collections::{BTreeMap, HashMap};
115use std::error::Error as StdError;
116use std::ffi::OsStr;
117use std::fmt;
118use std::hash;
119use std::path::Path;
120use std::str;
121
122use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
123use regex::bytes::{Regex, RegexBuilder, RegexSet};
124
125use pathutil::{
126    file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
127};
128use self::glob::MatchStrategy;
129pub use self::glob::{Glob, GlobBuilder, GlobMatcher};
130
131mod glob;
132mod pathutil;
133
134#[derive(Clone, Debug, Eq, PartialEq)]
136pub struct Error {
137    glob: Option<String>,
139    kind: ErrorKind,
141}
142
143#[derive(Clone, Debug, Eq, PartialEq)]
145pub enum ErrorKind {
146    InvalidRecursive,
154    UnclosedClass,
156    InvalidRange(char, char),
160    UnopenedAlternates,
162    UnclosedAlternates,
164    NestedAlternates,
167    DanglingEscape,
169    Regex(String),
171    #[doc(hidden)]
177    __Nonexhaustive,
178}
179
180impl StdError for Error {
181    fn description(&self) -> &str {
182        self.kind.description()
183    }
184}
185
186impl Error {
187    pub fn glob(&self) -> Option<&str> {
189        self.glob.as_ref().map(|s| &**s)
190    }
191
192    pub fn kind(&self) -> &ErrorKind {
194        &self.kind
195    }
196}
197
198impl ErrorKind {
199    fn description(&self) -> &str {
200        match *self {
201            ErrorKind::InvalidRecursive => {
202                "invalid use of **; must be one path component"
203            }
204            ErrorKind::UnclosedClass => {
205                "unclosed character class; missing ']'"
206            }
207            ErrorKind::InvalidRange(_, _) => {
208                "invalid character range"
209            }
210            ErrorKind::UnopenedAlternates => {
211                "unopened alternate group; missing '{' \
212                (maybe escape '}' with '[}]'?)"
213            }
214            ErrorKind::UnclosedAlternates => {
215                "unclosed alternate group; missing '}' \
216                (maybe escape '{' with '[{]'?)"
217            }
218            ErrorKind::NestedAlternates => {
219                "nested alternate groups are not allowed"
220            }
221            ErrorKind::DanglingEscape => {
222                "dangling '\\'"
223            }
224            ErrorKind::Regex(ref err) => err,
225            ErrorKind::__Nonexhaustive => unreachable!(),
226        }
227    }
228}
229
230impl fmt::Display for Error {
231    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232        match self.glob {
233            None => self.kind.fmt(f),
234            Some(ref glob) => {
235                write!(f, "error parsing glob '{}': {}", glob, self.kind)
236            }
237        }
238    }
239}
240
241impl fmt::Display for ErrorKind {
242    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243        match *self {
244            ErrorKind::InvalidRecursive
245            | ErrorKind::UnclosedClass
246            | ErrorKind::UnopenedAlternates
247            | ErrorKind::UnclosedAlternates
248            | ErrorKind::NestedAlternates
249            | ErrorKind::DanglingEscape
250            | ErrorKind::Regex(_) => {
251                write!(f, "{}", self.description())
252            }
253            ErrorKind::InvalidRange(s, e) => {
254                write!(f, "invalid range; '{}' > '{}'", s, e)
255            }
256            ErrorKind::__Nonexhaustive => unreachable!(),
257        }
258    }
259}
260
261fn new_regex(pat: &str) -> Result<Regex, Error> {
262    RegexBuilder::new(pat)
263        .dot_matches_new_line(true)
264        .size_limit(10 * (1 << 20))
265        .dfa_size_limit(10 * (1 << 20))
266        .build()
267        .map_err(|err| {
268            Error {
269                glob: Some(pat.to_string()),
270                kind: ErrorKind::Regex(err.to_string()),
271            }
272        })
273}
274
275fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
276        where S: AsRef<str>, I: IntoIterator<Item=S> {
277    RegexSet::new(pats).map_err(|err| {
278        Error {
279            glob: None,
280            kind: ErrorKind::Regex(err.to_string()),
281        }
282    })
283}
284
285type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
286
287#[derive(Clone, Debug)]
290pub struct GlobSet {
291    len: usize,
292    strats: Vec<GlobSetMatchStrategy>,
293}
294
295impl GlobSet {
296    pub fn empty() -> GlobSet {
298        GlobSet {
299            len: 0,
300            strats: vec![],
301        }
302    }
303
304    pub fn is_empty(&self) -> bool {
306        self.len == 0
307    }
308
309    pub fn len(&self) -> usize {
311        self.len
312    }
313
314    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
316        self.is_match_candidate(&Candidate::new(path.as_ref()))
317    }
318
319    pub fn is_match_candidate(&self, path: &Candidate) -> bool {
324        if self.is_empty() {
325            return false;
326        }
327        for strat in &self.strats {
328            if strat.is_match(path) {
329                return true;
330            }
331        }
332        false
333    }
334
335    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
338        self.matches_candidate(&Candidate::new(path.as_ref()))
339    }
340
341    pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
347        let mut into = vec![];
348        if self.is_empty() {
349            return into;
350        }
351        self.matches_candidate_into(path, &mut into);
352        into
353    }
354
355    pub fn matches_into<P: AsRef<Path>>(
362        &self,
363        path: P,
364        into: &mut Vec<usize>,
365    ) {
366        self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
367    }
368
369    pub fn matches_candidate_into(
379        &self,
380        path: &Candidate,
381        into: &mut Vec<usize>,
382    ) {
383        into.clear();
384        if self.is_empty() {
385            return;
386        }
387        for strat in &self.strats {
388            strat.matches_into(path, into);
389        }
390        into.sort();
391        into.dedup();
392    }
393
394    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
395        if pats.is_empty() {
396            return Ok(GlobSet { len: 0, strats: vec![] });
397        }
398        let mut lits = LiteralStrategy::new();
399        let mut base_lits = BasenameLiteralStrategy::new();
400        let mut exts = ExtensionStrategy::new();
401        let mut prefixes = MultiStrategyBuilder::new();
402        let mut suffixes = MultiStrategyBuilder::new();
403        let mut required_exts = RequiredExtensionStrategyBuilder::new();
404        let mut regexes = MultiStrategyBuilder::new();
405        for (i, p) in pats.iter().enumerate() {
406            match MatchStrategy::new(p) {
407                MatchStrategy::Literal(lit) => {
408                    lits.add(i, lit);
409                }
410                MatchStrategy::BasenameLiteral(lit) => {
411                    base_lits.add(i, lit);
412                }
413                MatchStrategy::Extension(ext) => {
414                    exts.add(i, ext);
415                }
416                MatchStrategy::Prefix(prefix) => {
417                    prefixes.add(i, prefix);
418                }
419                MatchStrategy::Suffix { suffix, component } => {
420                    if component {
421                        lits.add(i, suffix[1..].to_string());
422                    }
423                    suffixes.add(i, suffix);
424                }
425                MatchStrategy::RequiredExtension(ext) => {
426                    required_exts.add(i, ext, p.regex().to_owned());
427                }
428                MatchStrategy::Regex => {
429                    debug!("glob converted to regex: {:?}", p);
430                    regexes.add(i, p.regex().to_owned());
431                }
432            }
433        }
434        debug!("built glob set; {} literals, {} basenames, {} extensions, \
435                {} prefixes, {} suffixes, {} required extensions, {} regexes",
436                lits.0.len(), base_lits.0.len(), exts.0.len(),
437                prefixes.literals.len(), suffixes.literals.len(),
438                required_exts.0.len(), regexes.literals.len());
439        Ok(GlobSet {
440            len: pats.len(),
441            strats: vec![
442                GlobSetMatchStrategy::Extension(exts),
443                GlobSetMatchStrategy::BasenameLiteral(base_lits),
444                GlobSetMatchStrategy::Literal(lits),
445                GlobSetMatchStrategy::Suffix(suffixes.suffix()),
446                GlobSetMatchStrategy::Prefix(prefixes.prefix()),
447                GlobSetMatchStrategy::RequiredExtension(
448                    required_exts.build()?),
449                GlobSetMatchStrategy::Regex(regexes.regex_set()?),
450            ],
451        })
452    }
453}
454
455#[derive(Clone, Debug)]
458pub struct GlobSetBuilder {
459    pats: Vec<Glob>,
460}
461
462impl GlobSetBuilder {
463    pub fn new() -> GlobSetBuilder {
467        GlobSetBuilder { pats: vec![] }
468    }
469
470    pub fn build(&self) -> Result<GlobSet, Error> {
474        GlobSet::new(&self.pats)
475    }
476
477    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
479        self.pats.push(pat);
480        self
481    }
482}
483
484#[derive(Clone, Debug)]
491pub struct Candidate<'a> {
492    path: Cow<'a, [u8]>,
493    basename: Cow<'a, [u8]>,
494    ext: Cow<'a, [u8]>,
495}
496
497impl<'a> Candidate<'a> {
498    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
500        let path = path.as_ref();
501        let basename = file_name(path).unwrap_or(OsStr::new(""));
502        Candidate {
503            path: normalize_path(path_bytes(path)),
504            basename: os_str_bytes(basename),
505            ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
506        }
507    }
508
509    fn path_prefix(&self, max: usize) -> &[u8] {
510        if self.path.len() <= max {
511            &*self.path
512        } else {
513            &self.path[..max]
514        }
515    }
516
517    fn path_suffix(&self, max: usize) -> &[u8] {
518        if self.path.len() <= max {
519            &*self.path
520        } else {
521            &self.path[self.path.len() - max..]
522        }
523    }
524}
525
526#[derive(Clone, Debug)]
527enum GlobSetMatchStrategy {
528    Literal(LiteralStrategy),
529    BasenameLiteral(BasenameLiteralStrategy),
530    Extension(ExtensionStrategy),
531    Prefix(PrefixStrategy),
532    Suffix(SuffixStrategy),
533    RequiredExtension(RequiredExtensionStrategy),
534    Regex(RegexSetStrategy),
535}
536
537impl GlobSetMatchStrategy {
538    fn is_match(&self, candidate: &Candidate) -> bool {
539        use self::GlobSetMatchStrategy::*;
540        match *self {
541            Literal(ref s) => s.is_match(candidate),
542            BasenameLiteral(ref s) => s.is_match(candidate),
543            Extension(ref s) => s.is_match(candidate),
544            Prefix(ref s) => s.is_match(candidate),
545            Suffix(ref s) => s.is_match(candidate),
546            RequiredExtension(ref s) => s.is_match(candidate),
547            Regex(ref s) => s.is_match(candidate),
548        }
549    }
550
551    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
552        use self::GlobSetMatchStrategy::*;
553        match *self {
554            Literal(ref s) => s.matches_into(candidate, matches),
555            BasenameLiteral(ref s) => s.matches_into(candidate, matches),
556            Extension(ref s) => s.matches_into(candidate, matches),
557            Prefix(ref s) => s.matches_into(candidate, matches),
558            Suffix(ref s) => s.matches_into(candidate, matches),
559            RequiredExtension(ref s) => s.matches_into(candidate, matches),
560            Regex(ref s) => s.matches_into(candidate, matches),
561        }
562    }
563}
564
565#[derive(Clone, Debug)]
566struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
567
568impl LiteralStrategy {
569    fn new() -> LiteralStrategy {
570        LiteralStrategy(BTreeMap::new())
571    }
572
573    fn add(&mut self, global_index: usize, lit: String) {
574        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
575    }
576
577    fn is_match(&self, candidate: &Candidate) -> bool {
578        self.0.contains_key(&*candidate.path)
579    }
580
581    #[inline(never)]
582    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
583        if let Some(hits) = self.0.get(&*candidate.path) {
584            matches.extend(hits);
585        }
586    }
587}
588
589#[derive(Clone, Debug)]
590struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
591
592impl BasenameLiteralStrategy {
593    fn new() -> BasenameLiteralStrategy {
594        BasenameLiteralStrategy(BTreeMap::new())
595    }
596
597    fn add(&mut self, global_index: usize, lit: String) {
598        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
599    }
600
601    fn is_match(&self, candidate: &Candidate) -> bool {
602        if candidate.basename.is_empty() {
603            return false;
604        }
605        self.0.contains_key(&*candidate.basename)
606    }
607
608    #[inline(never)]
609    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
610        if candidate.basename.is_empty() {
611            return;
612        }
613        if let Some(hits) = self.0.get(&*candidate.basename) {
614            matches.extend(hits);
615        }
616    }
617}
618
619#[derive(Clone, Debug)]
620struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
621
622impl ExtensionStrategy {
623    fn new() -> ExtensionStrategy {
624        ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
625    }
626
627    fn add(&mut self, global_index: usize, ext: String) {
628        self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
629    }
630
631    fn is_match(&self, candidate: &Candidate) -> bool {
632        if candidate.ext.is_empty() {
633            return false;
634        }
635        self.0.contains_key(&*candidate.ext)
636    }
637
638    #[inline(never)]
639    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
640        if candidate.ext.is_empty() {
641            return;
642        }
643        if let Some(hits) = self.0.get(&*candidate.ext) {
644            matches.extend(hits);
645        }
646    }
647}
648
649#[derive(Clone, Debug)]
650struct PrefixStrategy {
651    matcher: FullAcAutomaton<Vec<u8>>,
652    map: Vec<usize>,
653    longest: usize,
654}
655
656impl PrefixStrategy {
657    fn is_match(&self, candidate: &Candidate) -> bool {
658        let path = candidate.path_prefix(self.longest);
659        for m in self.matcher.find_overlapping(path) {
660            if m.start == 0 {
661                return true;
662            }
663        }
664        false
665    }
666
667    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
668        let path = candidate.path_prefix(self.longest);
669        for m in self.matcher.find_overlapping(path) {
670            if m.start == 0 {
671                matches.push(self.map[m.pati]);
672            }
673        }
674    }
675}
676
677#[derive(Clone, Debug)]
678struct SuffixStrategy {
679    matcher: FullAcAutomaton<Vec<u8>>,
680    map: Vec<usize>,
681    longest: usize,
682}
683
684impl SuffixStrategy {
685    fn is_match(&self, candidate: &Candidate) -> bool {
686        let path = candidate.path_suffix(self.longest);
687        for m in self.matcher.find_overlapping(path) {
688            if m.end == path.len() {
689                return true;
690            }
691        }
692        false
693    }
694
695    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
696        let path = candidate.path_suffix(self.longest);
697        for m in self.matcher.find_overlapping(path) {
698            if m.end == path.len() {
699                matches.push(self.map[m.pati]);
700            }
701        }
702    }
703}
704
705#[derive(Clone, Debug)]
706struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
707
708impl RequiredExtensionStrategy {
709    fn is_match(&self, candidate: &Candidate) -> bool {
710        if candidate.ext.is_empty() {
711            return false;
712        }
713        match self.0.get(&*candidate.ext) {
714            None => false,
715            Some(regexes) => {
716                for &(_, ref re) in regexes {
717                    if re.is_match(&*candidate.path) {
718                        return true;
719                    }
720                }
721                false
722            }
723        }
724    }
725
726    #[inline(never)]
727    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
728        if candidate.ext.is_empty() {
729            return;
730        }
731        if let Some(regexes) = self.0.get(&*candidate.ext) {
732            for &(global_index, ref re) in regexes {
733                if re.is_match(&*candidate.path) {
734                    matches.push(global_index);
735                }
736            }
737        }
738    }
739}
740
741#[derive(Clone, Debug)]
742struct RegexSetStrategy {
743    matcher: RegexSet,
744    map: Vec<usize>,
745}
746
747impl RegexSetStrategy {
748    fn is_match(&self, candidate: &Candidate) -> bool {
749        self.matcher.is_match(&*candidate.path)
750    }
751
752    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
753        for i in self.matcher.matches(&*candidate.path) {
754            matches.push(self.map[i]);
755        }
756    }
757}
758
759#[derive(Clone, Debug)]
760struct MultiStrategyBuilder {
761    literals: Vec<String>,
762    map: Vec<usize>,
763    longest: usize,
764}
765
766impl MultiStrategyBuilder {
767    fn new() -> MultiStrategyBuilder {
768        MultiStrategyBuilder {
769            literals: vec![],
770            map: vec![],
771            longest: 0,
772        }
773    }
774
775    fn add(&mut self, global_index: usize, literal: String) {
776        if literal.len() > self.longest {
777            self.longest = literal.len();
778        }
779        self.map.push(global_index);
780        self.literals.push(literal);
781    }
782
783    fn prefix(self) -> PrefixStrategy {
784        let it = self.literals.into_iter().map(|s| s.into_bytes());
785        PrefixStrategy {
786            matcher: AcAutomaton::new(it).into_full(),
787            map: self.map,
788            longest: self.longest,
789        }
790    }
791
792    fn suffix(self) -> SuffixStrategy {
793        let it = self.literals.into_iter().map(|s| s.into_bytes());
794        SuffixStrategy {
795            matcher: AcAutomaton::new(it).into_full(),
796            map: self.map,
797            longest: self.longest,
798        }
799    }
800
801    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
802        Ok(RegexSetStrategy {
803            matcher: new_regex_set(self.literals)?,
804            map: self.map,
805        })
806    }
807}
808
809#[derive(Clone, Debug)]
810struct RequiredExtensionStrategyBuilder(
811    HashMap<Vec<u8>, Vec<(usize, String)>>,
812);
813
814impl RequiredExtensionStrategyBuilder {
815    fn new() -> RequiredExtensionStrategyBuilder {
816        RequiredExtensionStrategyBuilder(HashMap::new())
817    }
818
819    fn add(&mut self, global_index: usize, ext: String, regex: String) {
820        self.0
821            .entry(ext.into_bytes())
822            .or_insert(vec![])
823            .push((global_index, regex));
824    }
825
826    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
827        let mut exts = HashMap::with_hasher(Fnv::default());
828        for (ext, regexes) in self.0.into_iter() {
829            exts.insert(ext.clone(), vec![]);
830            for (global_index, regex) in regexes {
831                let compiled = new_regex(®ex)?;
832                exts.get_mut(&ext).unwrap().push((global_index, compiled));
833            }
834        }
835        Ok(RequiredExtensionStrategy(exts))
836    }
837}
838
839#[cfg(test)]
840mod tests {
841    use super::GlobSetBuilder;
842    use super::glob::Glob;
843
844    #[test]
845    fn set_works() {
846        let mut builder = GlobSetBuilder::new();
847        builder.add(Glob::new("src/**/*.rs").unwrap());
848        builder.add(Glob::new("*.c").unwrap());
849        builder.add(Glob::new("src/lib.rs").unwrap());
850        let set = builder.build().unwrap();
851
852        assert!(set.is_match("foo.c"));
853        assert!(set.is_match("src/foo.c"));
854        assert!(!set.is_match("foo.rs"));
855        assert!(!set.is_match("tests/foo.rs"));
856        assert!(set.is_match("src/foo.rs"));
857        assert!(set.is_match("src/grep/src/main.rs"));
858
859        let matches = set.matches("src/lib.rs");
860        assert_eq!(2, matches.len());
861        assert_eq!(0, matches[0]);
862        assert_eq!(2, matches[1]);
863    }
864
865    #[test]
866    fn empty_set_works() {
867        let set = GlobSetBuilder::new().build().unwrap();
868        assert!(!set.is_match(""));
869        assert!(!set.is_match("a"));
870    }
871}