1#![deny(missing_docs)]
105
106extern crate aho_corasick;
107extern crate fnv;
108#[macro_use]
109extern crate log;
110extern crate memchr;
111extern crate regex;
112
113use std::borrow::Cow;
114use std::collections::{BTreeMap, HashMap};
115use std::error::Error as StdError;
116use std::ffi::OsStr;
117use std::fmt;
118use std::hash;
119use std::path::Path;
120use std::str;
121
122use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
123use regex::bytes::{Regex, RegexBuilder, RegexSet};
124
125use pathutil::{
126 file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
127};
128use self::glob::MatchStrategy;
129pub use self::glob::{Glob, GlobBuilder, GlobMatcher};
130
131mod glob;
132mod pathutil;
133
134#[derive(Clone, Debug, Eq, PartialEq)]
136pub struct Error {
137 glob: Option<String>,
139 kind: ErrorKind,
141}
142
143#[derive(Clone, Debug, Eq, PartialEq)]
145pub enum ErrorKind {
146 InvalidRecursive,
154 UnclosedClass,
156 InvalidRange(char, char),
160 UnopenedAlternates,
162 UnclosedAlternates,
164 NestedAlternates,
167 DanglingEscape,
169 Regex(String),
171 #[doc(hidden)]
177 __Nonexhaustive,
178}
179
180impl StdError for Error {
181 fn description(&self) -> &str {
182 self.kind.description()
183 }
184}
185
186impl Error {
187 pub fn glob(&self) -> Option<&str> {
189 self.glob.as_ref().map(|s| &**s)
190 }
191
192 pub fn kind(&self) -> &ErrorKind {
194 &self.kind
195 }
196}
197
198impl ErrorKind {
199 fn description(&self) -> &str {
200 match *self {
201 ErrorKind::InvalidRecursive => {
202 "invalid use of **; must be one path component"
203 }
204 ErrorKind::UnclosedClass => {
205 "unclosed character class; missing ']'"
206 }
207 ErrorKind::InvalidRange(_, _) => {
208 "invalid character range"
209 }
210 ErrorKind::UnopenedAlternates => {
211 "unopened alternate group; missing '{' \
212 (maybe escape '}' with '[}]'?)"
213 }
214 ErrorKind::UnclosedAlternates => {
215 "unclosed alternate group; missing '}' \
216 (maybe escape '{' with '[{]'?)"
217 }
218 ErrorKind::NestedAlternates => {
219 "nested alternate groups are not allowed"
220 }
221 ErrorKind::DanglingEscape => {
222 "dangling '\\'"
223 }
224 ErrorKind::Regex(ref err) => err,
225 ErrorKind::__Nonexhaustive => unreachable!(),
226 }
227 }
228}
229
230impl fmt::Display for Error {
231 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232 match self.glob {
233 None => self.kind.fmt(f),
234 Some(ref glob) => {
235 write!(f, "error parsing glob '{}': {}", glob, self.kind)
236 }
237 }
238 }
239}
240
241impl fmt::Display for ErrorKind {
242 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243 match *self {
244 ErrorKind::InvalidRecursive
245 | ErrorKind::UnclosedClass
246 | ErrorKind::UnopenedAlternates
247 | ErrorKind::UnclosedAlternates
248 | ErrorKind::NestedAlternates
249 | ErrorKind::DanglingEscape
250 | ErrorKind::Regex(_) => {
251 write!(f, "{}", self.description())
252 }
253 ErrorKind::InvalidRange(s, e) => {
254 write!(f, "invalid range; '{}' > '{}'", s, e)
255 }
256 ErrorKind::__Nonexhaustive => unreachable!(),
257 }
258 }
259}
260
261fn new_regex(pat: &str) -> Result<Regex, Error> {
262 RegexBuilder::new(pat)
263 .dot_matches_new_line(true)
264 .size_limit(10 * (1 << 20))
265 .dfa_size_limit(10 * (1 << 20))
266 .build()
267 .map_err(|err| {
268 Error {
269 glob: Some(pat.to_string()),
270 kind: ErrorKind::Regex(err.to_string()),
271 }
272 })
273}
274
275fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
276 where S: AsRef<str>, I: IntoIterator<Item=S> {
277 RegexSet::new(pats).map_err(|err| {
278 Error {
279 glob: None,
280 kind: ErrorKind::Regex(err.to_string()),
281 }
282 })
283}
284
285type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
286
287#[derive(Clone, Debug)]
290pub struct GlobSet {
291 len: usize,
292 strats: Vec<GlobSetMatchStrategy>,
293}
294
295impl GlobSet {
296 pub fn empty() -> GlobSet {
298 GlobSet {
299 len: 0,
300 strats: vec![],
301 }
302 }
303
304 pub fn is_empty(&self) -> bool {
306 self.len == 0
307 }
308
309 pub fn len(&self) -> usize {
311 self.len
312 }
313
314 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
316 self.is_match_candidate(&Candidate::new(path.as_ref()))
317 }
318
319 pub fn is_match_candidate(&self, path: &Candidate) -> bool {
324 if self.is_empty() {
325 return false;
326 }
327 for strat in &self.strats {
328 if strat.is_match(path) {
329 return true;
330 }
331 }
332 false
333 }
334
335 pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
338 self.matches_candidate(&Candidate::new(path.as_ref()))
339 }
340
341 pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
347 let mut into = vec![];
348 if self.is_empty() {
349 return into;
350 }
351 self.matches_candidate_into(path, &mut into);
352 into
353 }
354
355 pub fn matches_into<P: AsRef<Path>>(
362 &self,
363 path: P,
364 into: &mut Vec<usize>,
365 ) {
366 self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
367 }
368
369 pub fn matches_candidate_into(
379 &self,
380 path: &Candidate,
381 into: &mut Vec<usize>,
382 ) {
383 into.clear();
384 if self.is_empty() {
385 return;
386 }
387 for strat in &self.strats {
388 strat.matches_into(path, into);
389 }
390 into.sort();
391 into.dedup();
392 }
393
394 fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
395 if pats.is_empty() {
396 return Ok(GlobSet { len: 0, strats: vec![] });
397 }
398 let mut lits = LiteralStrategy::new();
399 let mut base_lits = BasenameLiteralStrategy::new();
400 let mut exts = ExtensionStrategy::new();
401 let mut prefixes = MultiStrategyBuilder::new();
402 let mut suffixes = MultiStrategyBuilder::new();
403 let mut required_exts = RequiredExtensionStrategyBuilder::new();
404 let mut regexes = MultiStrategyBuilder::new();
405 for (i, p) in pats.iter().enumerate() {
406 match MatchStrategy::new(p) {
407 MatchStrategy::Literal(lit) => {
408 lits.add(i, lit);
409 }
410 MatchStrategy::BasenameLiteral(lit) => {
411 base_lits.add(i, lit);
412 }
413 MatchStrategy::Extension(ext) => {
414 exts.add(i, ext);
415 }
416 MatchStrategy::Prefix(prefix) => {
417 prefixes.add(i, prefix);
418 }
419 MatchStrategy::Suffix { suffix, component } => {
420 if component {
421 lits.add(i, suffix[1..].to_string());
422 }
423 suffixes.add(i, suffix);
424 }
425 MatchStrategy::RequiredExtension(ext) => {
426 required_exts.add(i, ext, p.regex().to_owned());
427 }
428 MatchStrategy::Regex => {
429 debug!("glob converted to regex: {:?}", p);
430 regexes.add(i, p.regex().to_owned());
431 }
432 }
433 }
434 debug!("built glob set; {} literals, {} basenames, {} extensions, \
435 {} prefixes, {} suffixes, {} required extensions, {} regexes",
436 lits.0.len(), base_lits.0.len(), exts.0.len(),
437 prefixes.literals.len(), suffixes.literals.len(),
438 required_exts.0.len(), regexes.literals.len());
439 Ok(GlobSet {
440 len: pats.len(),
441 strats: vec![
442 GlobSetMatchStrategy::Extension(exts),
443 GlobSetMatchStrategy::BasenameLiteral(base_lits),
444 GlobSetMatchStrategy::Literal(lits),
445 GlobSetMatchStrategy::Suffix(suffixes.suffix()),
446 GlobSetMatchStrategy::Prefix(prefixes.prefix()),
447 GlobSetMatchStrategy::RequiredExtension(
448 required_exts.build()?),
449 GlobSetMatchStrategy::Regex(regexes.regex_set()?),
450 ],
451 })
452 }
453}
454
455#[derive(Clone, Debug)]
458pub struct GlobSetBuilder {
459 pats: Vec<Glob>,
460}
461
462impl GlobSetBuilder {
463 pub fn new() -> GlobSetBuilder {
467 GlobSetBuilder { pats: vec![] }
468 }
469
470 pub fn build(&self) -> Result<GlobSet, Error> {
474 GlobSet::new(&self.pats)
475 }
476
477 pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
479 self.pats.push(pat);
480 self
481 }
482}
483
484#[derive(Clone, Debug)]
491pub struct Candidate<'a> {
492 path: Cow<'a, [u8]>,
493 basename: Cow<'a, [u8]>,
494 ext: Cow<'a, [u8]>,
495}
496
497impl<'a> Candidate<'a> {
498 pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
500 let path = path.as_ref();
501 let basename = file_name(path).unwrap_or(OsStr::new(""));
502 Candidate {
503 path: normalize_path(path_bytes(path)),
504 basename: os_str_bytes(basename),
505 ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
506 }
507 }
508
509 fn path_prefix(&self, max: usize) -> &[u8] {
510 if self.path.len() <= max {
511 &*self.path
512 } else {
513 &self.path[..max]
514 }
515 }
516
517 fn path_suffix(&self, max: usize) -> &[u8] {
518 if self.path.len() <= max {
519 &*self.path
520 } else {
521 &self.path[self.path.len() - max..]
522 }
523 }
524}
525
526#[derive(Clone, Debug)]
527enum GlobSetMatchStrategy {
528 Literal(LiteralStrategy),
529 BasenameLiteral(BasenameLiteralStrategy),
530 Extension(ExtensionStrategy),
531 Prefix(PrefixStrategy),
532 Suffix(SuffixStrategy),
533 RequiredExtension(RequiredExtensionStrategy),
534 Regex(RegexSetStrategy),
535}
536
537impl GlobSetMatchStrategy {
538 fn is_match(&self, candidate: &Candidate) -> bool {
539 use self::GlobSetMatchStrategy::*;
540 match *self {
541 Literal(ref s) => s.is_match(candidate),
542 BasenameLiteral(ref s) => s.is_match(candidate),
543 Extension(ref s) => s.is_match(candidate),
544 Prefix(ref s) => s.is_match(candidate),
545 Suffix(ref s) => s.is_match(candidate),
546 RequiredExtension(ref s) => s.is_match(candidate),
547 Regex(ref s) => s.is_match(candidate),
548 }
549 }
550
551 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
552 use self::GlobSetMatchStrategy::*;
553 match *self {
554 Literal(ref s) => s.matches_into(candidate, matches),
555 BasenameLiteral(ref s) => s.matches_into(candidate, matches),
556 Extension(ref s) => s.matches_into(candidate, matches),
557 Prefix(ref s) => s.matches_into(candidate, matches),
558 Suffix(ref s) => s.matches_into(candidate, matches),
559 RequiredExtension(ref s) => s.matches_into(candidate, matches),
560 Regex(ref s) => s.matches_into(candidate, matches),
561 }
562 }
563}
564
565#[derive(Clone, Debug)]
566struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
567
568impl LiteralStrategy {
569 fn new() -> LiteralStrategy {
570 LiteralStrategy(BTreeMap::new())
571 }
572
573 fn add(&mut self, global_index: usize, lit: String) {
574 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
575 }
576
577 fn is_match(&self, candidate: &Candidate) -> bool {
578 self.0.contains_key(&*candidate.path)
579 }
580
581 #[inline(never)]
582 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
583 if let Some(hits) = self.0.get(&*candidate.path) {
584 matches.extend(hits);
585 }
586 }
587}
588
589#[derive(Clone, Debug)]
590struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
591
592impl BasenameLiteralStrategy {
593 fn new() -> BasenameLiteralStrategy {
594 BasenameLiteralStrategy(BTreeMap::new())
595 }
596
597 fn add(&mut self, global_index: usize, lit: String) {
598 self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
599 }
600
601 fn is_match(&self, candidate: &Candidate) -> bool {
602 if candidate.basename.is_empty() {
603 return false;
604 }
605 self.0.contains_key(&*candidate.basename)
606 }
607
608 #[inline(never)]
609 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
610 if candidate.basename.is_empty() {
611 return;
612 }
613 if let Some(hits) = self.0.get(&*candidate.basename) {
614 matches.extend(hits);
615 }
616 }
617}
618
619#[derive(Clone, Debug)]
620struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
621
622impl ExtensionStrategy {
623 fn new() -> ExtensionStrategy {
624 ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
625 }
626
627 fn add(&mut self, global_index: usize, ext: String) {
628 self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
629 }
630
631 fn is_match(&self, candidate: &Candidate) -> bool {
632 if candidate.ext.is_empty() {
633 return false;
634 }
635 self.0.contains_key(&*candidate.ext)
636 }
637
638 #[inline(never)]
639 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
640 if candidate.ext.is_empty() {
641 return;
642 }
643 if let Some(hits) = self.0.get(&*candidate.ext) {
644 matches.extend(hits);
645 }
646 }
647}
648
649#[derive(Clone, Debug)]
650struct PrefixStrategy {
651 matcher: FullAcAutomaton<Vec<u8>>,
652 map: Vec<usize>,
653 longest: usize,
654}
655
656impl PrefixStrategy {
657 fn is_match(&self, candidate: &Candidate) -> bool {
658 let path = candidate.path_prefix(self.longest);
659 for m in self.matcher.find_overlapping(path) {
660 if m.start == 0 {
661 return true;
662 }
663 }
664 false
665 }
666
667 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
668 let path = candidate.path_prefix(self.longest);
669 for m in self.matcher.find_overlapping(path) {
670 if m.start == 0 {
671 matches.push(self.map[m.pati]);
672 }
673 }
674 }
675}
676
677#[derive(Clone, Debug)]
678struct SuffixStrategy {
679 matcher: FullAcAutomaton<Vec<u8>>,
680 map: Vec<usize>,
681 longest: usize,
682}
683
684impl SuffixStrategy {
685 fn is_match(&self, candidate: &Candidate) -> bool {
686 let path = candidate.path_suffix(self.longest);
687 for m in self.matcher.find_overlapping(path) {
688 if m.end == path.len() {
689 return true;
690 }
691 }
692 false
693 }
694
695 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
696 let path = candidate.path_suffix(self.longest);
697 for m in self.matcher.find_overlapping(path) {
698 if m.end == path.len() {
699 matches.push(self.map[m.pati]);
700 }
701 }
702 }
703}
704
705#[derive(Clone, Debug)]
706struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
707
708impl RequiredExtensionStrategy {
709 fn is_match(&self, candidate: &Candidate) -> bool {
710 if candidate.ext.is_empty() {
711 return false;
712 }
713 match self.0.get(&*candidate.ext) {
714 None => false,
715 Some(regexes) => {
716 for &(_, ref re) in regexes {
717 if re.is_match(&*candidate.path) {
718 return true;
719 }
720 }
721 false
722 }
723 }
724 }
725
726 #[inline(never)]
727 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
728 if candidate.ext.is_empty() {
729 return;
730 }
731 if let Some(regexes) = self.0.get(&*candidate.ext) {
732 for &(global_index, ref re) in regexes {
733 if re.is_match(&*candidate.path) {
734 matches.push(global_index);
735 }
736 }
737 }
738 }
739}
740
741#[derive(Clone, Debug)]
742struct RegexSetStrategy {
743 matcher: RegexSet,
744 map: Vec<usize>,
745}
746
747impl RegexSetStrategy {
748 fn is_match(&self, candidate: &Candidate) -> bool {
749 self.matcher.is_match(&*candidate.path)
750 }
751
752 fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
753 for i in self.matcher.matches(&*candidate.path) {
754 matches.push(self.map[i]);
755 }
756 }
757}
758
759#[derive(Clone, Debug)]
760struct MultiStrategyBuilder {
761 literals: Vec<String>,
762 map: Vec<usize>,
763 longest: usize,
764}
765
766impl MultiStrategyBuilder {
767 fn new() -> MultiStrategyBuilder {
768 MultiStrategyBuilder {
769 literals: vec![],
770 map: vec![],
771 longest: 0,
772 }
773 }
774
775 fn add(&mut self, global_index: usize, literal: String) {
776 if literal.len() > self.longest {
777 self.longest = literal.len();
778 }
779 self.map.push(global_index);
780 self.literals.push(literal);
781 }
782
783 fn prefix(self) -> PrefixStrategy {
784 let it = self.literals.into_iter().map(|s| s.into_bytes());
785 PrefixStrategy {
786 matcher: AcAutomaton::new(it).into_full(),
787 map: self.map,
788 longest: self.longest,
789 }
790 }
791
792 fn suffix(self) -> SuffixStrategy {
793 let it = self.literals.into_iter().map(|s| s.into_bytes());
794 SuffixStrategy {
795 matcher: AcAutomaton::new(it).into_full(),
796 map: self.map,
797 longest: self.longest,
798 }
799 }
800
801 fn regex_set(self) -> Result<RegexSetStrategy, Error> {
802 Ok(RegexSetStrategy {
803 matcher: new_regex_set(self.literals)?,
804 map: self.map,
805 })
806 }
807}
808
809#[derive(Clone, Debug)]
810struct RequiredExtensionStrategyBuilder(
811 HashMap<Vec<u8>, Vec<(usize, String)>>,
812);
813
814impl RequiredExtensionStrategyBuilder {
815 fn new() -> RequiredExtensionStrategyBuilder {
816 RequiredExtensionStrategyBuilder(HashMap::new())
817 }
818
819 fn add(&mut self, global_index: usize, ext: String, regex: String) {
820 self.0
821 .entry(ext.into_bytes())
822 .or_insert(vec![])
823 .push((global_index, regex));
824 }
825
826 fn build(self) -> Result<RequiredExtensionStrategy, Error> {
827 let mut exts = HashMap::with_hasher(Fnv::default());
828 for (ext, regexes) in self.0.into_iter() {
829 exts.insert(ext.clone(), vec![]);
830 for (global_index, regex) in regexes {
831 let compiled = new_regex(®ex)?;
832 exts.get_mut(&ext).unwrap().push((global_index, compiled));
833 }
834 }
835 Ok(RequiredExtensionStrategy(exts))
836 }
837}
838
839#[cfg(test)]
840mod tests {
841 use super::GlobSetBuilder;
842 use super::glob::Glob;
843
844 #[test]
845 fn set_works() {
846 let mut builder = GlobSetBuilder::new();
847 builder.add(Glob::new("src/**/*.rs").unwrap());
848 builder.add(Glob::new("*.c").unwrap());
849 builder.add(Glob::new("src/lib.rs").unwrap());
850 let set = builder.build().unwrap();
851
852 assert!(set.is_match("foo.c"));
853 assert!(set.is_match("src/foo.c"));
854 assert!(!set.is_match("foo.rs"));
855 assert!(!set.is_match("tests/foo.rs"));
856 assert!(set.is_match("src/foo.rs"));
857 assert!(set.is_match("src/grep/src/main.rs"));
858
859 let matches = set.matches("src/lib.rs");
860 assert_eq!(2, matches.len());
861 assert_eq!(0, matches[0]);
862 assert_eq!(2, matches[1]);
863 }
864
865 #[test]
866 fn empty_set_works() {
867 let set = GlobSetBuilder::new().build().unwrap();
868 assert!(!set.is_match(""));
869 assert!(!set.is_match("a"));
870 }
871}