ignore/
types.rs

1/*!
2The types module provides a way of associating globs on file names to file
3types.
4
5This can be used to match specific types of files. For example, among
6the default file types provided, the Rust file type is defined to be `*.rs`
7with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
8name `c`.
9
10Note that the set of default types may change over time.
11
12# Example
13
14This shows how to create and use a simple file type matcher using the default
15file types defined in this crate.
16
17```
18use ignore::types::TypesBuilder;
19
20let mut builder = TypesBuilder::new();
21builder.add_defaults();
22builder.select("rust");
23let matcher = builder.build().unwrap();
24
25assert!(matcher.matched("foo.rs", false).is_whitelist());
26assert!(matcher.matched("foo.c", false).is_ignore());
27```
28
29# Example: negation
30
31This is like the previous example, but shows how negating a file type works.
32That is, this will let us match file paths that *don't* correspond to a
33particular file type.
34
35```
36use ignore::types::TypesBuilder;
37
38let mut builder = TypesBuilder::new();
39builder.add_defaults();
40builder.negate("c");
41let matcher = builder.build().unwrap();
42
43assert!(matcher.matched("foo.rs", false).is_none());
44assert!(matcher.matched("foo.c", false).is_ignore());
45```
46
47# Example: custom file type definitions
48
This shows how to extend this library's default file type definitions with
your own.
51
52```
53use ignore::types::TypesBuilder;
54
55let mut builder = TypesBuilder::new();
56builder.add_defaults();
57builder.add("foo", "*.foo");
58// Another way of adding a file type definition.
59// This is useful when accepting input from an end user.
60builder.add_def("bar:*.bar");
61// Note: we only select `foo`, not `bar`.
62builder.select("foo");
63let matcher = builder.build().unwrap();
64
65assert!(matcher.matched("x.foo", false).is_whitelist());
66// This is ignored because we only selected the `foo` file type.
67assert!(matcher.matched("x.bar", false).is_ignore());
68```
69
70We can also add file type definitions based on other definitions.
71
72```
73use ignore::types::TypesBuilder;
74
75let mut builder = TypesBuilder::new();
76builder.add_defaults();
77builder.add("foo", "*.foo");
78builder.add_def("bar:include:foo,cpp");
79builder.select("bar");
80let matcher = builder.build().unwrap();
81
82assert!(matcher.matched("x.foo", false).is_whitelist());
83assert!(matcher.matched("y.cpp", false).is_whitelist());
84```
85*/
86
87use std::cell::RefCell;
88use std::collections::HashMap;
89use std::path::Path;
90use std::sync::Arc;
91
92use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
93use regex::Regex;
94use thread_local::ThreadLocal;
95
96use pathutil::file_name;
97use {Error, Match};
98
/// The built-in file type definitions.
///
/// Each entry pairs a file type name with the globs that recognize it. The
/// globs are matched against a file's name only, never against its full path.
///
/// Every entry is fed through `TypesBuilder::add` by
/// `TypesBuilder::add_defaults`, which panics if an entry is rejected.
/// Consequently every name here must consist solely of Unicode letters and
/// numbers and must not be the reserved name `all`.
///
/// Some names carry identical glob lists (e.g. `markdown` and `md`), and a
/// single glob may appear under several names (e.g. `*.h`).
const DEFAULT_TYPES: &[(&str, &[&str])] = &[
    ("agda", &["*.agda", "*.lagda"]),
    ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
    ("aidl", &["*.aidl"]),
    ("amake", &["*.mk", "*.bp"]),
    ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("asp", &[
        "*.aspx", "*.aspx.cs", "*.aspx.vb",
        "*.ascx", "*.ascx.cs", "*.ascx.vb",
    ]),
    ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
    ("awk", &["*.awk"]),
    ("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
    ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
    ("brotli", &["*.br"]),
    ("buildstream", &["*.bst"]),
    ("bzip2", &["*.bz2", "*.tbz2"]),
    ("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
    ("cabal", &["*.cabal"]),
    ("cbor", &["*.cbor"]),
    ("ceylon", &["*.ceylon"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("creole", &["*.creole"]),
    ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
    ("cpp", &[
        "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
        "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
    ]),
    ("crystal", &["Projectfile", "*.cr"]),
    ("cs", &["*.cs"]),
    ("csharp", &["*.cs"]),
    ("cshtml", &["*.cshtml"]),
    ("css", &["*.css", "*.scss"]),
    ("csv", &["*.csv"]),
    ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("dhall", &["*.dhall"]),
    ("docker", &["*Dockerfile*"]),
    ("edn", &["*.edn"]),
    ("elisp", &["*.el"]),
    ("elixir", &["*.ex", "*.eex", "*.exs"]),
    ("elm", &["*.elm"]),
    ("erlang", &["*.erl", "*.hrl"]),
    ("fidl", &["*.fidl"]),
    ("fish", &["*.fish"]),
    ("fortran", &[
        "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
        "*.f90", "*.F90", "*.f95", "*.F95",
    ]),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
    ("gn", &["*.gn", "*.gni"]),
    ("go", &["*.go"]),
    ("gzip", &["*.gz", "*.tgz"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("h", &["*.h", "*.hpp"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
    ("hs", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html", "*.ejs"]),
    ("idris", &["*.idr", "*.lidr"]),
    ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
    ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
    ("js", &[
        "*.js", "*.jsx", "*.vue",
    ]),
    ("json", &["*.json", "composer.lock"]),
    ("jsonl", &["*.jsonl"]),
    ("julia", &["*.jl"]),
    ("jupyter", &["*.ipynb", "*.jpynb"]),
    ("jl", &["*.jl"]),
    ("kotlin", &["*.kt", "*.kts"]),
    ("less", &["*.less"]),
    ("license", &[
        // General
        "COPYING", "COPYING[.-]*",
        "COPYRIGHT", "COPYRIGHT[.-]*",
        "EULA", "EULA[.-]*",
        "licen[cs]e", "licen[cs]e.*",
        "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
        "NOTICE", "NOTICE[.-]*",
        "PATENTS", "PATENTS[.-]*",
        "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
        // GPL (gpl.txt, etc.)
        "agpl[.-]*",
        "gpl[.-]*",
        "lgpl[.-]*",
        // Other license-specific (APACHE-2.0.txt, etc.)
        "AGPL-*[0-9]*",
        "APACHE-*[0-9]*",
        "BSD-*[0-9]*",
        "CC-BY-*",
        "GFDL-*[0-9]*",
        "GNU-*[0-9]*",
        "GPL-*[0-9]*",
        "LGPL-*[0-9]*",
        "MIT-*[0-9]*",
        "MPL-*[0-9]*",
        "OFL-*[0-9]*",
    ]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lock", &["*.lock", "package-lock.json"]),
    ("log", &["*.log"]),
    ("lua", &["*.lua"]),
    ("lzma", &["*.lzma"]),
    ("lz4", &["*.lz4"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &[
        "[Gg][Nn][Uu]makefile", "[Mm]akefile",
        "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
        "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
        "*.mk", "*.mak"
    ]),
    ("mako", &["*.mako", "*.mao"]),
    ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("msbuild", &[
        "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
    ]),
    ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
    ("nix", &["*.nix"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("org", &["*.org"]),
    ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
    ("pdf", &["*.pdf"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("pod", &["*.pod"]),
    // The leading `*` is required: a bare `.ps` would only match a file whose
    // entire name is `.ps`.
    ("postscript", &["*.eps", "*.ps"]),
    ("protobuf", &["*.proto"]),
    ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
    ("puppet", &["*.erb", "*.pp", "*.rb"]),
    ("purs", &["*.purs"]),
    ("py", &["*.py"]),
    ("qmake", &["*.pro", "*.pri", "*.prf"]),
    ("qml", &["*.qml"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rdoc", &["*.rdoc"]),
    ("robot", &["*.robot"]),
    ("rst", &["*.rst"]),
    ("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
    ("rust", &["*.rs"]),
    ("sass", &["*.sass", "*.scss"]),
    ("scala", &["*.scala", "*.sbt"]),
    ("sh", &[
        // Portable/misc. init files
        ".login", ".logout", ".profile", "profile",
        // bash-specific init files
        ".bash_login", "bash_login",
        ".bash_logout", "bash_logout",
        ".bash_profile", "bash_profile",
        ".bashrc", "bashrc", "*.bashrc",
        // csh-specific init files
        ".cshrc", "*.cshrc",
        // ksh-specific init files
        ".kshrc", "*.kshrc",
        // tcsh-specific init files
        ".tcshrc",
        // zsh-specific init files
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        // Extensions
        "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
    ]),
    ("smarty", &["*.tpl"]),
    ("sml", &["*.sml", "*.sig"]),
    ("soy", &["*.soy"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql", "*.psql"]),
    ("stylus", &["*.styl"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("svg", &["*.svg"]),
    ("swift", &["*.swift"]),
    ("swig", &["*.def", "*.i"]),
    ("systemd", &[
        "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
        "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
        "*.timer",
    ]),
    ("taskpaper", &["*.taskpaper"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
    ("textile", &["*.textile"]),
    ("thrift", &["*.thrift"]),
    ("tf", &["*.tf"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("twig", &["*.twig"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
    ("vhdl", &["*.vhd", "*.vhdl"]),
    ("vim", &["*.vim"]),
    ("vimscript", &["*.vim"]),
    ("wiki", &["*.mediawiki", "*.wiki"]),
    ("webidl", &["*.idl", "*.webidl", "*.widl"]),
    ("xml", &[
        "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
        "*.rng", "*.sch",
    ]),
    ("xz", &["*.xz", "*.txz"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
    ("zig", &["*.zig"]),
    ("zsh", &[
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        "*.zsh",
    ]),
    ("zstd", &["*.zst", "*.zstd"]),
];
325
326/// Glob represents a single glob in a set of file type definitions.
327///
328/// There may be more than one glob for a particular file type.
329///
330/// This is used to report information about the highest precedent glob
331/// that matched.
332///
333/// Note that not all matches necessarily correspond to a specific glob.
334/// For example, if there are one or more selections and a file path doesn't
335/// match any of those selections, then the file path is considered to be
336/// ignored.
337///
338/// The lifetime `'a` refers to the lifetime of the underlying file type
339/// definition, which corresponds to the lifetime of the file type matcher.
340#[derive(Clone, Debug)]
341pub struct Glob<'a>(GlobInner<'a>);
342
343#[derive(Clone, Debug)]
344enum GlobInner<'a> {
345    /// No glob matched, but the file path should still be ignored.
346    UnmatchedIgnore,
347    /// A glob matched.
348    Matched {
349        /// The file type definition which provided the glob.
350        def: &'a FileTypeDef,
351        /// The index of the glob that matched inside the file type definition.
352        which: usize,
353        /// Whether the selection was negated or not.
354        negated: bool,
355    }
356}
357
358impl<'a> Glob<'a> {
359    fn unmatched() -> Glob<'a> {
360        Glob(GlobInner::UnmatchedIgnore)
361    }
362
363    /// Return the file type defintion that matched, if one exists. A file type
364    /// definition always exists when a specific definition matches a file
365    /// path.
366    pub fn file_type_def(&self) -> Option<&FileTypeDef> {
367        match self {
368            Glob(GlobInner::UnmatchedIgnore) => None,
369            Glob(GlobInner::Matched { def, .. }) => {
370                Some(def)
371            },
372        }
373    }
374}
375
/// One named file type together with the globs that recognize it.
///
/// The complete list of definitions can be obtained from a file type
/// matcher. A definition is also reported whenever it is responsible for a
/// match.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    name: String,
    globs: Vec<String>,
}

impl FileTypeDef {
    /// The name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
398
399/// Types is a file type matcher.
400#[derive(Clone, Debug)]
401pub struct Types {
402    /// All of the file type definitions, sorted lexicographically by name.
403    defs: Vec<FileTypeDef>,
404    /// All of the selections made by the user.
405    selections: Vec<Selection<FileTypeDef>>,
406    /// Whether there is at least one Selection::Select in our selections.
407    /// When this is true, a Match::None is converted to Match::Ignore.
408    has_selected: bool,
409    /// A mapping from glob index in the set to two indices. The first is an
410    /// index into `selections` and the second is an index into the
411    /// corresponding file type definition's list of globs.
412    glob_to_selection: Vec<(usize, usize)>,
413    /// The set of all glob selections, used for actual matching.
414    set: GlobSet,
415    /// Temporary storage for globs that match.
416    matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
417}
418
/// How a particular file type was chosen: selected or negated.
///
/// Both variants carry the file type's name followed by a payload of type
/// `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    Select(String, T),
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Whether this is a `Negate` selection.
    fn is_negated(&self) -> bool {
        match self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// The file type name this selection refers to.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Transform the payload with `f`, keeping the variant and the name.
    fn map<U, F>(self, f: F) -> Selection<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            Selection::Select(name, payload) => Selection::Select(name, f(payload)),
            Selection::Negate(name, payload) => Selection::Negate(name, f(payload)),
        }
    }

    /// Borrow the payload.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => payload,
        }
    }
}
459
460impl Types {
461    /// Creates a new file type matcher that never matches any path and
462    /// contains no file type definitions.
463    pub fn empty() -> Types {
464        Types {
465            defs: vec![],
466            selections: vec![],
467            has_selected: false,
468            glob_to_selection: vec![],
469            set: GlobSetBuilder::new().build().unwrap(),
470            matches: Arc::new(ThreadLocal::default()),
471        }
472    }
473
474    /// Returns true if and only if this matcher has zero selections.
475    pub fn is_empty(&self) -> bool {
476        self.selections.is_empty()
477    }
478
479    /// Returns the number of selections used in this matcher.
480    pub fn len(&self) -> usize {
481        self.selections.len()
482    }
483
484    /// Return the set of current file type definitions.
485    ///
486    /// Definitions and globs are sorted.
487    pub fn definitions(&self) -> &[FileTypeDef] {
488        &self.defs
489    }
490
491    /// Returns a match for the given path against this file type matcher.
492    ///
493    /// The path is considered whitelisted if it matches a selected file type.
494    /// The path is considered ignored if it matches a negated file type.
495    /// If at least one file type is selected and `path` doesn't match, then
496    /// the path is also considered ignored.
497    pub fn matched<'a, P: AsRef<Path>>(
498        &'a self,
499        path: P,
500        is_dir: bool,
501    ) -> Match<Glob<'a>> {
502        // File types don't apply to directories, and we can't do anything
503        // if our glob set is empty.
504        if is_dir || self.set.is_empty() {
505            return Match::None;
506        }
507        // We only want to match against the file name, so extract it.
508        // If one doesn't exist, then we can't match it.
509        let name = match file_name(path.as_ref()) {
510            Some(name) => name,
511            None if self.has_selected => {
512                return Match::Ignore(Glob::unmatched());
513            }
514            None => {
515                return Match::None;
516            }
517        };
518        let mut matches = self.matches.get_default().borrow_mut();
519        self.set.matches_into(name, &mut *matches);
520        // The highest precedent match is the last one.
521        if let Some(&i) = matches.last() {
522            let (isel, iglob) = self.glob_to_selection[i];
523            let sel = &self.selections[isel];
524            let glob = Glob(GlobInner::Matched {
525                def: sel.inner(),
526                which: iglob,
527                negated: sel.is_negated(),
528            });
529            return if sel.is_negated() {
530                Match::Ignore(glob)
531            } else {
532                Match::Whitelist(glob)
533            };
534        }
535        if self.has_selected {
536            Match::Ignore(Glob::unmatched())
537        } else {
538            Match::None
539        }
540    }
541}
542
543/// TypesBuilder builds a type matcher from a set of file type definitions and
544/// a set of file type selections.
545pub struct TypesBuilder {
546    types: HashMap<String, FileTypeDef>,
547    selections: Vec<Selection<()>>,
548}
549
550impl TypesBuilder {
551    /// Create a new builder for a file type matcher.
552    ///
553    /// The builder contains *no* type definitions to start with. A set
554    /// of default type definitions can be added with `add_defaults`, and
555    /// additional type definitions can be added with `select` and `negate`.
556    pub fn new() -> TypesBuilder {
557        TypesBuilder {
558            types: HashMap::new(),
559            selections: vec![],
560        }
561    }
562
563    /// Build the current set of file type definitions *and* selections into
564    /// a file type matcher.
565    pub fn build(&self) -> Result<Types, Error> {
566        let defs = self.definitions();
567        let has_selected = self.selections.iter().any(|s| !s.is_negated());
568
569        let mut selections = vec![];
570        let mut glob_to_selection = vec![];
571        let mut build_set = GlobSetBuilder::new();
572        for (isel, selection) in self.selections.iter().enumerate() {
573            let def = match self.types.get(selection.name()) {
574                Some(def) => def.clone(),
575                None => {
576                    let name = selection.name().to_string();
577                    return Err(Error::UnrecognizedFileType(name));
578                }
579            };
580            for (iglob, glob) in def.globs.iter().enumerate() {
581                build_set.add(
582                    GlobBuilder::new(glob)
583                        .literal_separator(true)
584                        .build()
585                        .map_err(|err| {
586                            Error::Glob {
587                                glob: Some(glob.to_string()),
588                                err: err.kind().to_string(),
589                            }
590                        })?);
591                glob_to_selection.push((isel, iglob));
592            }
593            selections.push(selection.clone().map(move |_| def));
594        }
595        let set = build_set.build().map_err(|err| {
596            Error::Glob { glob: None, err: err.to_string() }
597        })?;
598        Ok(Types {
599            defs: defs,
600            selections: selections,
601            has_selected: has_selected,
602            glob_to_selection: glob_to_selection,
603            set: set,
604            matches: Arc::new(ThreadLocal::default()),
605        })
606    }
607
608    /// Return the set of current file type definitions.
609    ///
610    /// Definitions and globs are sorted.
611    pub fn definitions(&self) -> Vec<FileTypeDef> {
612        let mut defs = vec![];
613        for def in self.types.values() {
614            let mut def = def.clone();
615            def.globs.sort();
616            defs.push(def);
617        }
618        defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
619        defs
620    }
621
622    /// Select the file type given by `name`.
623    ///
624    /// If `name` is `all`, then all file types currently defined are selected.
625    pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
626        if name == "all" {
627            for name in self.types.keys() {
628                self.selections.push(Selection::Select(name.to_string(), ()));
629            }
630        } else {
631            self.selections.push(Selection::Select(name.to_string(), ()));
632        }
633        self
634    }
635
636    /// Ignore the file type given by `name`.
637    ///
638    /// If `name` is `all`, then all file types currently defined are negated.
639    pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
640        if name == "all" {
641            for name in self.types.keys() {
642                self.selections.push(Selection::Negate(name.to_string(), ()));
643            }
644        } else {
645            self.selections.push(Selection::Negate(name.to_string(), ()));
646        }
647        self
648    }
649
650    /// Clear any file type definitions for the type name given.
651    pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
652        self.types.remove(name);
653        self
654    }
655
656    /// Add a new file type definition. `name` can be arbitrary and `pat`
657    /// should be a glob recognizing file paths belonging to the `name` type.
658    ///
659    /// If `name` is `all` or otherwise contains any character that is not a
660    /// Unicode letter or number, then an error is returned.
661    pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
662        lazy_static! {
663            static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
664        };
665        if name == "all" || !RE.is_match(name) {
666            return Err(Error::InvalidDefinition);
667        }
668        let (key, glob) = (name.to_string(), glob.to_string());
669        self.types.entry(key).or_insert_with(|| {
670            FileTypeDef { name: name.to_string(), globs: vec![] }
671        }).globs.push(glob);
672        Ok(())
673    }
674
675    /// Add a new file type definition specified in string form. There are two
676    /// valid formats:
677    /// 1. `{name}:{glob}`.  This defines a 'root' definition that associates the
678    ///     given name with the given glob.
679    /// 2. `{name}:include:{comma-separated list of already defined names}.
680    ///     This defines an 'include' definition that associates the given name
681    ///     with the definitions of the given existing types.
682    /// Names may not include any characters that are not
683    /// Unicode letters or numbers.
684    pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
685        let parts: Vec<&str> = def.split(':').collect();
686        match parts.len() {
687            2 => {
688                let name = parts[0];
689                let glob = parts[1];
690                if name.is_empty() || glob.is_empty() {
691                    return Err(Error::InvalidDefinition);
692                }
693                self.add(name, glob)
694            }
695            3 => {
696                let name = parts[0];
697                let types_string = parts[2];
698                if name.is_empty() || parts[1] != "include" || types_string.is_empty() {
699                    return Err(Error::InvalidDefinition);
700                }
701                let types = types_string.split(',');
702                // Check ahead of time to ensure that all types specified are
703                // present and fail fast if not.
704                if types.clone().any(|t| !self.types.contains_key(t)) {
705                    return Err(Error::InvalidDefinition);
706                }
707                for type_name in types {
708                    let globs = self.types.get(type_name).unwrap().globs.clone();
709                    for glob in globs {
710                        self.add(name, &glob)?;
711                    }
712                }
713                Ok(())
714            }
715            _ => Err(Error::InvalidDefinition)
716        }
717    }
718
719    /// Add a set of default file type definitions.
720    pub fn add_defaults(&mut self) -> &mut TypesBuilder {
721        static MSG: &'static str = "adding a default type should never fail";
722        for &(name, exts) in DEFAULT_TYPES {
723            for ext in exts {
724                self.add(name, ext).expect(MSG);
725            }
726        }
727        self
728    }
729}
730
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function named `$name`.
    //
    // The test registers the definitions in `$types` via `add_def`, selects
    // every name in `$sel`, negates every name in `$selnot`, and then matches
    // `$path` as a file. It asserts that the path is *not* ignored; with a
    // leading `not`, it asserts that the path *is* ignored.
    //
    // The arms are tried in order: the first two add the expected outcome and
    // forward to the third, which emits the actual test.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut btypes = TypesBuilder::new();
                for tydef in $types {
                    btypes.add_def(tydef).unwrap();
                }
                for sel in $sel {
                    btypes.select(sel);
                }
                for selnot in $selnot {
                    btypes.negate(selnot);
                }
                let types = btypes.build().unwrap();
                let mat = types.matched($path, false);
                assert_eq!($matched, !mat.is_ignore());
            }
        };
    }

    // The definitions shared by every test: root definitions, one type with
    // two globs (`html`), one glob overlapping another type (`foo`), and an
    // include definition (`combo`).
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
            "combo:include:html,rust"
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
    // `all` expands to every defined type.
    matched!(match10, types(), vec!["all"], vec![], "lib.rs");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
    // Selecting everything still ignores files of no known type.
    matched!(not, matchnot7, types(), vec!["all"], vec![], "unknown.xyz");
    matched!(not, matchnot8, types(), vec![], vec!["all"], "lib.rs");

    #[test]
    fn test_invalid_defs() {
        let mut btypes = TypesBuilder::new();
        for tydef in types() {
            btypes.add_def(tydef).unwrap();
        }
        // Preserve the original definitions for later comparison.
        let original_defs = btypes.definitions();
        let bad_defs = vec![
            // Reference to type that does not exist
            "combo:include:html,python",
            // Bad format
            "combo:foobar:html,rust",
            "",
            // No separator at all
            "combo",
            // Too many parts
            "combo:include:html:rust",
            // Empty name
            ":*.foo",
            // Empty glob
            "foo:",
            // Empty include list
            "combo:include:",
            // Reserved name
            "all:*.all",
            // Name with a character that is not a letter or number
            "foo-bar:*.foo",
        ];
        for def in bad_defs {
            assert!(btypes.add_def(def).is_err());
            // Ensure that nothing changed, even if some of the includes were valid.
            assert_eq!(btypes.definitions(), original_defs);
        }
    }
}