1use std::cell::RefCell;
88use std::collections::HashMap;
89use std::path::Path;
90use std::sync::Arc;
91
92use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
93use regex::Regex;
94use thread_local::ThreadLocal;
95
96use pathutil::file_name;
97use {Error, Match};
98
/// The built-in file type definitions, as `(type name, globs)` pairs.
///
/// `TypesBuilder::add_defaults` feeds every pair through `TypesBuilder::add`,
/// so each name must be accepted by `add` (letters and digits only, and not
/// the reserved name `all`). Globs are matched against a path's file name,
/// which means an extension glob needs its leading `*` to match anything
/// other than a file with exactly that name.
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("agda", &["*.agda", "*.lagda"]),
    ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
    ("aidl", &["*.aidl"]),
    ("amake", &["*.mk", "*.bp"]),
    ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
    ("asm", &["*.asm", "*.s", "*.S"]),
    // Page/control markup plus both the C# and VB code-behind files.
    ("asp", &[
        "*.aspx", "*.aspx.cs", "*.aspx.vb",
        "*.ascx", "*.ascx.cs", "*.ascx.vb",
    ]),
    ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
    ("awk", &["*.awk"]),
    ("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
    ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
    ("brotli", &["*.br"]),
    ("buildstream", &["*.bst"]),
    ("bzip2", &["*.bz2", "*.tbz2"]),
    ("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
    ("cabal", &["*.cabal"]),
    ("cbor", &["*.cbor"]),
    ("ceylon", &["*.ceylon"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("creole", &["*.creole"]),
    ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
    ("cpp", &[
        "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
        "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
    ]),
    ("crystal", &["Projectfile", "*.cr"]),
    ("cs", &["*.cs"]),
    ("csharp", &["*.cs"]),
    ("cshtml", &["*.cshtml"]),
    ("css", &["*.css", "*.scss"]),
    ("csv", &["*.csv"]),
    ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("dhall", &["*.dhall"]),
    ("docker", &["*Dockerfile*"]),
    ("edn", &["*.edn"]),
    ("elisp", &["*.el"]),
    ("elixir", &["*.ex", "*.eex", "*.exs"]),
    ("elm", &["*.elm"]),
    ("erlang", &["*.erl", "*.hrl"]),
    ("fidl", &["*.fidl"]),
    ("fish", &["*.fish"]),
    ("fortran", &[
        "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
        "*.f90", "*.F90", "*.f95", "*.F95",
    ]),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
    ("gn", &["*.gn", "*.gni"]),
    ("go", &["*.go"]),
    ("gzip", &["*.gz", "*.tgz"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("h", &["*.h", "*.hpp"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
    ("hs", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html", "*.ejs"]),
    ("idris", &["*.idr", "*.lidr"]),
    ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
    ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
    ("js", &[
        "*.js", "*.jsx", "*.vue",
    ]),
    ("json", &["*.json", "composer.lock"]),
    ("jsonl", &["*.jsonl"]),
    ("julia", &["*.jl"]),
    ("jupyter", &["*.ipynb", "*.jpynb"]),
    ("jl", &["*.jl"]),
    ("kotlin", &["*.kt", "*.kts"]),
    ("less", &["*.less"]),
    ("license", &[
        // Generic license-related file names.
        "COPYING", "COPYING[.-]*",
        "COPYRIGHT", "COPYRIGHT[.-]*",
        "EULA", "EULA[.-]*",
        "licen[cs]e", "licen[cs]e.*",
        "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
        "NOTICE", "NOTICE[.-]*",
        "PATENTS", "PATENTS[.-]*",
        "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
        // Lowercase GPL-family names followed by `.` or `-`.
        "agpl[.-]*",
        "gpl[.-]*",
        "lgpl[.-]*",
        // Uppercase names of specific licenses.
        "AGPL-*[0-9]*",
        "APACHE-*[0-9]*",
        "BSD-*[0-9]*",
        "CC-BY-*",
        "GFDL-*[0-9]*",
        "GNU-*[0-9]*",
        "GPL-*[0-9]*",
        "LGPL-*[0-9]*",
        "MIT-*[0-9]*",
        "MPL-*[0-9]*",
        "OFL-*[0-9]*",
    ]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lock", &["*.lock", "package-lock.json"]),
    ("log", &["*.log"]),
    ("lua", &["*.lua"]),
    ("lzma", &["*.lzma"]),
    ("lz4", &["*.lz4"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &[
        "[Gg][Nn][Uu]makefile", "[Mm]akefile",
        "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
        "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
        "*.mk", "*.mak"
    ]),
    ("mako", &["*.mako", "*.mao"]),
    ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("msbuild", &[
        "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
    ]),
    ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
    ("nix", &["*.nix"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("org", &["*.org"]),
    ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
    ("pdf", &["*.pdf"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("pod", &["*.pod"]),
    // Extension globs: without the leading `*` these would only match files
    // whose entire name is `.eps` or `.ps`.
    ("postscript", &["*.eps", "*.ps"]),
    ("protobuf", &["*.proto"]),
    ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
    ("puppet", &["*.erb", "*.pp", "*.rb"]),
    ("purs", &["*.purs"]),
    ("py", &["*.py"]),
    ("qmake", &["*.pro", "*.pri", "*.prf"]),
    ("qml", &["*.qml"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rdoc", &["*.rdoc"]),
    ("robot", &["*.robot"]),
    ("rst", &["*.rst"]),
    ("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
    ("rust", &["*.rs"]),
    ("sass", &["*.sass", "*.scss"]),
    ("scala", &["*.scala", "*.sbt"]),
    ("sh", &[
        // Login/profile scripts.
        ".login", ".logout", ".profile", "profile",
        // bash
        ".bash_login", "bash_login",
        ".bash_logout", "bash_logout",
        ".bash_profile", "bash_profile",
        ".bashrc", "bashrc", "*.bashrc",
        // csh
        ".cshrc", "*.cshrc",
        // ksh
        ".kshrc", "*.kshrc",
        // tcsh
        ".tcshrc",
        // zsh
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        // Script extensions.
        "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
    ]),
    ("smarty", &["*.tpl"]),
    ("sml", &["*.sml", "*.sig"]),
    ("soy", &["*.soy"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql", "*.psql"]),
    ("stylus", &["*.styl"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("svg", &["*.svg"]),
    ("swift", &["*.swift"]),
    ("swig", &["*.def", "*.i"]),
    ("systemd", &[
        "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
        "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
        "*.timer",
    ]),
    ("taskpaper", &["*.taskpaper"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
    ("textile", &["*.textile"]),
    ("thrift", &["*.thrift"]),
    ("tf", &["*.tf"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("twig", &["*.twig"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
    ("vhdl", &["*.vhd", "*.vhdl"]),
    ("vim", &["*.vim"]),
    ("vimscript", &["*.vim"]),
    ("wiki", &["*.mediawiki", "*.wiki"]),
    ("webidl", &["*.idl", "*.webidl", "*.widl"]),
    ("xml", &[
        "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
        "*.rng", "*.sch",
    ]),
    ("xz", &["*.xz", "*.txz"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
    ("zig", &["*.zig"]),
    ("zsh", &[
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        "*.zsh",
    ]),
    ("zstd", &["*.zst", "*.zstd"]),
];
325
326#[derive(Clone, Debug)]
341pub struct Glob<'a>(GlobInner<'a>);
342
343#[derive(Clone, Debug)]
344enum GlobInner<'a> {
345 UnmatchedIgnore,
347 Matched {
349 def: &'a FileTypeDef,
351 which: usize,
353 negated: bool,
355 }
356}
357
358impl<'a> Glob<'a> {
359 fn unmatched() -> Glob<'a> {
360 Glob(GlobInner::UnmatchedIgnore)
361 }
362
363 pub fn file_type_def(&self) -> Option<&FileTypeDef> {
367 match self {
368 Glob(GlobInner::UnmatchedIgnore) => None,
369 Glob(GlobInner::Matched { def, .. }) => {
370 Some(def)
371 },
372 }
373 }
374}
375
/// A single file type definition: a name plus the globs that belong to it.
///
/// Two definitions compare equal when both their names and their glob lists
/// (including order) are equal.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// The globs associated with this file type, e.g. `*.rs`.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Returns the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Returns the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
398
/// A file type matcher produced by `TypesBuilder::build`.
#[derive(Clone, Debug)]
pub struct Types {
    /// Every file type definition known to the builder, sorted by name,
    /// whether or not it was selected.
    defs: Vec<FileTypeDef>,
    /// The selections and negations, in the order they were made, each paired
    /// with a copy of the definition it refers to.
    selections: Vec<Selection<FileTypeDef>>,
    /// Whether at least one selection is not a negation. When true, a file
    /// that matches no glob is reported as ignored.
    has_selected: bool,
    /// For each glob added to `set` (by index), the pair
    /// `(index into selections, index of the glob within that definition)`.
    glob_to_selection: Vec<(usize, usize)>,
    /// The compiled set of globs from all selected and negated definitions.
    set: GlobSet,
    /// Per-thread scratch buffer that receives the indices of matching globs.
    matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
}
418
/// A request to either select or negate a file type by name.
///
/// The payload `T` is `()` while building and is replaced by the resolved
/// definition once the matcher is built.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// Files of the named type should be whitelisted.
    Select(String, T),
    /// Files of the named type should be ignored.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true only for the `Negate` variant.
    fn is_negated(&self) -> bool {
        if let Selection::Negate(..) = *self {
            true
        } else {
            false
        }
    }

    /// Returns the file type name carried by either variant.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Transforms the payload with `f`, keeping the variant and the name.
    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
        let (negated, name, payload) = match self {
            Selection::Select(name, payload) => (false, name, payload),
            Selection::Negate(name, payload) => (true, name, payload),
        };
        let mapped = f(payload);
        if negated {
            Selection::Negate(name, mapped)
        } else {
            Selection::Select(name, mapped)
        }
    }

    /// Borrows the payload carried by either variant.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => payload,
        }
    }
}
459
460impl Types {
461 pub fn empty() -> Types {
464 Types {
465 defs: vec![],
466 selections: vec![],
467 has_selected: false,
468 glob_to_selection: vec![],
469 set: GlobSetBuilder::new().build().unwrap(),
470 matches: Arc::new(ThreadLocal::default()),
471 }
472 }
473
474 pub fn is_empty(&self) -> bool {
476 self.selections.is_empty()
477 }
478
479 pub fn len(&self) -> usize {
481 self.selections.len()
482 }
483
484 pub fn definitions(&self) -> &[FileTypeDef] {
488 &self.defs
489 }
490
491 pub fn matched<'a, P: AsRef<Path>>(
498 &'a self,
499 path: P,
500 is_dir: bool,
501 ) -> Match<Glob<'a>> {
502 if is_dir || self.set.is_empty() {
505 return Match::None;
506 }
507 let name = match file_name(path.as_ref()) {
510 Some(name) => name,
511 None if self.has_selected => {
512 return Match::Ignore(Glob::unmatched());
513 }
514 None => {
515 return Match::None;
516 }
517 };
518 let mut matches = self.matches.get_default().borrow_mut();
519 self.set.matches_into(name, &mut *matches);
520 if let Some(&i) = matches.last() {
522 let (isel, iglob) = self.glob_to_selection[i];
523 let sel = &self.selections[isel];
524 let glob = Glob(GlobInner::Matched {
525 def: sel.inner(),
526 which: iglob,
527 negated: sel.is_negated(),
528 });
529 return if sel.is_negated() {
530 Match::Ignore(glob)
531 } else {
532 Match::Whitelist(glob)
533 };
534 }
535 if self.has_selected {
536 Match::Ignore(Glob::unmatched())
537 } else {
538 Match::None
539 }
540 }
541}
542
543pub struct TypesBuilder {
546 types: HashMap<String, FileTypeDef>,
547 selections: Vec<Selection<()>>,
548}
549
550impl TypesBuilder {
551 pub fn new() -> TypesBuilder {
557 TypesBuilder {
558 types: HashMap::new(),
559 selections: vec![],
560 }
561 }
562
563 pub fn build(&self) -> Result<Types, Error> {
566 let defs = self.definitions();
567 let has_selected = self.selections.iter().any(|s| !s.is_negated());
568
569 let mut selections = vec![];
570 let mut glob_to_selection = vec![];
571 let mut build_set = GlobSetBuilder::new();
572 for (isel, selection) in self.selections.iter().enumerate() {
573 let def = match self.types.get(selection.name()) {
574 Some(def) => def.clone(),
575 None => {
576 let name = selection.name().to_string();
577 return Err(Error::UnrecognizedFileType(name));
578 }
579 };
580 for (iglob, glob) in def.globs.iter().enumerate() {
581 build_set.add(
582 GlobBuilder::new(glob)
583 .literal_separator(true)
584 .build()
585 .map_err(|err| {
586 Error::Glob {
587 glob: Some(glob.to_string()),
588 err: err.kind().to_string(),
589 }
590 })?);
591 glob_to_selection.push((isel, iglob));
592 }
593 selections.push(selection.clone().map(move |_| def));
594 }
595 let set = build_set.build().map_err(|err| {
596 Error::Glob { glob: None, err: err.to_string() }
597 })?;
598 Ok(Types {
599 defs: defs,
600 selections: selections,
601 has_selected: has_selected,
602 glob_to_selection: glob_to_selection,
603 set: set,
604 matches: Arc::new(ThreadLocal::default()),
605 })
606 }
607
608 pub fn definitions(&self) -> Vec<FileTypeDef> {
612 let mut defs = vec![];
613 for def in self.types.values() {
614 let mut def = def.clone();
615 def.globs.sort();
616 defs.push(def);
617 }
618 defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
619 defs
620 }
621
622 pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
626 if name == "all" {
627 for name in self.types.keys() {
628 self.selections.push(Selection::Select(name.to_string(), ()));
629 }
630 } else {
631 self.selections.push(Selection::Select(name.to_string(), ()));
632 }
633 self
634 }
635
636 pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
640 if name == "all" {
641 for name in self.types.keys() {
642 self.selections.push(Selection::Negate(name.to_string(), ()));
643 }
644 } else {
645 self.selections.push(Selection::Negate(name.to_string(), ()));
646 }
647 self
648 }
649
650 pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
652 self.types.remove(name);
653 self
654 }
655
656 pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
662 lazy_static! {
663 static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
664 };
665 if name == "all" || !RE.is_match(name) {
666 return Err(Error::InvalidDefinition);
667 }
668 let (key, glob) = (name.to_string(), glob.to_string());
669 self.types.entry(key).or_insert_with(|| {
670 FileTypeDef { name: name.to_string(), globs: vec![] }
671 }).globs.push(glob);
672 Ok(())
673 }
674
675 pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
685 let parts: Vec<&str> = def.split(':').collect();
686 match parts.len() {
687 2 => {
688 let name = parts[0];
689 let glob = parts[1];
690 if name.is_empty() || glob.is_empty() {
691 return Err(Error::InvalidDefinition);
692 }
693 self.add(name, glob)
694 }
695 3 => {
696 let name = parts[0];
697 let types_string = parts[2];
698 if name.is_empty() || parts[1] != "include" || types_string.is_empty() {
699 return Err(Error::InvalidDefinition);
700 }
701 let types = types_string.split(',');
702 if types.clone().any(|t| !self.types.contains_key(t)) {
705 return Err(Error::InvalidDefinition);
706 }
707 for type_name in types {
708 let globs = self.types.get(type_name).unwrap().globs.clone();
709 for glob in globs {
710 self.add(name, &glob)?;
711 }
712 }
713 Ok(())
714 }
715 _ => Err(Error::InvalidDefinition)
716 }
717 }
718
719 pub fn add_defaults(&mut self) -> &mut TypesBuilder {
721 static MSG: &'static str = "adding a default type should never fail";
722 for &(name, exts) in DEFAULT_TYPES {
723 for ext in exts {
724 self.add(name, ext).expect(MSG);
725 }
726 }
727 self
728 }
729}
730
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation.
    //
    // Arguments: test name, type definitions, names to select, names to
    // negate, and the path to check. The plain form asserts that the path is
    // not ignored; the form prefixed with `not` asserts that it is ignored.
    // Both forward to the six-argument form, which takes the expectation as
    // an explicit boolean.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for definition in $types {
                    builder.add_def(definition).unwrap();
                }
                for selected in $sel {
                    builder.select(selected);
                }
                for negated in $selnot {
                    builder.negate(negated);
                }
                let matcher = builder.build().unwrap();
                let outcome = matcher.matched($path, false);
                assert_eq!($matched, !outcome.is_ignore());
            }
        };
    }

    // The definitions shared by every test in this module.
    fn types() -> Vec<&'static str> {
        let definitions = [
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
            "combo:include:html,rust",
        ];
        definitions.to_vec()
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
    matched!(match8, types(), vec!["combo"], vec![], "index.html");
    matched!(match9, types(), vec!["combo"], vec![], "lib.rs");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
    matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");

    // Every rejected definition must leave the builder's definitions exactly
    // as they were before the attempt.
    #[test]
    fn test_invalid_defs() {
        let mut builder = TypesBuilder::new();
        for definition in types() {
            builder.add_def(definition).unwrap();
        }
        let before = builder.definitions();

        let rejected = vec![
            // Includes a type that was never defined.
            "combo:include:html,python",
            // The middle field is not `include`.
            "combo:foobar:html,rust",
            // Too few fields.
            "",
        ];
        for definition in rejected {
            assert!(builder.add_def(definition).is_err());
            assert_eq!(builder.definitions(), before);
        }
    }
}