use std::cell::RefCell;
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use regex::Regex;
use thread_local::ThreadLocal;
use pathutil::file_name;
use {Error, Match};
/// Built-in file type table.
///
/// Each entry pairs a file type name with the glob patterns that identify
/// files of that type. `TypesBuilder::add_defaults` registers every
/// `(name, glob)` pair from this table through `TypesBuilder::add`, so each
/// name must satisfy the naming rule enforced there.
///
/// Globs that target a file extension must start with `*`; a pattern without
/// a wildcard (e.g. `Gemfile`) is compared against the whole file name.
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("agda", &["*.agda", "*.lagda"]),
    ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
    ("aidl", &["*.aidl"]),
    ("amake", &["*.mk", "*.bp"]),
    ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
    ("asm", &["*.asm", "*.s", "*.S"]),
    // The code-behind globs mirror each other for pages (aspx) and controls
    // (ascx): one C# and one VB variant each.
    ("asp", &["*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", "*.ascx.vb"]),
    ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
    ("awk", &["*.awk"]),
    ("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
    ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
    ("brotli", &["*.br"]),
    ("buildstream", &["*.bst"]),
    ("bzip2", &["*.bz2", "*.tbz2"]),
    ("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
    ("cabal", &["*.cabal"]),
    ("cbor", &["*.cbor"]),
    ("ceylon", &["*.ceylon"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("creole", &["*.creole"]),
    ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
    ("cpp", &[
        "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
        "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
    ]),
    ("crystal", &["Projectfile", "*.cr"]),
    ("cs", &["*.cs"]),
    ("csharp", &["*.cs"]),
    ("cshtml", &["*.cshtml"]),
    ("css", &["*.css", "*.scss"]),
    ("csv", &["*.csv"]),
    ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("dhall", &["*.dhall"]),
    ("docker", &["*Dockerfile*"]),
    ("edn", &["*.edn"]),
    ("elisp", &["*.el"]),
    ("elixir", &["*.ex", "*.eex", "*.exs"]),
    ("elm", &["*.elm"]),
    ("erlang", &["*.erl", "*.hrl"]),
    ("fidl", &["*.fidl"]),
    ("fish", &["*.fish"]),
    ("fortran", &[
        "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
        "*.f90", "*.F90", "*.f95", "*.F95",
    ]),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
    ("gn", &["*.gn", "*.gni"]),
    ("go", &["*.go"]),
    ("gzip", &["*.gz", "*.tgz"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("h", &["*.h", "*.hpp"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
    ("hs", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html", "*.ejs"]),
    ("idris", &["*.idr", "*.lidr"]),
    ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
    ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
    ("js", &[
        "*.js", "*.jsx", "*.vue",
    ]),
    ("json", &["*.json", "composer.lock"]),
    ("jsonl", &["*.jsonl"]),
    ("julia", &["*.jl"]),
    ("jupyter", &["*.ipynb", "*.jpynb"]),
    ("jl", &["*.jl"]),
    ("kotlin", &["*.kt", "*.kts"]),
    ("less", &["*.less"]),
    ("license", &[
        // General
        "COPYING", "COPYING[.-]*",
        "COPYRIGHT", "COPYRIGHT[.-]*",
        "EULA", "EULA[.-]*",
        "licen[cs]e", "licen[cs]e.*",
        "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
        "NOTICE", "NOTICE[.-]*",
        "PATENTS", "PATENTS[.-]*",
        "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
        // Lower-case license family names
        "agpl[.-]*",
        "gpl[.-]*",
        "lgpl[.-]*",
        // Upper-case license family names, optionally versioned
        "AGPL-*[0-9]*",
        "APACHE-*[0-9]*",
        "BSD-*[0-9]*",
        "CC-BY-*",
        "GFDL-*[0-9]*",
        "GNU-*[0-9]*",
        "GPL-*[0-9]*",
        "LGPL-*[0-9]*",
        "MIT-*[0-9]*",
        "MPL-*[0-9]*",
        "OFL-*[0-9]*",
    ]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lock", &["*.lock", "package-lock.json"]),
    ("log", &["*.log"]),
    ("lua", &["*.lua"]),
    ("lzma", &["*.lzma"]),
    ("lz4", &["*.lz4"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &[
        "[Gg][Nn][Uu]makefile", "[Mm]akefile",
        "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
        "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
        "*.mk", "*.mak"
    ]),
    ("mako", &["*.mako", "*.mao"]),
    ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("msbuild", &[
        "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
    ]),
    ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
    ("nix", &["*.nix"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("org", &["*.org"]),
    ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
    ("pdf", &["*.pdf"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("pod", &["*.pod"]),
    // Extension globs need the leading `*`; without it only files literally
    // named `.eps` / `.ps` would be recognised.
    ("postscript", &["*.eps", "*.ps"]),
    ("protobuf", &["*.proto"]),
    ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
    ("puppet", &["*.erb", "*.pp", "*.rb"]),
    ("purs", &["*.purs"]),
    ("py", &["*.py"]),
    ("qmake", &["*.pro", "*.pri", "*.prf"]),
    ("qml", &["*.qml"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rdoc", &["*.rdoc"]),
    ("robot", &["*.robot"]),
    ("rst", &["*.rst"]),
    ("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
    ("rust", &["*.rs"]),
    ("sass", &["*.sass", "*.scss"]),
    ("scala", &["*.scala", "*.sbt"]),
    ("sh", &[
        // Portable/misc. init files
        ".login", ".logout", ".profile", "profile",
        // bash-specific init files
        ".bash_login", "bash_login",
        ".bash_logout", "bash_logout",
        ".bash_profile", "bash_profile",
        ".bashrc", "bashrc", "*.bashrc",
        // csh-specific init files
        ".cshrc", "*.cshrc",
        // ksh-specific init files
        ".kshrc", "*.kshrc",
        // tcsh-specific init files
        ".tcshrc",
        // zsh-specific init files
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        // Extensions
        "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
    ]),
    ("smarty", &["*.tpl"]),
    ("sml", &["*.sml", "*.sig"]),
    ("soy", &["*.soy"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql", "*.psql"]),
    ("stylus", &["*.styl"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("svg", &["*.svg"]),
    ("swift", &["*.swift"]),
    ("swig", &["*.def", "*.i"]),
    ("systemd", &[
        "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
        "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
        "*.timer",
    ]),
    ("taskpaper", &["*.taskpaper"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
    ("textile", &["*.textile"]),
    ("thrift", &["*.thrift"]),
    ("tf", &["*.tf"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("twig", &["*.twig"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
    ("vhdl", &["*.vhd", "*.vhdl"]),
    ("vim", &["*.vim"]),
    ("vimscript", &["*.vim"]),
    ("wiki", &["*.mediawiki", "*.wiki"]),
    ("webidl", &["*.idl", "*.webidl", "*.widl"]),
    ("xml", &[
        "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
        "*.rng", "*.sch",
    ]),
    ("xz", &["*.xz", "*.txz"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
    ("zig", &["*.zig"]),
    ("zsh", &[
        ".zshenv", "zshenv",
        ".zlogin", "zlogin",
        ".zlogout", "zlogout",
        ".zprofile", "zprofile",
        ".zshrc", "zshrc",
        "*.zsh",
    ]),
    ("zstd", &["*.zst", "*.zstd"]),
];
/// Describes the outcome of matching a path against a set of file types.
///
/// A `Glob` either records that no glob matched (the path is ignored only
/// because other types were selected) or points at the file type definition
/// whose glob matched.
#[derive(Clone, Debug)]
pub struct Glob<'a>(GlobInner<'a>);

/// Private representation behind `Glob`.
#[derive(Clone, Debug)]
enum GlobInner<'a> {
    /// No glob matched the path.
    UnmatchedIgnore,
    /// A glob belonging to a selected or negated file type matched.
    Matched {
        /// The file type definition that owns the matching glob.
        def: &'a FileTypeDef,
        /// Index of the matching glob within `def`'s glob list.
        which: usize,
        /// Whether the owning selection was a negation.
        negated: bool,
    },
}

impl<'a> Glob<'a> {
    /// Builds the value used when no glob matched.
    fn unmatched() -> Glob<'a> {
        Glob(GlobInner::UnmatchedIgnore)
    }

    /// Returns the file type definition whose glob matched, or `None` when
    /// this value represents the "nothing matched" case.
    pub fn file_type_def(&self) -> Option<&FileTypeDef> {
        match self.0 {
            GlobInner::Matched { def, .. } => Some(def),
            GlobInner::UnmatchedIgnore => None,
        }
    }
}
/// A single file type definition: a name plus the globs that identify files
/// of that type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef {
    name: String,
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Returns the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Returns the globs associated with this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
/// A compiled file type matcher.
///
/// Values are produced by `TypesBuilder::build` (or `Types::empty`) and
/// queried through `Types::matched`.
#[derive(Clone, Debug)]
pub struct Types {
/// Every file type definition known to the builder, sorted by name.
defs: Vec<FileTypeDef>,
/// The selections and negations, each paired with its resolved definition,
/// in the order they were requested.
selections: Vec<Selection<FileTypeDef>>,
/// True when at least one selection is not a negation.
has_selected: bool,
/// Maps an index in `set` to `(selection index, glob index within that
/// selection's definition)`.
glob_to_selection: Vec<(usize, usize)>,
/// All globs of all selected/negated types, compiled into one set.
set: GlobSet,
/// Per-thread scratch buffer that receives the indices of matching globs.
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
}
/// A request to include (`Select`) or exclude (`Negate`) a file type.
///
/// The first field is the file type name; the second is a payload whose type
/// depends on the stage of construction.
#[derive(Clone, Debug)]
enum Selection<T> {
    Select(String, T),
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true only for the `Negate` variant.
    fn is_negated(&self) -> bool {
        match self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// Returns the file type name carried by either variant.
    fn name(&self) -> &str {
        match self {
            Selection::Select(name, _) | Selection::Negate(name, _) => name,
        }
    }

    /// Transforms the payload with `f`, keeping the variant and name intact.
    fn map<U, F>(self, f: F) -> Selection<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            Selection::Select(name, payload) => Selection::Select(name, f(payload)),
            Selection::Negate(name, payload) => Selection::Negate(name, f(payload)),
        }
    }

    /// Borrows the payload carried by either variant.
    fn inner(&self) -> &T {
        match self {
            Selection::Select(_, payload) | Selection::Negate(_, payload) => payload,
        }
    }
}
impl Types {
    /// Creates a matcher with no definitions, no selections and no globs.
    pub fn empty() -> Types {
        let set = GlobSetBuilder::new().build().unwrap();
        Types {
            defs: Vec::new(),
            selections: Vec::new(),
            has_selected: false,
            glob_to_selection: Vec::new(),
            set,
            matches: Arc::new(ThreadLocal::default()),
        }
    }

    /// Returns true when there are no selections (or negations).
    pub fn is_empty(&self) -> bool {
        self.selections.is_empty()
    }

    /// Returns the number of selections (including negations).
    pub fn len(&self) -> usize {
        self.selections.len()
    }

    /// Returns the file type definitions held by this matcher.
    pub fn definitions(&self) -> &[FileTypeDef] {
        self.defs.as_slice()
    }

    /// Decides whether `path` is whitelisted or ignored by the file type
    /// selections.
    ///
    /// * Directories, and matchers without any globs, always yield
    ///   `Match::None`.
    /// * When a glob matches, the final index written by
    ///   `GlobSet::matches_into` decides the result: `Match::Ignore` if its
    ///   selection is a negation, `Match::Whitelist` otherwise.
    /// * When nothing matches (or the path has no file name), the result is
    ///   `Match::Ignore` if at least one type was positively selected and
    ///   `Match::None` otherwise.
    pub fn matched<'a, P: AsRef<Path>>(
        &'a self,
        path: P,
        is_dir: bool,
    ) -> Match<Glob<'a>> {
        if is_dir || self.set.is_empty() {
            return Match::None;
        }
        // Shared answer for every path that hits none of the globs.
        let no_glob_hit = || {
            if self.has_selected {
                Match::Ignore(Glob::unmatched())
            } else {
                Match::None
            }
        };
        let name = match file_name(path.as_ref()) {
            Some(name) => name,
            None => return no_glob_hit(),
        };
        let mut scratch = self.matches.get_or_default().borrow_mut();
        self.set.matches_into(name, &mut *scratch);
        // Copy the index out so the buffer borrow does not reach the result.
        let last = scratch.last().cloned();
        match last {
            None => no_glob_hit(),
            Some(i) => {
                let (isel, iglob) = self.glob_to_selection[i];
                let sel = &self.selections[isel];
                let negated = sel.is_negated();
                let glob = Glob(GlobInner::Matched {
                    def: sel.inner(),
                    which: iglob,
                    negated,
                });
                if negated {
                    Match::Ignore(glob)
                } else {
                    Match::Whitelist(glob)
                }
            }
        }
    }
}
pub struct TypesBuilder {
types: HashMap<String, FileTypeDef>,
selections: Vec<Selection<()>>,
}
impl TypesBuilder {
pub fn new() -> TypesBuilder {
TypesBuilder {
types: HashMap::new(),
selections: vec![],
}
}
pub fn build(&self) -> Result<Types, Error> {
let defs = self.definitions();
let has_selected = self.selections.iter().any(|s| !s.is_negated());
let mut selections = vec![];
let mut glob_to_selection = vec![];
let mut build_set = GlobSetBuilder::new();
for (isel, selection) in self.selections.iter().enumerate() {
let def = match self.types.get(selection.name()) {
Some(def) => def.clone(),
None => {
let name = selection.name().to_string();
return Err(Error::UnrecognizedFileType(name));
}
};
for (iglob, glob) in def.globs.iter().enumerate() {
build_set.add(
GlobBuilder::new(glob)
.literal_separator(true)
.build()
.map_err(|err| {
Error::Glob {
glob: Some(glob.to_string()),
err: err.kind().to_string(),
}
})?);
glob_to_selection.push((isel, iglob));
}
selections.push(selection.clone().map(move |_| def));
}
let set = build_set.build().map_err(|err| {
Error::Glob { glob: None, err: err.to_string() }
})?;
Ok(Types {
defs: defs,
selections: selections,
has_selected: has_selected,
glob_to_selection: glob_to_selection,
set: set,
matches: Arc::new(ThreadLocal::default()),
})
}
pub fn definitions(&self) -> Vec<FileTypeDef> {
let mut defs = vec![];
for def in self.types.values() {
let mut def = def.clone();
def.globs.sort();
defs.push(def);
}
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
defs
}
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
if name == "all" {
for name in self.types.keys() {
self.selections.push(Selection::Select(name.to_string(), ()));
}
} else {
self.selections.push(Selection::Select(name.to_string(), ()));
}
self
}
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
if name == "all" {
for name in self.types.keys() {
self.selections.push(Selection::Negate(name.to_string(), ()));
}
} else {
self.selections.push(Selection::Negate(name.to_string(), ()));
}
self
}
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
self.types.remove(name);
self
}
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
lazy_static! {
static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
};
if name == "all" || !RE.is_match(name) {
return Err(Error::InvalidDefinition);
}
let (key, glob) = (name.to_string(), glob.to_string());
self.types.entry(key).or_insert_with(|| {
FileTypeDef { name: name.to_string(), globs: vec![] }
}).globs.push(glob);
Ok(())
}
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
let parts: Vec<&str> = def.split(':').collect();
match parts.len() {
2 => {
let name = parts[0];
let glob = parts[1];
if name.is_empty() || glob.is_empty() {
return Err(Error::InvalidDefinition);
}
self.add(name, glob)
}
3 => {
let name = parts[0];
let types_string = parts[2];
if name.is_empty() || parts[1] != "include" || types_string.is_empty() {
return Err(Error::InvalidDefinition);
}
let types = types_string.split(',');
if types.clone().any(|t| !self.types.contains_key(t)) {
return Err(Error::InvalidDefinition);
}
for type_name in types {
let globs = self.types.get(type_name).unwrap().globs.clone();
for glob in globs {
self.add(name, &glob)?;
}
}
Ok(())
}
_ => Err(Error::InvalidDefinition)
}
}
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
static MSG: &'static str = "adding a default type should never fail";
for &(name, exts) in DEFAULT_TYPES {
for ext in exts {
self.add(name, ext).expect(MSG);
}
}
self
}
}
// Unit tests for building file type matchers and matching paths with them.
#[cfg(test)]
mod tests {
use super::TypesBuilder;
// Generates one `#[test]` function per invocation.
//
// Arguments: the test name, the definitions to add (in `add_def` syntax),
// the type names to select, the type names to negate, and the path to match.
// A leading `not` flips the expectation: the path must then be ignored.
macro_rules! matched {
// Plain form: the path is expected not to be ignored.
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, true);
};
// `not` form: the path is expected to be ignored.
(not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, false);
};
// Worker rule that the two rules above expand into.
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr, $matched:expr) => {
#[test]
fn $name() {
let mut btypes = TypesBuilder::new();
for tydef in $types {
btypes.add_def(tydef).unwrap();
}
for sel in $sel {
btypes.select(sel);
}
for selnot in $selnot {
btypes.negate(selnot);
}
let types = btypes.build().unwrap();
// The path is always matched as a file (is_dir = false).
let mat = types.matched($path, false);
// "Matched" here means "not ignored"; `Match::None` counts too.
assert_eq!($matched, !mat.is_ignore());
}
};
}
// Definitions shared by all tests, including one `include` definition.
fn types() -> Vec<&'static str> {
vec![
"html:*.html",
"html:*.htm",
"rust:*.rs",
"js:*.js",
"foo:*.{rs,foo}",
"combo:include:html,rust"
]
}
// Paths that must not be ignored.
matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
matched!(match2, types(), vec!["html"], vec![], "index.html");
matched!(match3, types(), vec!["html"], vec![], "index.htm");
matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
matched!(match5, types(), vec![], vec![], "index.html");
matched!(match6, types(), vec![], vec!["rust"], "index.html");
matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
matched!(match8, types(), vec!["combo"], vec![], "index.html");
matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
// Paths that must be ignored.
matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
// Malformed definitions must be rejected and must leave the builder's
// definitions exactly as they were.
#[test]
fn test_invalid_defs() {
let mut btypes = TypesBuilder::new();
for tydef in types() {
btypes.add_def(tydef).unwrap();
}
let original_defs = btypes.definitions();
let bad_defs = vec![
// Includes a type ("python") that has not been defined.
"combo:include:html,python",
// Middle component is not the `include` keyword.
"combo:foobar:html,rust",
// Empty string: no `name:glob` structure at all.
""
];
for def in bad_defs {
assert!(btypes.add_def(def).is_err());
assert_eq!(btypes.definitions(), original_defs);
}
}
}