use once_cell::sync::Lazy;
use std::collections::HashMap;
/// Lowercase language name mapped to the file extensions registered for it.
///
/// Every extension is stored with its leading dot (for example `".rs"`). The table is
/// built once, on first access.
pub static LANGUAGE_MAP: Lazy<HashMap<&'static str, Vec<&'static str>>> = Lazy::new(|| {
    HashMap::from([
        ("python", vec![".py", ".pyw", ".pyi"]),
        ("javascript", vec![".js", ".mjs", ".cjs"]),
        ("typescript", vec![".ts", ".tsx", ".mts"]),
        ("rust", vec![".rs"]),
        ("go", vec![".go"]),
        ("java", vec![".java"]),
        ("kotlin", vec![".kt", ".kts"]),
        ("swift", vec![".swift"]),
        ("c", vec![".c", ".h"]),
        ("cpp", vec![".cpp", ".cc", ".cxx", ".hpp", ".hxx", ".h++"]),
        ("csharp", vec![".cs"]),
        ("ruby", vec![".rb", ".rake", ".gemspec"]),
        ("php", vec![".php", ".php3", ".php4", ".php5", ".phtml"]),
        ("html", vec![".html", ".htm"]),
        ("css", vec![".css", ".scss", ".sass", ".less"]),
        ("shell", vec![".sh", ".bash", ".zsh", ".fish"]),
        ("sql", vec![".sql"]),
        ("markdown", vec![".md", ".markdown", ".mdx"]),
        ("json", vec![".json", ".jsonl", ".json5"]),
        ("yaml", vec![".yml", ".yaml"]),
        ("xml", vec![".xml", ".xsl", ".xslt"]),
        ("jsx", vec![".jsx"]),
        ("vue", vec![".vue"]),
        ("svelte", vec![".svelte"]),
        ("toml", vec![".toml"]),
        ("scala", vec![".scala", ".sc"]),
        ("haskell", vec![".hs", ".lhs"]),
        ("elixir", vec![".ex", ".exs"]),
        ("lua", vec![".lua"]),
        ("zig", vec![".zig"]),
        ("nim", vec![".nim", ".nims"]),
    ])
});
/// Shorthand spellings mapped to the `LANGUAGE_MAP` key they stand for.
///
/// Keys are lowercase; `resolve_extensions` lowercases its input before looking here.
static ALIASES: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
    HashMap::from([
        ("py", "python"),
        ("js", "javascript"),
        ("ts", "typescript"),
        ("tsx", "typescript"),
        ("rs", "rust"),
        ("c++", "cpp"),
        ("cxx", "cpp"),
        ("cc", "cpp"),
        ("cs", "csharp"),
        ("rb", "ruby"),
        ("sh", "shell"),
        ("bash", "shell"),
        ("zsh", "shell"),
        ("md", "markdown"),
        ("yml", "yaml"),
        ("kt", "kotlin"),
        ("hs", "haskell"),
    ])
});
/// Resolves a language name, alias, or literal extension into a list of file extensions.
///
/// Matching is case-insensitive and ignores leading/trailing whitespace. The rules are
/// applied in this order:
///
/// 1. Blank input resolves to an empty list (there is nothing to turn into an extension).
/// 2. Input starting with `.` is taken as a literal extension and returned lowercased.
/// 3. Input that is an alias (after alias expansion) or a key of `LANGUAGE_MAP` yields
///    every extension registered for that language.
/// 4. Anything else is assumed to be a bare extension and is returned with a `.` prefix.
///
/// Every returned extension starts with a dot.
pub fn resolve_extensions(input: &str) -> Vec<String> {
    // Trim first so stray whitespace (e.g. from a comma-separated list) cannot end up
    // inside a fabricated extension such as ". rust".
    let lower = input.trim().to_lowercase();
    if lower.is_empty() {
        // Without this guard the fallback below would produce the meaningless ".".
        return Vec::new();
    }
    if lower.starts_with('.') {
        return vec![lower];
    }

    // Expand an alias to its canonical language name; unknown names pass through as-is.
    let canonical = ALIASES
        .get(lower.as_str())
        .copied()
        .unwrap_or(lower.as_str());

    match LANGUAGE_MAP.get(canonical) {
        Some(exts) => exts.iter().map(|ext| ext.to_string()).collect(),
        None => vec![format!(".{}", lower)],
    }
}
/// Lowercase file extensions (leading dot included) classified as binary content:
/// images, archives, executables and libraries, media, fonts, compiled artifacts, and
/// database files.
pub static BINARY_EXTENSIONS: Lazy<std::collections::HashSet<&'static str>> = Lazy::new(|| {
    std::collections::HashSet::from([
        // Images and documents.
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".pdf",
        // Archives.
        ".zip", ".tar", ".gz", ".bz2", ".xz", ".rar", ".7z",
        // Executables and libraries.
        ".exe", ".dll", ".so", ".dylib", ".bin", ".wasm",
        // Audio and video.
        ".mp3", ".mp4", ".avi", ".mov", ".wav", ".flac", ".ogg",
        // Fonts.
        ".ttf", ".otf", ".woff", ".woff2", ".eot",
        // Compiled artifacts.
        ".pyc", ".pyo", ".class", ".o", ".a", ".lib",
        // Databases.
        ".db", ".sqlite", ".sqlite3",
    ])
});
/// Comment syntax for one family of languages.
///
/// Derives `PartialEq`/`Eq` so specs can be compared directly (for example in assertions).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CommentSpec {
    /// Marker that starts a single-line comment (e.g. `//`), or `None` when the syntax
    /// has no single-line form.
    pub single: Option<&'static str>,
    /// `(open, close)` delimiters of a multi-line comment (e.g. `("/*", "*/")`), or `None`
    /// when the syntax has no multi-line form.
    pub multi: Option<(&'static str, &'static str)>,
}
/// File extension (leading dot included) mapped to the comment syntax registered for it.
///
/// Extensions that are not in the map have no registered comment syntax. Each extension
/// is registered exactly once, so the result does not depend on registration order. All
/// Ruby extensions (`.rb`, `.rake`, `.gemspec`) share the Ruby spec, including its
/// `=begin`/`=end` block form.
pub static COMMENT_REGISTRY: Lazy<HashMap<&'static str, CommentSpec>> = Lazy::new(|| {
    let c_style = CommentSpec {
        single: Some("//"),
        multi: Some(("/*", "*/")),
    };
    let bash_style = CommentSpec {
        single: Some("#"),
        multi: None,
    };
    let py_style = CommentSpec {
        single: Some("#"),
        multi: Some(("\"\"\"", "\"\"\"")),
    };
    let html_style = CommentSpec {
        single: None,
        multi: Some(("<!--", "-->")),
    };
    let sql_style = CommentSpec {
        single: Some("--"),
        multi: Some(("/*", "*/")),
    };
    let lua_style = CommentSpec {
        single: Some("--"),
        multi: Some(("--[[", "]]")),
    };
    let haskell_style = CommentSpec {
        single: Some("--"),
        multi: Some(("{-", "-}")),
    };
    let ruby_style = CommentSpec {
        single: Some("#"),
        multi: Some(("=begin", "=end")),
    };

    let mut m: HashMap<&'static str, CommentSpec> = HashMap::new();
    // Registers one spec for a whole group of extensions.
    let mut register = |exts: &[&'static str], spec: CommentSpec| {
        m.extend(exts.iter().map(|&ext| (ext, spec)));
    };

    register(
        &[
            ".rs", ".go", ".java", ".kt", ".kts", ".swift", ".c", ".h", ".cpp", ".cc", ".cxx",
            ".hpp", ".hxx", ".h++", ".cs", ".js", ".mjs", ".cjs", ".ts", ".tsx", ".mts", ".php",
            ".scala", ".sc", ".zig",
        ],
        c_style,
    );
    register(&[".py", ".pyw", ".pyi"], py_style);
    // Hash-only languages. Ruby extensions are deliberately NOT listed here; they are
    // registered below with the Ruby spec so `.rb`, `.rake` and `.gemspec` all agree.
    register(
        &[
            ".sh", ".bash", ".zsh", ".fish", ".yaml", ".yml", ".toml", ".ex", ".exs", ".nim",
            ".nims",
        ],
        bash_style,
    );
    register(
        &[".html", ".htm", ".xml", ".xsl", ".xslt", ".vue", ".svelte"],
        html_style,
    );
    register(&[".sql"], sql_style);
    register(&[".lua"], lua_style);
    register(&[".hs", ".lhs"], haskell_style);
    register(&[".rb", ".rake", ".gemspec"], ruby_style);

    m
});
/// Directory names listed for exclusion: dependency folders, VCS metadata, virtual
/// environments, build output, editor settings, and tool caches.
///
/// Entries are bare directory names, not paths.
pub static EXCLUDED_DIRS: Lazy<std::collections::HashSet<&'static str>> = Lazy::new(|| {
    std::collections::HashSet::from([
        // Dependencies and version control.
        "node_modules",
        ".git",
        "vendor",
        // Python environments and bytecode.
        ".venv",
        "venv",
        "__pycache__",
        // Build output.
        "dist",
        "build",
        ".next",
        ".nuxt",
        "target",
        "bin",
        "obj",
        // Build-tool and editor state.
        ".gradle",
        ".idea",
        ".vscode",
        // Test and tooling caches.
        "coverage",
        ".pytest_cache",
        ".mypy_cache",
        ".tox",
        "eggs",
        ".eggs",
        ".cargo",
    ])
});
#[allow(dead_code)]
pub fn all_languages() -> Vec<&'static str> {
let mut langs: Vec<_> = LANGUAGE_MAP.keys().copied().collect();
langs.sort_unstable();
langs
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned form that `resolve_extensions` returns.
    fn owned(exts: &[&str]) -> Vec<String> {
        exts.iter().map(|ext| ext.to_string()).collect()
    }

    /// A language name, its alias, and a literal extension all resolve to the same list.
    #[test]
    fn test_resolve_extensions_basic() {
        for input in ["rust", "rs", ".rs"] {
            assert_eq!(resolve_extensions(input), owned(&[".rs"]));
        }
    }

    /// Aliases expand to the full extension list of their canonical language.
    #[test]
    fn test_resolve_extensions_aliases() {
        let python = owned(&[".py", ".pyw", ".pyi"]);
        assert_eq!(resolve_extensions("py"), python);

        let javascript = owned(&[".js", ".mjs", ".cjs"]);
        for input in ["javascript", "js"] {
            assert_eq!(resolve_extensions(input), javascript);
        }
    }

    /// Input casing does not affect the lookup.
    #[test]
    fn test_resolve_extensions_case_insensitive() {
        assert_eq!(resolve_extensions("RUST"), owned(&[".rs"]));
        assert_eq!(resolve_extensions("Py"), owned(&[".py", ".pyw", ".pyi"]));
    }

    /// Unknown names fall back to a dot-prefixed extension.
    #[test]
    fn test_resolve_extensions_unknown() {
        assert_eq!(resolve_extensions("xyzzy"), owned(&[".xyzzy"]));
    }

    /// The language listing contains known entries and is sorted.
    #[test]
    fn test_all_languages() {
        let langs = all_languages();
        for expected in ["rust", "python"] {
            assert!(langs.contains(&expected));
        }
        assert!(langs.windows(2).all(|pair| pair[0] <= pair[1]));
    }
}