use std::collections::HashMap;
use std::sync::LazyLock;
/// Resolves programming-language names and aliases to canonical identifiers.
///
/// The resolver only holds a reference to a statically allocated alias table,
/// so cloning it copies that reference rather than the table itself.
#[derive(Debug, Clone)]
pub struct LinguistResolver {
    /// Lookup table from alias to canonical language name.
    alias_map: &'static HashMap<&'static str, &'static str>,
}
impl LinguistResolver {
pub fn new() -> Self {
Self {
alias_map: &LANGUAGE_ALIASES,
}
}
pub fn resolve(&self, language: &str) -> String {
let lower = language.to_lowercase();
self.alias_map
.get(lower.as_str())
.map(|&s| s.to_string())
.unwrap_or(lower)
}
pub fn is_known(&self, language: &str) -> bool {
let lower = language.to_lowercase();
self.alias_map.contains_key(lower.as_str())
}
}
impl Default for LinguistResolver {
fn default() -> Self {
Self::new()
}
}
/// Lazily built table mapping every recognised alias to its canonical
/// language name.
///
/// All keys are lowercase, and each canonical name is itself a key that maps
/// to itself.
static LANGUAGE_ALIASES: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    // Each entry is `(canonical name, additional aliases)`. The canonical
    // name is registered as a key automatically when the groups are flattened.
    const ALIAS_GROUPS: &[(&str, &[&str])] = &[
        ("python", &["py", "python3", "py3", "pyw"]),
        ("javascript", &["js", "node", "nodejs", "mjs", "cjs"]),
        ("typescript", &["ts", "mts", "cts"]),
        (
            "shell",
            &["bash", "sh", "zsh", "ksh", "fish", "shellscript", "shell-script"],
        ),
        ("rust", &["rs"]),
        ("go", &["golang"]),
        ("ruby", &["rb", "jruby"]),
        ("java", &[]),
        ("kotlin", &["kt", "kts"]),
        ("scala", &[]),
        ("c", &["h"]),
        ("cpp", &["c++", "cxx", "cc", "hpp", "hxx"]),
        ("csharp", &["c#", "cs"]),
        ("fsharp", &["f#", "fs"]),
        ("swift", &[]),
        ("objective-c", &["objc", "obj-c"]),
        ("php", &[]),
        ("perl", &["pl"]),
        ("r", &[]),
        ("lua", &[]),
        ("haskell", &["hs"]),
        ("elixir", &["ex", "exs"]),
        ("erlang", &["erl"]),
        ("clojure", &["clj", "cljs", "cljc"]),
        ("html", &["htm", "xhtml"]),
        ("css", &[]),
        ("scss", &[]),
        ("sass", &[]),
        ("less", &[]),
        ("json", &["jsonc", "json5"]),
        ("yaml", &["yml"]),
        ("toml", &[]),
        ("xml", &["xsd", "xsl", "xslt"]),
        ("markdown", &["md", "mkd", "mdx"]),
        (
            "sql",
            &["mysql", "postgresql", "postgres", "sqlite", "plsql", "tsql"],
        ),
        ("graphql", &["gql"]),
        ("protobuf", &["proto"]),
        ("terraform", &["tf"]),
        ("hcl", &[]),
        ("dockerfile", &["docker"]),
        ("makefile", &["make"]),
        ("nix", &[]),
        ("vim", &["viml", "vimscript"]),
        ("zig", &[]),
        ("nim", &[]),
        ("julia", &["jl"]),
        ("ocaml", &["ml"]),
        ("reason", &["re"]),
        ("dart", &[]),
        ("v", &["vlang"]),
        ("awk", &["gawk"]),
        ("sed", &[]),
        ("powershell", &["pwsh", "ps1"]),
        ("batch", &["bat", "cmd"]),
        ("diff", &["patch"]),
        ("ini", &["cfg", "conf"]),
        ("applescript", &[]),
        ("groovy", &[]),
        ("latex", &["tex"]),
        ("text", &["txt", "plaintext", "plain"]),
    ];

    ALIAS_GROUPS
        .iter()
        .flat_map(|&(canonical, aliases)| {
            std::iter::once(canonical)
                .chain(aliases.iter().copied())
                .map(move |alias| (alias, canonical))
        })
        .collect()
});
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair resolves as stated.
    fn assert_resolves(cases: &[(&str, &str)]) {
        let resolver = LinguistResolver::new();
        for &(input, expected) in cases {
            assert_eq!(resolver.resolve(input), expected, "input: {:?}", input);
        }
    }

    /// Aliases, in any letter case, resolve to their canonical name.
    #[test]
    fn test_resolve_known_alias() {
        assert_resolves(&[
            ("py", "python"),
            ("python3", "python"),
            ("Python", "python"),
            ("PY", "python"),
            ("bash", "shell"),
            ("sh", "shell"),
            ("zsh", "shell"),
            ("js", "javascript"),
            ("node", "javascript"),
            ("rs", "rust"),
            ("Rust", "rust"),
        ]);
    }

    /// Names missing from the table come back lowercased.
    #[test]
    fn test_resolve_unknown_language() {
        assert_resolves(&[("UnknownLang", "unknownlang"), ("CUSTOM", "custom")]);
    }

    /// Canonical names resolve to themselves.
    #[test]
    fn test_resolve_canonical_name() {
        assert_resolves(&[
            ("python", "python"),
            ("javascript", "javascript"),
            ("rust", "rust"),
        ]);
    }

    /// `is_known` accepts listed names in any case and rejects everything else.
    #[test]
    fn test_is_known() {
        let resolver = LinguistResolver::new();
        for &name in &["python", "py", "bash", "JavaScript"] {
            assert!(resolver.is_known(name), "expected {:?} to be known", name);
        }
        for &name in &["unknownlang", "customformat"] {
            assert!(!resolver.is_known(name), "expected {:?} to be unknown", name);
        }
    }

    /// Letter case of the input never affects the resolved name.
    #[test]
    fn test_case_insensitivity() {
        assert_resolves(&[
            ("PYTHON", "python"),
            ("Python", "python"),
            ("pYtHoN", "python"),
            ("JAVASCRIPT", "javascript"),
            ("JavaScript", "javascript"),
        ]);
    }
}