1use std::collections::HashMap;
8use std::sync::LazyLock;
9
/// Resolves language names and aliases to canonical language identifiers.
///
/// The resolver holds only a shared reference to a static alias table, so it
/// is trivially cheap to copy and safe to debug-print.
#[derive(Debug, Clone, Copy)]
pub struct LinguistResolver {
    /// Lookup table mapping a lowercase name or alias to its canonical name.
    alias_map: &'static HashMap<&'static str, &'static str>,
}
15
16impl LinguistResolver {
17 pub fn new() -> Self {
19 Self {
20 alias_map: &LANGUAGE_ALIASES,
21 }
22 }
23
24 pub fn resolve(&self, language: &str) -> String {
29 let lower = language.to_lowercase();
30 self.alias_map
31 .get(lower.as_str())
32 .map(|&s| s.to_string())
33 .unwrap_or(lower)
34 }
35
36 pub fn is_known(&self, language: &str) -> bool {
38 let lower = language.to_lowercase();
39 self.alias_map.contains_key(lower.as_str())
40 }
41}
42
43impl Default for LinguistResolver {
44 fn default() -> Self {
45 Self::new()
46 }
47}
48
/// Lazily built lookup table from lowercase language names and aliases to
/// their canonical language name.
///
/// Every canonical name is also registered as a key that maps to itself.
static LANGUAGE_ALIASES: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    // Each row is `(canonical name, every key that resolves to it)`.
    const ALIAS_GROUPS: &[(&str, &[&str])] = &[
        ("python", &["python", "py", "python3", "py3", "pyw"]),
        ("javascript", &["javascript", "js", "node", "nodejs", "mjs", "cjs"]),
        ("typescript", &["typescript", "ts", "mts", "cts"]),
        (
            "shell",
            &["shell", "bash", "sh", "zsh", "ksh", "fish", "shellscript", "shell-script"],
        ),
        ("rust", &["rust", "rs"]),
        ("go", &["go", "golang"]),
        ("ruby", &["ruby", "rb", "jruby"]),
        ("java", &["java"]),
        ("kotlin", &["kotlin", "kt", "kts"]),
        ("scala", &["scala"]),
        ("c", &["c", "h"]),
        ("cpp", &["c++", "cpp", "cxx", "cc", "hpp", "hxx"]),
        ("csharp", &["c#", "csharp", "cs"]),
        ("fsharp", &["f#", "fsharp", "fs"]),
        ("swift", &["swift"]),
        ("objective-c", &["objective-c", "objc", "obj-c"]),
        ("php", &["php"]),
        ("perl", &["perl", "pl"]),
        ("r", &["r"]),
        ("lua", &["lua"]),
        ("haskell", &["haskell", "hs"]),
        ("elixir", &["elixir", "ex", "exs"]),
        ("erlang", &["erlang", "erl"]),
        ("clojure", &["clojure", "clj", "cljs", "cljc"]),
        ("html", &["html", "htm", "xhtml"]),
        ("css", &["css"]),
        ("scss", &["scss"]),
        ("sass", &["sass"]),
        ("less", &["less"]),
        ("json", &["json", "jsonc", "json5"]),
        ("yaml", &["yaml", "yml"]),
        ("toml", &["toml"]),
        ("xml", &["xml", "xsd", "xsl", "xslt"]),
        ("markdown", &["markdown", "md", "mkd", "mdx"]),
        (
            "sql",
            &["sql", "mysql", "postgresql", "postgres", "sqlite", "plsql", "tsql"],
        ),
        ("graphql", &["graphql", "gql"]),
        ("protobuf", &["protobuf", "proto"]),
        ("terraform", &["terraform", "tf"]),
        // `hcl` is kept as its own canonical name rather than folded into terraform.
        ("hcl", &["hcl"]),
        ("dockerfile", &["dockerfile", "docker"]),
        ("makefile", &["makefile", "make"]),
        ("nix", &["nix"]),
        ("vim", &["vim", "viml", "vimscript"]),
        ("zig", &["zig"]),
        ("nim", &["nim"]),
        ("julia", &["julia", "jl"]),
        ("ocaml", &["ocaml", "ml"]),
        ("reason", &["reason", "re"]),
        ("dart", &["dart"]),
        ("v", &["v", "vlang"]),
        ("awk", &["awk", "gawk"]),
        ("sed", &["sed"]),
        ("powershell", &["powershell", "pwsh", "ps1"]),
        ("batch", &["batch", "bat", "cmd"]),
        ("diff", &["diff", "patch"]),
        ("ini", &["ini", "cfg", "conf"]),
        ("applescript", &["applescript"]),
        ("groovy", &["groovy"]),
        ("latex", &["latex", "tex"]),
        ("text", &["text", "txt", "plaintext", "plain"]),
    ];

    // Flatten the groups into individual `key -> canonical` entries.
    ALIAS_GROUPS
        .iter()
        .flat_map(|&(canonical, keys)| keys.iter().map(move |&key| (key, canonical)))
        .collect::<HashMap<_, _>>()
});
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(input, expected)` pair resolves as stated.
    fn assert_resolves(cases: &[(&str, &str)]) {
        let resolver = LinguistResolver::new();
        for &(input, expected) in cases {
            assert_eq!(resolver.resolve(input), expected);
        }
    }

    /// Aliases, in any letter case, resolve to their canonical language.
    #[test]
    fn test_resolve_known_alias() {
        assert_resolves(&[
            ("py", "python"),
            ("python3", "python"),
            ("Python", "python"),
            ("PY", "python"),
            ("bash", "shell"),
            ("sh", "shell"),
            ("zsh", "shell"),
            ("js", "javascript"),
            ("node", "javascript"),
            ("rs", "rust"),
            ("Rust", "rust"),
        ]);
    }

    /// Names missing from the table come back lowercased.
    #[test]
    fn test_resolve_unknown_language() {
        assert_resolves(&[("UnknownLang", "unknownlang"), ("CUSTOM", "custom")]);
    }

    /// Canonical names resolve to themselves.
    #[test]
    fn test_resolve_canonical_name() {
        assert_resolves(&[
            ("python", "python"),
            ("javascript", "javascript"),
            ("rust", "rust"),
        ]);
    }

    /// `is_known` accepts registered names in any case and rejects the rest.
    #[test]
    fn test_is_known() {
        let resolver = LinguistResolver::new();

        for &known in &["python", "py", "bash", "JavaScript"] {
            assert!(resolver.is_known(known));
        }

        for &unknown in &["unknownlang", "customformat"] {
            assert!(!resolver.is_known(unknown));
        }
    }

    /// Resolution ignores the letter case of the input.
    #[test]
    fn test_case_insensitivity() {
        assert_resolves(&[
            ("PYTHON", "python"),
            ("Python", "python"),
            ("pYtHoN", "python"),
            ("JAVASCRIPT", "javascript"),
            ("JavaScript", "javascript"),
        ]);
    }
}