1use std::collections::HashMap;
2
3use once_cell::sync::Lazy;
4
/// A language selector that keeps both the token as it was supplied and the
/// canonical identifier derived from it.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct LanguageSpec {
    /// The token exactly as it was passed in.
    original: String,
    /// The canonical language identifier computed for `original`.
    canonical: String,
}
10
11impl LanguageSpec {
12 pub fn new(token: impl Into<String>) -> Self {
13 let raw = token.into();
14 let canonical = canonical_language_id(&raw);
15 Self {
16 original: raw,
17 canonical,
18 }
19 }
20
21 pub fn canonical_id(&self) -> &str {
22 &self.canonical
23 }
24
25 pub fn original(&self) -> &str {
26 &self.original
27 }
28}
29
30impl std::fmt::Display for LanguageSpec {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 if self.original.eq_ignore_ascii_case(&self.canonical) {
33 write!(f, "{}", self.canonical)
34 } else {
35 write!(f, "{} ({})", self.canonical, self.original)
36 }
37 }
38}
39
40static ALIASES: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
41 let pairs: &[(&str, &str)] = &[
42 ("python", "python"),
43 ("py", "python"),
44 ("py3", "python"),
45 ("python3", "python"),
46 ("rust", "rust"),
47 ("rs", "rust"),
48 ("go", "go"),
49 ("golang", "go"),
50 ("csharp", "csharp"),
51 ("cs", "csharp"),
52 ("c#", "csharp"),
53 ("dotnet", "csharp"),
54 ("dotnetcore", "csharp"),
55 ("typescript", "typescript"),
56 ("ts", "typescript"),
57 ("ts-node", "typescript"),
58 ("javascript", "javascript"),
59 ("js", "javascript"),
60 ("node", "javascript"),
61 ("nodejs", "javascript"),
62 ("ecmascript", "javascript"),
63 ("groovy", "groovy"),
64 ("grv", "groovy"),
65 ("groovysh", "groovy"),
66 ("deno", "typescript"),
67 ("denojs", "typescript"),
68 ("ruby", "ruby"),
69 ("rb", "ruby"),
70 ("irb", "ruby"),
71 ("lua", "lua"),
72 ("luajit", "lua"),
73 ("bash", "bash"),
74 ("sh", "bash"),
75 ("shell", "bash"),
76 ("zsh", "bash"),
77 ("java", "java"),
78 ("c", "c"),
79 ("cpp", "cpp"),
80 ("c++", "cpp"),
81 ("php", "php"),
82 ("php-cli", "php"),
83 ("kotlin", "kotlin"),
84 ("kt", "kotlin"),
85 ("kts", "kotlin"),
86 ("r", "r"),
87 ("rscript", "r"),
88 ("cran", "r"),
89 ("dart", "dart"),
90 ("dartlang", "dart"),
91 ("flutter", "dart"),
92 ("swift", "swift"),
93 ("swiftlang", "swift"),
94 ("perl", "perl"),
95 ("pl", "perl"),
96 ("julia", "julia"),
97 ("jl", "julia"),
98 ("haskell", "haskell"),
99 ("hs", "haskell"),
100 ("ghci", "haskell"),
101 ("elixir", "elixir"),
102 ("ex", "elixir"),
103 ("exs", "elixir"),
104 ("iex", "elixir"),
105 ("zig", "zig"),
106 ("ziglang", "zig"),
107 ("crystal", "crystal"),
108 ("cr", "crystal"),
109 ("crystal-lang", "crystal"),
110 ("nim", "nim"),
111 ("nimlang", "nim"),
112 ];
113 pairs.iter().cloned().collect()
114});
115
116pub fn canonical_language_id(token: &str) -> String {
117 language_alias_lookup(token)
118 .unwrap_or_else(|| token.trim())
119 .to_ascii_lowercase()
120}
121
122pub fn language_alias_lookup(token: &str) -> Option<&'static str> {
123 let normalized = token.trim().to_ascii_lowercase();
124 ALIASES.get(normalized.as_str()).copied()
125}
126
127pub fn is_language_token(token: &str) -> bool {
128 language_alias_lookup(token).is_some()
129}
130
131pub fn known_canonical_languages() -> Vec<&'static str> {
132 let mut unique: Vec<_> = ALIASES.values().copied().collect();
133 unique.sort_unstable();
134 unique.dedup();
135 unique
136}