1use std::collections::HashMap;
2
3use once_cell::sync::Lazy;
4
/// A language token paired with the canonical language id derived from it.
///
/// Equality, hashing and the `Debug` output take both fields into account,
/// so two specs built from different spellings of the same language are
/// distinct values.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct LanguageSpec {
    /// The token exactly as it was supplied by the caller.
    original: String,
    /// The canonical language id computed from `original`.
    canonical: String,
}
11
12impl LanguageSpec {
13 pub fn new(token: impl Into<String>) -> Self {
14 let raw = token.into();
15 let canonical = canonical_language_id(&raw);
16 Self {
17 original: raw,
18 canonical,
19 }
20 }
21
22 pub fn canonical_id(&self) -> &str {
23 &self.canonical
24 }
25
26 pub fn original(&self) -> &str {
27 &self.original
28 }
29}
30
31impl std::fmt::Display for LanguageSpec {
32 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33 if self.original.eq_ignore_ascii_case(&self.canonical) {
34 write!(f, "{}", self.canonical)
35 } else {
36 write!(f, "{} ({})", self.canonical, self.original)
37 }
38 }
39}
40
41static ALIASES: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
42 let pairs: &[(&str, &str)] = &[
43 ("python", "python"),
44 ("py", "python"),
45 ("py3", "python"),
46 ("python3", "python"),
47 ("rust", "rust"),
48 ("rs", "rust"),
49 ("go", "go"),
50 ("golang", "go"),
51 ("csharp", "csharp"),
52 ("cs", "csharp"),
53 ("c#", "csharp"),
54 ("dotnet", "csharp"),
55 ("dotnetcore", "csharp"),
56 ("typescript", "typescript"),
57 ("ts", "typescript"),
58 ("ts-node", "typescript"),
59 ("javascript", "javascript"),
60 ("js", "javascript"),
61 ("node", "javascript"),
62 ("nodejs", "javascript"),
63 ("ecmascript", "javascript"),
64 ("groovy", "groovy"),
65 ("grv", "groovy"),
66 ("groovysh", "groovy"),
67 ("deno", "typescript"),
68 ("denojs", "typescript"),
69 ("ruby", "ruby"),
70 ("rb", "ruby"),
71 ("irb", "ruby"),
72 ("lua", "lua"),
73 ("luajit", "lua"),
74 ("bash", "bash"),
75 ("sh", "bash"),
76 ("shell", "bash"),
77 ("zsh", "bash"),
78 ("java", "java"),
79 ("c", "c"),
80 ("cpp", "cpp"),
81 ("c++", "cpp"),
82 ("php", "php"),
83 ("php-cli", "php"),
84 ("kotlin", "kotlin"),
85 ("kt", "kotlin"),
86 ("kts", "kotlin"),
87 ("r", "r"),
88 ("rscript", "r"),
89 ("cran", "r"),
90 ("dart", "dart"),
91 ("dartlang", "dart"),
92 ("flutter", "dart"),
93 ("swift", "swift"),
94 ("swiftlang", "swift"),
95 ("perl", "perl"),
96 ("pl", "perl"),
97 ("julia", "julia"),
98 ("jl", "julia"),
99 ("haskell", "haskell"),
100 ("hs", "haskell"),
101 ("ghci", "haskell"),
102 ("elixir", "elixir"),
103 ("ex", "elixir"),
104 ("exs", "elixir"),
105 ("iex", "elixir"),
106 ("zig", "zig"),
107 ("ziglang", "zig"),
108 ("crystal", "crystal"),
109 ("cr", "crystal"),
110 ("crystal-lang", "crystal"),
111 ("nim", "nim"),
112 ("nimlang", "nim"),
113 ];
114 pairs.iter().cloned().collect()
115});
116
117pub fn canonical_language_id(token: &str) -> String {
118 language_alias_lookup(token)
119 .unwrap_or_else(|| token.trim())
120 .to_ascii_lowercase()
121}
122
123pub fn language_alias_lookup(token: &str) -> Option<&'static str> {
124 let normalized = token.trim().to_ascii_lowercase();
125 ALIASES.get(normalized.as_str()).copied()
126}
127
128pub fn is_language_token(token: &str) -> bool {
129 language_alias_lookup(token).is_some()
130}
131
132pub fn known_canonical_languages() -> Vec<&'static str> {
134 let mut unique: Vec<_> = ALIASES.values().copied().collect();
135 unique.sort_unstable();
136 unique.dedup();
137 unique
138}