1use std::borrow::Cow;
11
/// One known language: its display name together with the identifiers that resolve to it.
#[derive(Clone, Copy, Debug)]
pub struct CodeLanguage {
    /// Human-readable name of the language, e.g. `"Rust"` or `"C++"`.
    pub canonical: &'static str,
    /// Alternative identifiers that map to [`CodeLanguage::canonical`], e.g. `"rs"`.
    pub aliases: &'static [&'static str],
}
19
20pub const KNOWN_CODE_LANGUAGES: &[CodeLanguage] = &[
26 CodeLanguage {
27 canonical: "Rust",
28 aliases: &["rs", "rust"],
29 },
30 CodeLanguage {
31 canonical: "JavaScript",
32 aliases: &["js", "javascript", "jsx", "mjs", "cjs", "node"],
33 },
34 CodeLanguage {
35 canonical: "TypeScript",
36 aliases: &["ts", "tsx", "mts", "cts", "typescript"],
37 },
38 CodeLanguage {
39 canonical: "Python",
40 aliases: &["py", "python", "python3", "pycon"],
41 },
42 CodeLanguage {
43 canonical: "Bash",
44 aliases: &["sh", "bash", "zsh", "shell"],
45 },
46 CodeLanguage {
47 canonical: "HTML",
48 aliases: &["html", "htm"],
49 },
50 CodeLanguage {
51 canonical: "CSS",
52 aliases: &["css"],
53 },
54 CodeLanguage {
55 canonical: "JSON",
56 aliases: &["json", "jsonc", "json5"],
57 },
58 CodeLanguage {
59 canonical: "YAML",
60 aliases: &["yaml", "yml"],
61 },
62 CodeLanguage {
63 canonical: "TOML",
64 aliases: &["toml"],
65 },
66 CodeLanguage {
67 canonical: "XML",
68 aliases: &["xml"],
69 },
70 CodeLanguage {
71 canonical: "Markdown",
72 aliases: &["md", "markdown", "mkd", "mkdown"],
73 },
74 CodeLanguage {
75 canonical: "Mermaid",
76 aliases: &["mermaid", "mmd"],
77 },
78 CodeLanguage {
79 canonical: "SQL",
80 aliases: &["sql"],
81 },
82 CodeLanguage {
83 canonical: "C",
84 aliases: &["c", "h"],
85 },
86 CodeLanguage {
87 canonical: "C++",
88 aliases: &["cpp", "c++", "cxx", "cc", "hpp", "hh", "h++", "hxx"],
89 },
90 CodeLanguage {
91 canonical: "C#",
92 aliases: &["cs", "c#", "csharp"],
93 },
94 CodeLanguage {
95 canonical: "Java",
96 aliases: &["java"],
97 },
98 CodeLanguage {
99 canonical: "Dart",
100 aliases: &["dart"],
101 },
102 CodeLanguage {
103 canonical: "Go",
104 aliases: &["go", "golang"],
105 },
106 CodeLanguage {
107 canonical: "Scala",
108 aliases: &["scala"],
109 },
110 CodeLanguage {
111 canonical: "Groovy",
112 aliases: &["groovy", "gradle"],
113 },
114 CodeLanguage {
115 canonical: "Clojure",
116 aliases: &["clojure", "clj", "edn"],
117 },
118 CodeLanguage {
119 canonical: "Ruby",
120 aliases: &["rb", "ruby"],
121 },
122 CodeLanguage {
123 canonical: "Perl",
124 aliases: &["perl", "pl", "pm"],
125 },
126 CodeLanguage {
127 canonical: "PHP",
128 aliases: &["php"],
129 },
130 CodeLanguage {
131 canonical: "Haskell",
132 aliases: &["haskell", "hs"],
133 },
134 CodeLanguage {
135 canonical: "Elixir",
136 aliases: &["elixir", "ex", "exs"],
137 },
138 CodeLanguage {
139 canonical: "Erlang",
140 aliases: &["erlang", "erl"],
141 },
142 CodeLanguage {
143 canonical: "F#",
144 aliases: &["fsharp", "fs", "fsx", "fsi", "fsscript"],
145 },
146 CodeLanguage {
147 canonical: "Kotlin",
148 aliases: &["kotlin", "kt"],
149 },
150 CodeLanguage {
151 canonical: "Swift",
152 aliases: &["swift"],
153 },
154 CodeLanguage {
155 canonical: "Objective-C",
156 aliases: &["objectivec", "objc", "obj-c", "mm"],
157 },
158 CodeLanguage {
159 canonical: "OCaml",
160 aliases: &["ocaml", "ml"],
161 },
162 CodeLanguage {
163 canonical: "Lua",
164 aliases: &["lua"],
165 },
166 CodeLanguage {
167 canonical: "Nim",
168 aliases: &["nim", "nimrod"],
169 },
170 CodeLanguage {
171 canonical: "Nix",
172 aliases: &["nix"],
173 },
174 CodeLanguage {
175 canonical: "Zig",
176 aliases: &["zig"],
177 },
178 CodeLanguage {
179 canonical: "R",
180 aliases: &["r"],
181 },
182 CodeLanguage {
183 canonical: "Matlab",
184 aliases: &["matlab"],
185 },
186 CodeLanguage {
187 canonical: "Fortran",
188 aliases: &["fortran", "f90", "f95"],
189 },
190 CodeLanguage {
191 canonical: "GraphQL",
192 aliases: &["graphql", "gql"],
193 },
194 CodeLanguage {
195 canonical: "Protocol Buffers",
196 aliases: &["proto", "protobuf"],
197 },
198 CodeLanguage {
199 canonical: "Solidity",
200 aliases: &["solidity", "sol"],
201 },
202 CodeLanguage {
203 canonical: "Terraform (HCL)",
204 aliases: &["terraform", "tf", "hcl"],
205 },
206 CodeLanguage {
207 canonical: "INI",
208 aliases: &["ini"],
209 },
210 CodeLanguage {
211 canonical: "MathML",
212 aliases: &["mathml", "katex"],
213 },
214 CodeLanguage {
215 canonical: "Dockerfile",
216 aliases: &["dockerfile", "docker"],
217 },
218 CodeLanguage {
219 canonical: "Makefile",
220 aliases: &["makefile", "make", "mk", "mak"],
221 },
222 CodeLanguage {
223 canonical: "PowerShell",
224 aliases: &["powershell", "pwsh", "ps", "ps1"],
225 },
226 CodeLanguage {
227 canonical: "Vim Script",
228 aliases: &["vim", "vimscript"],
229 },
230 CodeLanguage {
231 canonical: "Assembly",
232 aliases: &["asm", "nasm", "x86asm"],
233 },
234 CodeLanguage {
235 canonical: "Plaintext",
236 aliases: &["plaintext", "text", "txt"],
237 },
238 CodeLanguage {
239 canonical: "Diff",
240 aliases: &["diff", "patch"],
241 },
242];
243
244pub fn canonical_language_name(raw: &str) -> Option<&'static str> {
246 let raw = raw.trim();
247 if raw.is_empty() {
248 return None;
249 }
250
251 for lang in KNOWN_CODE_LANGUAGES {
253 if raw.eq_ignore_ascii_case(lang.canonical) {
254 return Some(lang.canonical);
255 }
256 }
257
258 let lower = raw.to_ascii_lowercase();
259 for lang in KNOWN_CODE_LANGUAGES {
260 if lang.aliases.iter().any(|a| *a == lower) {
261 return Some(lang.canonical);
262 }
263 }
264
265 None
266}
267
268pub fn language_display_label<'a>(raw: &'a str) -> Option<Cow<'a, str>> {
273 let raw_trimmed = raw.trim();
274 if raw_trimmed.is_empty() {
275 return None;
276 }
277
278 if let Some(canonical) = canonical_language_name(raw_trimmed) {
279 return Some(Cow::Borrowed(canonical));
280 }
281
282 Some(Cow::Borrowed(raw_trimmed))
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks that aliases and canonical spellings resolve to the expected canonical name.
    #[test]
    fn smoke_test_canonical_language_name_aliases() {
        let cases: [(&str, &str); 12] = [
            ("rs", "Rust"),
            ("Rust", "Rust"),
            ("JS", "JavaScript"),
            ("c++", "C++"),
            ("tsx", "TypeScript"),
            ("gql", "GraphQL"),
            ("proto", "Protocol Buffers"),
            ("tf", "Terraform (HCL)"),
            ("nimrod", "Nim"),
            ("ps", "PowerShell"),
            ("mmd", "Mermaid"),
            ("katex", "MathML"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(
                canonical_language_name(input),
                Some(expected),
                "input: {:?}",
                input
            );
        }
    }

    /// An unrecognised identifier is returned trimmed instead of being dropped.
    #[test]
    fn smoke_test_language_display_label_unknown_falls_back() {
        let label = language_display_label(" mylang ");
        assert_eq!(label.as_deref(), Some("mylang"));
    }
}