1use std::borrow::Cow;
11
/// One entry of the known-language table: a display name plus the short
/// identifiers that resolve to it.
#[derive(Clone, Copy, Debug)]
pub struct CodeLanguage {
    /// Name returned to callers when this language is matched (e.g. `"Rust"`).
    pub canonical: &'static str,
    /// Alternative spellings that map to [`CodeLanguage::canonical`]
    /// (e.g. `"rs"`, `"rust"`).
    pub aliases: &'static [&'static str],
}
20
21pub const KNOWN_CODE_LANGUAGES: &[CodeLanguage] = &[
27 CodeLanguage {
28 canonical: "Rust",
29 aliases: &["rs", "rust"],
30 },
31 CodeLanguage {
32 canonical: "JavaScript",
33 aliases: &["js", "javascript", "jsx", "mjs", "cjs", "node"],
34 },
35 CodeLanguage {
36 canonical: "TypeScript",
37 aliases: &["ts", "tsx", "mts", "cts", "typescript"],
38 },
39 CodeLanguage {
40 canonical: "Python",
41 aliases: &["py", "python", "python3", "pycon"],
42 },
43 CodeLanguage {
44 canonical: "Bash",
45 aliases: &["sh", "bash", "zsh", "shell"],
46 },
47 CodeLanguage {
48 canonical: "HTML",
49 aliases: &["html", "htm"],
50 },
51 CodeLanguage {
52 canonical: "CSS",
53 aliases: &["css"],
54 },
55 CodeLanguage {
56 canonical: "JSON",
57 aliases: &["json", "jsonc", "json5"],
58 },
59 CodeLanguage {
60 canonical: "YAML",
61 aliases: &["yaml", "yml"],
62 },
63 CodeLanguage {
64 canonical: "TOML",
65 aliases: &["toml"],
66 },
67 CodeLanguage {
68 canonical: "XML",
69 aliases: &["xml"],
70 },
71 CodeLanguage {
72 canonical: "Markdown",
73 aliases: &["md", "markdown", "mkd", "mkdown"],
74 },
75 CodeLanguage {
76 canonical: "Mermaid",
77 aliases: &["mermaid", "mmd"],
78 },
79 CodeLanguage {
80 canonical: "SQL",
81 aliases: &["sql"],
82 },
83 CodeLanguage {
84 canonical: "C",
85 aliases: &["c", "h"],
86 },
87 CodeLanguage {
88 canonical: "C++",
89 aliases: &["cpp", "c++", "cxx", "cc", "hpp", "hh", "h++", "hxx"],
90 },
91 CodeLanguage {
92 canonical: "C#",
93 aliases: &["cs", "c#", "csharp"],
94 },
95 CodeLanguage {
96 canonical: "Java",
97 aliases: &["java"],
98 },
99 CodeLanguage {
100 canonical: "Dart",
101 aliases: &["dart"],
102 },
103 CodeLanguage {
104 canonical: "Go",
105 aliases: &["go", "golang"],
106 },
107 CodeLanguage {
108 canonical: "Scala",
109 aliases: &["scala"],
110 },
111 CodeLanguage {
112 canonical: "Groovy",
113 aliases: &["groovy", "gradle"],
114 },
115 CodeLanguage {
116 canonical: "Clojure",
117 aliases: &["clojure", "clj", "edn"],
118 },
119 CodeLanguage {
120 canonical: "Ruby",
121 aliases: &["rb", "ruby"],
122 },
123 CodeLanguage {
124 canonical: "Perl",
125 aliases: &["perl", "pl", "pm"],
126 },
127 CodeLanguage {
128 canonical: "PHP",
129 aliases: &["php"],
130 },
131 CodeLanguage {
132 canonical: "Haskell",
133 aliases: &["haskell", "hs"],
134 },
135 CodeLanguage {
136 canonical: "Elixir",
137 aliases: &["elixir", "ex", "exs"],
138 },
139 CodeLanguage {
140 canonical: "Erlang",
141 aliases: &["erlang", "erl"],
142 },
143 CodeLanguage {
144 canonical: "F#",
145 aliases: &["fsharp", "fs", "fsx", "fsi", "fsscript"],
146 },
147 CodeLanguage {
148 canonical: "Kotlin",
149 aliases: &["kotlin", "kt"],
150 },
151 CodeLanguage {
152 canonical: "Swift",
153 aliases: &["swift"],
154 },
155 CodeLanguage {
156 canonical: "Objective-C",
157 aliases: &["objectivec", "objc", "obj-c", "mm"],
158 },
159 CodeLanguage {
160 canonical: "OCaml",
161 aliases: &["ocaml", "ml"],
162 },
163 CodeLanguage {
164 canonical: "Lua",
165 aliases: &["lua"],
166 },
167 CodeLanguage {
168 canonical: "Nim",
169 aliases: &["nim", "nimrod"],
170 },
171 CodeLanguage {
172 canonical: "Nix",
173 aliases: &["nix"],
174 },
175 CodeLanguage {
176 canonical: "Zig",
177 aliases: &["zig"],
178 },
179 CodeLanguage {
180 canonical: "R",
181 aliases: &["r"],
182 },
183 CodeLanguage {
184 canonical: "Matlab",
185 aliases: &["matlab"],
186 },
187 CodeLanguage {
188 canonical: "Fortran",
189 aliases: &["fortran", "f90", "f95"],
190 },
191 CodeLanguage {
192 canonical: "GraphQL",
193 aliases: &["graphql", "gql"],
194 },
195 CodeLanguage {
196 canonical: "Protocol Buffers",
197 aliases: &["proto", "protobuf"],
198 },
199 CodeLanguage {
200 canonical: "Solidity",
201 aliases: &["solidity", "sol"],
202 },
203 CodeLanguage {
204 canonical: "Terraform (HCL)",
205 aliases: &["terraform", "tf", "hcl"],
206 },
207 CodeLanguage {
208 canonical: "INI",
209 aliases: &["ini"],
210 },
211 CodeLanguage {
212 canonical: "MathML",
213 aliases: &["mathml", "katex"],
214 },
215 CodeLanguage {
216 canonical: "Dockerfile",
217 aliases: &["dockerfile", "docker"],
218 },
219 CodeLanguage {
220 canonical: "Makefile",
221 aliases: &["makefile", "make", "mk", "mak"],
222 },
223 CodeLanguage {
224 canonical: "PowerShell",
225 aliases: &["powershell", "pwsh", "ps", "ps1"],
226 },
227 CodeLanguage {
228 canonical: "Vim Script",
229 aliases: &["vim", "vimscript"],
230 },
231 CodeLanguage {
232 canonical: "Assembly",
233 aliases: &["asm", "nasm", "x86asm"],
234 },
235 CodeLanguage {
236 canonical: "Plaintext",
237 aliases: &["plaintext", "text", "txt"],
238 },
239 CodeLanguage {
240 canonical: "Diff",
241 aliases: &["diff", "patch"],
242 },
243];
244
245pub fn canonical_language_name(raw: &str) -> Option<&'static str> {
247 let raw = raw.trim();
248 if raw.is_empty() {
249 return None;
250 }
251
252 for lang in KNOWN_CODE_LANGUAGES {
254 if raw.eq_ignore_ascii_case(lang.canonical) {
255 return Some(lang.canonical);
256 }
257 }
258
259 let lower = raw.to_ascii_lowercase();
260 for lang in KNOWN_CODE_LANGUAGES {
261 if lang.aliases.iter().any(|a| *a == lower) {
262 return Some(lang.canonical);
263 }
264 }
265
266 None
267}
268
269pub fn language_display_label<'a>(raw: &'a str) -> Option<Cow<'a, str>> {
274 let raw_trimmed = raw.trim();
275 if raw_trimmed.is_empty() {
276 return None;
277 }
278
279 if let Some(canonical) = canonical_language_name(raw_trimmed) {
280 return Some(Cow::Borrowed(canonical));
281 }
282
283 Some(Cow::Borrowed(raw_trimmed))
284}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks that canonical names and a sample of aliases resolve to the
    /// expected canonical name, including case-insensitive input.
    #[test]
    fn smoke_test_canonical_language_name_aliases() {
        let cases: [(&str, &str); 12] = [
            ("rs", "Rust"),
            ("Rust", "Rust"),
            ("JS", "JavaScript"),
            ("c++", "C++"),
            ("tsx", "TypeScript"),
            ("gql", "GraphQL"),
            ("proto", "Protocol Buffers"),
            ("tf", "Terraform (HCL)"),
            ("nimrod", "Nim"),
            ("ps", "PowerShell"),
            ("mmd", "Mermaid"),
            ("katex", "MathML"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(
                canonical_language_name(input),
                Some(expected),
                "input: {:?}",
                input
            );
        }
    }

    /// An unrecognised tag is returned trimmed rather than dropped.
    #[test]
    fn smoke_test_language_display_label_unknown_falls_back() {
        let label = language_display_label(" mylang ");
        assert_eq!(label.as_deref(), Some("mylang"));
    }
}