run/
language.rs

1use std::collections::HashMap;
2
3use once_cell::sync::Lazy;
4
/// Canonical identifier for a supported language along with user-provided token.
///
/// Both fields are private; read them through [`LanguageSpec::canonical_id`] and
/// [`LanguageSpec::original`].
///
/// The derived `PartialEq`/`Eq`/`Hash` implementations take both fields into
/// account, so two specs that resolve to the same canonical id but were built
/// from different spellings do not compare equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LanguageSpec {
    // The token exactly as handed to `LanguageSpec::new` (stored without
    // trimming or case folding).
    original: String,
    // The result of `canonical_language_id` applied to `original`.
    canonical: String,
}
11
12impl LanguageSpec {
13    pub fn new(token: impl Into<String>) -> Self {
14        let raw = token.into();
15        let canonical = canonical_language_id(&raw);
16        Self {
17            original: raw,
18            canonical,
19        }
20    }
21
22    pub fn canonical_id(&self) -> &str {
23        &self.canonical
24    }
25
26    pub fn original(&self) -> &str {
27        &self.original
28    }
29}
30
31impl std::fmt::Display for LanguageSpec {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        if self.original.eq_ignore_ascii_case(&self.canonical) {
34            write!(f, "{}", self.canonical)
35        } else {
36            write!(f, "{} ({})", self.canonical, self.original)
37        }
38    }
39}
40
41static ALIASES: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
42    let pairs: &[(&str, &str)] = &[
43        ("python", "python"),
44        ("py", "python"),
45        ("py3", "python"),
46        ("python3", "python"),
47        ("rust", "rust"),
48        ("rs", "rust"),
49        ("go", "go"),
50        ("golang", "go"),
51        ("csharp", "csharp"),
52        ("cs", "csharp"),
53        ("c#", "csharp"),
54        ("dotnet", "csharp"),
55        ("dotnetcore", "csharp"),
56        ("typescript", "typescript"),
57        ("ts", "typescript"),
58        ("ts-node", "typescript"),
59        ("javascript", "javascript"),
60        ("js", "javascript"),
61        ("node", "javascript"),
62        ("nodejs", "javascript"),
63        ("ecmascript", "javascript"),
64        ("groovy", "groovy"),
65        ("grv", "groovy"),
66        ("groovysh", "groovy"),
67        ("deno", "typescript"),
68        ("denojs", "typescript"),
69        ("ruby", "ruby"),
70        ("rb", "ruby"),
71        ("irb", "ruby"),
72        ("lua", "lua"),
73        ("luajit", "lua"),
74        ("bash", "bash"),
75        ("sh", "bash"),
76        ("shell", "bash"),
77        ("zsh", "bash"),
78        ("java", "java"),
79        ("c", "c"),
80        ("cpp", "cpp"),
81        ("c++", "cpp"),
82        ("php", "php"),
83        ("php-cli", "php"),
84        ("kotlin", "kotlin"),
85        ("kt", "kotlin"),
86        ("kts", "kotlin"),
87        ("r", "r"),
88        ("rscript", "r"),
89        ("cran", "r"),
90        ("dart", "dart"),
91        ("dartlang", "dart"),
92        ("flutter", "dart"),
93        ("swift", "swift"),
94        ("swiftlang", "swift"),
95        ("perl", "perl"),
96        ("pl", "perl"),
97        ("julia", "julia"),
98        ("jl", "julia"),
99        ("haskell", "haskell"),
100        ("hs", "haskell"),
101        ("ghci", "haskell"),
102        ("elixir", "elixir"),
103        ("ex", "elixir"),
104        ("exs", "elixir"),
105        ("iex", "elixir"),
106        ("zig", "zig"),
107        ("ziglang", "zig"),
108        ("crystal", "crystal"),
109        ("cr", "crystal"),
110        ("crystal-lang", "crystal"),
111        ("nim", "nim"),
112        ("nimlang", "nim"),
113    ];
114    pairs.iter().cloned().collect()
115});
116
117pub fn canonical_language_id(token: &str) -> String {
118    language_alias_lookup(token)
119        .unwrap_or_else(|| token.trim())
120        .to_ascii_lowercase()
121}
122
123pub fn language_alias_lookup(token: &str) -> Option<&'static str> {
124    let normalized = token.trim().to_ascii_lowercase();
125    ALIASES.get(normalized.as_str()).copied()
126}
127
128pub fn is_language_token(token: &str) -> bool {
129    language_alias_lookup(token).is_some()
130}
131
132/// All canonical languages registered in the alias table.
133pub fn known_canonical_languages() -> Vec<&'static str> {
134    let mut unique: Vec<_> = ALIASES.values().copied().collect();
135    unique.sort_unstable();
136    unique.dedup();
137    unique
138}