Skip to main content

harn_hostlib/ast/
language.rs

1//! Tree-sitter language registry.
2//!
3//! Mirrors the Swift `TreeSitterLanguage` enum in
4//! `~/projects/burin-code/Sources/ASTEngine/TreeSitterIntegration.swift`
5//! verbatim. The set of languages, their canonical names, and their file
6//! extensions all match Swift exactly so the bridged outputs round-trip
7//! across the harn ↔ burin-code boundary without translation. Adding or
8//! dropping a language requires a coordinated change in both repos.
9
10use tree_sitter::Language as TsLanguage;
11
/// Languages with tree-sitter symbol extraction support.
///
/// The string returned by [`Language::name`] is the canonical wire name;
/// callers (and the JSON schemas) refer to languages by that string.
///
/// Per the module docs, the variant set mirrors the Swift
/// `TreeSitterLanguage` enum, so variants should only be added or removed
/// in lockstep with that enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// TypeScript (wire name `typescript`; `.ts` files).
    TypeScript,
    /// TypeScript with JSX syntax (wire name `tsx`; `.tsx` files).
    Tsx,
    /// JavaScript (wire name `javascript`; `.js`, `.mjs` and `.cjs` files).
    JavaScript,
    /// JavaScript with JSX syntax (wire name `jsx`; `.jsx` files). Parsed
    /// with the same tree-sitter grammar as [`Language::JavaScript`].
    Jsx,
    /// Python (wire name `python`; `.py` files).
    Python,
    /// Go (wire name `go`; `.go` files).
    Go,
    /// Rust (wire name `rust`; `.rs` files).
    Rust,
    /// Java (wire name `java`; `.java` files).
    Java,
    /// C (wire name `c`; `.c` files, and `.h` headers are classified as C).
    C,
    /// C++ (wire name `cpp`; `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx` and
    /// `.hh` files).
    Cpp,
    /// C# (wire name `csharp`; `.cs` and `.csx` files).
    CSharp,
    /// Ruby (wire name `ruby`; `.rb` files).
    Ruby,
    /// Kotlin (wire name `kotlin`; `.kt` and `.kts` files).
    Kotlin,
    /// PHP (wire name `php`; `.php` files).
    Php,
    /// Scala (wire name `scala`; `.scala` and `.sc` files).
    Scala,
    /// Shell scripts (wire name `bash`; `.sh`, `.bash` and `.zsh` files
    /// are all handled by the Bash grammar).
    Bash,
    /// Swift (wire name `swift`; `.swift` files).
    Swift,
    /// Zig (wire name `zig`; `.zig` and `.zon` files).
    Zig,
    /// Elixir (wire name `elixir`; `.ex` and `.exs` files).
    Elixir,
    /// Lua (wire name `lua`; `.lua` files).
    Lua,
    /// Haskell (wire name `haskell`; `.hs` and `.lhs` files).
    Haskell,
    /// R (wire name `r`; `.r` files, matched case-insensitively).
    R,
}
42
43impl Language {
44    /// Canonical wire name.
45    pub fn name(self) -> &'static str {
46        match self {
47            Language::TypeScript => "typescript",
48            Language::Tsx => "tsx",
49            Language::JavaScript => "javascript",
50            Language::Jsx => "jsx",
51            Language::Python => "python",
52            Language::Go => "go",
53            Language::Rust => "rust",
54            Language::Java => "java",
55            Language::C => "c",
56            Language::Cpp => "cpp",
57            Language::CSharp => "csharp",
58            Language::Ruby => "ruby",
59            Language::Kotlin => "kotlin",
60            Language::Php => "php",
61            Language::Scala => "scala",
62            Language::Bash => "bash",
63            Language::Swift => "swift",
64            Language::Zig => "zig",
65            Language::Elixir => "elixir",
66            Language::Lua => "lua",
67            Language::Haskell => "haskell",
68            Language::R => "r",
69        }
70    }
71
72    /// Tree-sitter grammar handle. Cheap; the underlying `LANGUAGE`
73    /// constants are static.
74    pub fn ts_language(self) -> TsLanguage {
75        match self {
76            Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
77            Language::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
78            Language::JavaScript | Language::Jsx => tree_sitter_javascript::LANGUAGE.into(),
79            Language::Python => tree_sitter_python::LANGUAGE.into(),
80            Language::Go => tree_sitter_go::LANGUAGE.into(),
81            Language::Rust => tree_sitter_rust::LANGUAGE.into(),
82            Language::Java => tree_sitter_java::LANGUAGE.into(),
83            Language::C => tree_sitter_c::LANGUAGE.into(),
84            Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
85            Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
86            Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
87            Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
88            Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
89            Language::Scala => tree_sitter_scala::LANGUAGE.into(),
90            Language::Bash => tree_sitter_bash::LANGUAGE.into(),
91            Language::Swift => tree_sitter_swift::LANGUAGE.into(),
92            Language::Zig => tree_sitter_zig::LANGUAGE.into(),
93            Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
94            Language::Lua => tree_sitter_lua::LANGUAGE.into(),
95            Language::Haskell => tree_sitter_haskell::LANGUAGE.into(),
96            Language::R => tree_sitter_r::LANGUAGE.into(),
97        }
98    }
99
100    /// Resolve a language from its canonical wire name. Accepts a few
101    /// historical aliases (`ts`, `js`, `c++`, …) so users don't have to
102    /// memorize the exact spelling.
103    pub fn from_name(name: &str) -> Option<Self> {
104        let normalized = name.trim().to_ascii_lowercase();
105        Some(match normalized.as_str() {
106            "typescript" | "ts" => Language::TypeScript,
107            "tsx" => Language::Tsx,
108            "javascript" | "js" => Language::JavaScript,
109            "jsx" => Language::Jsx,
110            "python" | "py" => Language::Python,
111            "go" | "golang" => Language::Go,
112            "rust" | "rs" => Language::Rust,
113            "java" => Language::Java,
114            "c" => Language::C,
115            "cpp" | "c++" | "cxx" => Language::Cpp,
116            "csharp" | "c#" | "cs" => Language::CSharp,
117            "ruby" | "rb" => Language::Ruby,
118            "kotlin" | "kt" => Language::Kotlin,
119            "php" => Language::Php,
120            "scala" => Language::Scala,
121            "bash" | "shell" | "sh" | "zsh" => Language::Bash,
122            "swift" => Language::Swift,
123            "zig" => Language::Zig,
124            "elixir" | "ex" => Language::Elixir,
125            "lua" => Language::Lua,
126            "haskell" | "hs" => Language::Haskell,
127            "r" => Language::R,
128            _ => return None,
129        })
130    }
131
132    /// Resolve a language from a file extension. The mapping mirrors the
133    /// Swift `extensionMap` in `TreeSitterIntegration.swift`.
134    pub fn from_extension(ext: &str) -> Option<Self> {
135        let normalized = ext.trim_start_matches('.').to_ascii_lowercase();
136        Some(match normalized.as_str() {
137            "ts" => Language::TypeScript,
138            "tsx" => Language::Tsx,
139            "js" | "mjs" | "cjs" => Language::JavaScript,
140            "jsx" => Language::Jsx,
141            "py" => Language::Python,
142            "go" => Language::Go,
143            "rs" => Language::Rust,
144            "java" => Language::Java,
145            "c" | "h" => Language::C,
146            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Language::Cpp,
147            "cs" | "csx" => Language::CSharp,
148            "rb" => Language::Ruby,
149            "kt" | "kts" => Language::Kotlin,
150            "php" => Language::Php,
151            "scala" | "sc" => Language::Scala,
152            "sh" | "bash" | "zsh" => Language::Bash,
153            "swift" => Language::Swift,
154            "zig" | "zon" => Language::Zig,
155            "ex" | "exs" => Language::Elixir,
156            "lua" => Language::Lua,
157            "hs" | "lhs" => Language::Haskell,
158            "r" => Language::R,
159            _ => return None,
160        })
161    }
162
163    /// Resolve from a file path: prefer explicit `language_hint` if
164    /// supplied, otherwise fall back to extension-based detection.
165    pub fn detect(path: &std::path::Path, language_hint: Option<&str>) -> Option<Self> {
166        if let Some(name) = language_hint.and_then(|s| (!s.is_empty()).then_some(s)) {
167            return Self::from_name(name);
168        }
169        let ext = path.extension().and_then(|s| s.to_str())?;
170        Self::from_extension(ext)
171    }
172
173    /// Every language we ship support for. Useful for tests + introspection.
174    pub fn all() -> &'static [Language] {
175        &[
176            Language::TypeScript,
177            Language::Tsx,
178            Language::JavaScript,
179            Language::Jsx,
180            Language::Python,
181            Language::Go,
182            Language::Rust,
183            Language::Java,
184            Language::C,
185            Language::Cpp,
186            Language::CSharp,
187            Language::Ruby,
188            Language::Kotlin,
189            Language::Php,
190            Language::Scala,
191            Language::Bash,
192            Language::Swift,
193            Language::Zig,
194            Language::Elixir,
195            Language::Lua,
196            Language::Haskell,
197            Language::R,
198        ]
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative extension per language, paired with the variant
    /// it must resolve to.
    const CANONICAL_EXTENSIONS: &[(&str, Language)] = &[
        ("ts", Language::TypeScript),
        ("tsx", Language::Tsx),
        ("js", Language::JavaScript),
        ("jsx", Language::Jsx),
        ("py", Language::Python),
        ("rs", Language::Rust),
        ("go", Language::Go),
        ("java", Language::Java),
        ("c", Language::C),
        ("cpp", Language::Cpp),
        ("cs", Language::CSharp),
        ("rb", Language::Ruby),
        ("kt", Language::Kotlin),
        ("php", Language::Php),
        ("scala", Language::Scala),
        ("sh", Language::Bash),
        ("swift", Language::Swift),
        ("zig", Language::Zig),
        ("ex", Language::Elixir),
        ("lua", Language::Lua),
        ("hs", Language::Haskell),
        ("r", Language::R),
    ];

    #[test]
    fn every_language_is_loadable() {
        // Building the grammar handle must not panic, and the grammar must
        // expose at least one node kind.
        for lang in Language::all().iter().copied() {
            let grammar = lang.ts_language();
            assert!(
                grammar.node_kind_count() > 0,
                "{} grammar is empty",
                lang.name()
            );
        }
    }

    #[test]
    fn extension_detection_round_trips_canonical_extensions() {
        for &(ext, want) in CANONICAL_EXTENSIONS {
            assert_eq!(Language::from_extension(ext), Some(want), "ext {ext}");
        }
    }

    #[test]
    fn name_round_trips_for_every_language() {
        // The canonical wire name must resolve back to the same variant.
        for lang in Language::all().iter().copied() {
            let wire_name = lang.name();
            assert_eq!(Language::from_name(wire_name), Some(lang));
        }
    }

    #[test]
    fn detect_prefers_hint_over_extension() {
        let ts_file = std::path::Path::new("foo.ts");

        // No hint: the `.ts` extension decides.
        let from_extension = Language::detect(ts_file, None);
        assert_eq!(from_extension, Some(Language::TypeScript));

        // Explicit hint: wins over the extension.
        let from_hint = Language::detect(ts_file, Some("javascript"));
        assert_eq!(from_hint, Some(Language::JavaScript));
    }
}