1use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Source languages known to this module.
///
/// The enum is a plain, field-less `Copy` type, so values are cheap to pass by
/// value and can be used as hash-map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    Python,
    JavaScript,
    TypeScript,
    Rust,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Bash,
    Php,
    Kotlin,
    Swift,
    Scala,
    Haskell,
    Elixir,
    Clojure,
    OCaml,
    /// Recognised by name and extension, but the only variant for which this
    /// file provides neither a tree-sitter grammar nor a query.
    FSharp,
    Lua,
    R,
}
36
37impl Language {
38 #[must_use]
40 pub fn from_extension(ext: &str) -> Option<Self> {
41 match ext.to_lowercase().as_str() {
42 "py" | "pyw" => Some(Self::Python),
43 "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
44 "ts" | "tsx" => Some(Self::TypeScript),
45 "rs" => Some(Self::Rust),
46 "go" => Some(Self::Go),
47 "java" => Some(Self::Java),
48 "c" | "h" => Some(Self::C),
49 "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
50 "cs" => Some(Self::CSharp),
51 "rb" | "rake" | "gemspec" => Some(Self::Ruby),
52 "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
53 "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
54 "kt" | "kts" => Some(Self::Kotlin),
55 "swift" => Some(Self::Swift),
56 "scala" | "sc" => Some(Self::Scala),
57 "hs" | "lhs" => Some(Self::Haskell),
58 "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
59 "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
60 "ml" | "mli" => Some(Self::OCaml),
61 "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
62 "lua" => Some(Self::Lua),
63 "r" | "rmd" => Some(Self::R),
64 _ => None,
65 }
66 }
67
68 #[must_use]
70 pub const fn name(self) -> &'static str {
71 match self {
72 Self::Python => "python",
73 Self::JavaScript => "javascript",
74 Self::TypeScript => "typescript",
75 Self::Rust => "rust",
76 Self::Go => "go",
77 Self::Java => "java",
78 Self::C => "c",
79 Self::Cpp => "cpp",
80 Self::CSharp => "csharp",
81 Self::Ruby => "ruby",
82 Self::Bash => "bash",
83 Self::Php => "php",
84 Self::Kotlin => "kotlin",
85 Self::Swift => "swift",
86 Self::Scala => "scala",
87 Self::Haskell => "haskell",
88 Self::Elixir => "elixir",
89 Self::Clojure => "clojure",
90 Self::OCaml => "ocaml",
91 Self::FSharp => "fsharp",
92 Self::Lua => "lua",
93 Self::R => "r",
94 }
95 }
96
97 #[must_use]
99 pub const fn display_name(self) -> &'static str {
100 match self {
101 Self::Python => "Python",
102 Self::JavaScript => "JavaScript",
103 Self::TypeScript => "TypeScript",
104 Self::Rust => "Rust",
105 Self::Go => "Go",
106 Self::Java => "Java",
107 Self::C => "C",
108 Self::Cpp => "C++",
109 Self::CSharp => "C#",
110 Self::Ruby => "Ruby",
111 Self::Bash => "Bash",
112 Self::Php => "PHP",
113 Self::Kotlin => "Kotlin",
114 Self::Swift => "Swift",
115 Self::Scala => "Scala",
116 Self::Haskell => "Haskell",
117 Self::Elixir => "Elixir",
118 Self::Clojure => "Clojure",
119 Self::OCaml => "OCaml",
120 Self::FSharp => "F#",
121 Self::Lua => "Lua",
122 Self::R => "R",
123 }
124 }
125
126 #[must_use]
128 pub const fn has_parser_support(self) -> bool {
129 !matches!(self, Self::FSharp)
130 }
131
132 pub fn tree_sitter_language(self) -> Option<TSLanguage> {
134 Some(match self {
135 Self::Python => tree_sitter_python::LANGUAGE.into(),
136 Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
137 Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
138 Self::Rust => tree_sitter_rust::LANGUAGE.into(),
139 Self::Go => tree_sitter_go::LANGUAGE.into(),
140 Self::Java => tree_sitter_java::LANGUAGE.into(),
141 Self::C => tree_sitter_c::LANGUAGE.into(),
142 Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
143 Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
144 Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
145 Self::Bash => tree_sitter_bash::LANGUAGE.into(),
146 Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
147 Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
148 Self::Swift => tree_sitter_swift::LANGUAGE.into(),
149 Self::Scala => tree_sitter_scala::LANGUAGE.into(),
150 Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
151 Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
152 Self::Clojure => tree_sitter_clojure::LANGUAGE.into(),
153 Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
154 Self::Lua => tree_sitter_lua::LANGUAGE.into(),
155 Self::R => tree_sitter_r::LANGUAGE.into(),
156 Self::FSharp => return None,
157 })
158 }
159
160 #[must_use]
162 pub const fn query_string(self) -> Option<&'static str> {
163 Some(match self {
164 Self::Python => queries::PYTHON,
165 Self::JavaScript => queries::JAVASCRIPT,
166 Self::TypeScript => queries::TYPESCRIPT,
167 Self::Rust => queries::RUST,
168 Self::Go => queries::GO,
169 Self::Java => queries::JAVA,
170 Self::C => queries::C,
171 Self::Cpp => queries::CPP,
172 Self::CSharp => queries::CSHARP,
173 Self::Ruby => queries::RUBY,
174 Self::Bash => queries::BASH,
175 Self::Php => queries::PHP,
176 Self::Kotlin => queries::KOTLIN,
177 Self::Swift => queries::SWIFT,
178 Self::Scala => queries::SCALA,
179 Self::Haskell => queries::HASKELL,
180 Self::Elixir => queries::ELIXIR,
181 Self::Clojure => queries::CLOJURE,
182 Self::OCaml => queries::OCAML,
183 Self::Lua => queries::LUA,
184 Self::R => queries::R,
185 Self::FSharp => return None,
186 })
187 }
188
189 pub fn init_parser(self) -> Result<TSParser, ParserError> {
191 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
192 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
193 })?;
194
195 let mut parser = TSParser::new();
196 parser
197 .set_language(&ts_lang)
198 .map_err(|e| ParserError::ParseError(e.to_string()))?;
199 Ok(parser)
200 }
201
202 pub fn create_query(self) -> Result<Query, ParserError> {
204 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
205 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
206 })?;
207
208 let query_str = self.query_string().ok_or_else(|| {
209 ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
210 })?;
211
212 Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
213 }
214
215 #[must_use]
217 pub const fn all() -> &'static [Self] {
218 &[
219 Self::Python,
220 Self::JavaScript,
221 Self::TypeScript,
222 Self::Rust,
223 Self::Go,
224 Self::Java,
225 Self::C,
226 Self::Cpp,
227 Self::CSharp,
228 Self::Ruby,
229 Self::Bash,
230 Self::Php,
231 Self::Kotlin,
232 Self::Swift,
233 Self::Scala,
234 Self::Haskell,
235 Self::Elixir,
236 Self::Clojure,
237 Self::OCaml,
238 Self::FSharp,
239 Self::Lua,
240 Self::R,
241 ]
242 }
243
244 #[must_use]
246 pub fn all_with_parser_support() -> Vec<Self> {
247 Self::all()
248 .iter()
249 .copied()
250 .filter(|l| l.has_parser_support())
251 .collect()
252 }
253
254 #[must_use]
256 pub const fn uses_indentation_blocks(self) -> bool {
257 matches!(self, Self::Python | Self::Haskell)
258 }
259
260 #[must_use]
262 pub const fn is_c_family(self) -> bool {
263 matches!(
264 self,
265 Self::C
266 | Self::Cpp
267 | Self::CSharp
268 | Self::Java
269 | Self::JavaScript
270 | Self::TypeScript
271 | Self::Go
272 | Self::Rust
273 | Self::Kotlin
274 | Self::Swift
275 | Self::Scala
276 | Self::Php
277 )
278 }
279
280 #[must_use]
282 pub const fn is_functional(self) -> bool {
283 matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
284 }
285}
286
287impl std::fmt::Display for Language {
288 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289 write!(f, "{}", self.display_name())
290 }
291}
292
293impl std::str::FromStr for Language {
294 type Err = ParserError;
295
296 fn from_str(s: &str) -> Result<Self, Self::Err> {
297 match s.to_lowercase().as_str() {
298 "python" | "py" => Ok(Self::Python),
299 "javascript" | "js" => Ok(Self::JavaScript),
300 "typescript" | "ts" => Ok(Self::TypeScript),
301 "rust" | "rs" => Ok(Self::Rust),
302 "go" | "golang" => Ok(Self::Go),
303 "java" => Ok(Self::Java),
304 "c" => Ok(Self::C),
305 "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
306 "csharp" | "c#" | "cs" => Ok(Self::CSharp),
307 "ruby" | "rb" => Ok(Self::Ruby),
308 "bash" | "shell" | "sh" => Ok(Self::Bash),
309 "php" => Ok(Self::Php),
310 "kotlin" | "kt" => Ok(Self::Kotlin),
311 "swift" => Ok(Self::Swift),
312 "scala" => Ok(Self::Scala),
313 "haskell" | "hs" => Ok(Self::Haskell),
314 "elixir" | "ex" => Ok(Self::Elixir),
315 "clojure" | "clj" => Ok(Self::Clojure),
316 "ocaml" | "ml" => Ok(Self::OCaml),
317 "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
318 "lua" => Ok(Self::Lua),
319 "r" => Ok(Self::R),
320 _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
321 }
322 }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("unknown"), None);
    }

    // Extension lookup lowercases its input; the empty string is not an extension.
    #[test]
    fn test_language_from_extension_is_case_insensitive() {
        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn test_language_name() {
        assert_eq!(Language::Python.name(), "python");
        assert_eq!(Language::Rust.name(), "rust");
        assert_eq!(Language::TypeScript.name(), "typescript");
    }

    #[test]
    fn test_language_display_name() {
        assert_eq!(Language::Python.display_name(), "Python");
        assert_eq!(Language::Cpp.display_name(), "C++");
        assert_eq!(Language::CSharp.display_name(), "C#");
    }

    #[test]
    fn test_parser_support() {
        assert!(Language::Python.has_parser_support());
        assert!(Language::Rust.has_parser_support());
        assert!(!Language::FSharp.has_parser_support());
    }

    // `has_parser_support` is a hand-maintained list; it must agree with the
    // grammar and query tables for every variant.
    #[test]
    fn test_parser_support_matches_grammar_and_query() {
        for &lang in Language::all() {
            assert_eq!(
                lang.has_parser_support(),
                lang.tree_sitter_language().is_some()
            );
            assert_eq!(lang.has_parser_support(), lang.query_string().is_some());
        }
    }

    #[test]
    fn test_language_from_str() {
        assert_eq!("python".parse::<Language>().unwrap(), Language::Python);
        assert_eq!("c++".parse::<Language>().unwrap(), Language::Cpp);
        assert_eq!("c#".parse::<Language>().unwrap(), Language::CSharp);
        assert!("invalid".parse::<Language>().is_err());
    }

    // Both the machine name and the display name must parse back to the
    // variant that produced them.
    #[test]
    fn test_name_and_display_round_trip() {
        for &lang in Language::all() {
            assert_eq!(lang.name().parse::<Language>().unwrap(), lang);
            assert_eq!(lang.to_string().parse::<Language>().unwrap(), lang);
        }
    }

    #[test]
    fn test_all_languages() {
        let all = Language::all();
        assert_eq!(all.len(), 22);
        assert!(all.contains(&Language::Python));
        assert!(all.contains(&Language::Rust));
    }

    #[test]
    fn test_all_with_parser_support() {
        let supported = Language::all_with_parser_support();
        assert_eq!(supported.len(), Language::all().len() - 1);
        assert!(supported.contains(&Language::Python));
        assert!(!supported.contains(&Language::FSharp));
    }

    #[test]
    fn test_tree_sitter_language() {
        assert!(Language::Python.tree_sitter_language().is_some());
        assert!(Language::Rust.tree_sitter_language().is_some());
        assert!(Language::FSharp.tree_sitter_language().is_none());
    }

    #[test]
    fn test_query_string() {
        assert!(Language::Python.query_string().is_some());
        assert!(Language::Rust.query_string().is_some());
        assert!(Language::FSharp.query_string().is_none());
    }

    #[test]
    fn test_init_parser() {
        assert!(Language::Python.init_parser().is_ok());
        assert!(Language::Rust.init_parser().is_ok());
        assert!(Language::FSharp.init_parser().is_err());
    }

    #[test]
    fn test_create_query() {
        assert!(Language::Python.create_query().is_ok());
        assert!(Language::Rust.create_query().is_ok());
        assert!(Language::FSharp.create_query().is_err());
    }

    #[test]
    fn test_language_categories() {
        assert!(Language::Python.uses_indentation_blocks());
        assert!(!Language::Rust.uses_indentation_blocks());

        assert!(Language::Rust.is_c_family());
        assert!(!Language::Python.is_c_family());

        assert!(Language::Haskell.is_functional());
        assert!(!Language::Python.is_functional());
    }
}