1use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Source languages this module can identify.
///
/// The declaration order is relied on by nothing in this file, but it does
/// determine the numeric discriminants, so new variants should be appended.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Language {
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// Rust.
    Rust,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C.
    C,
    /// C++.
    Cpp,
    /// C#.
    CSharp,
    /// Ruby.
    Ruby,
    /// Bash.
    Bash,
    /// PHP.
    Php,
    /// Kotlin.
    Kotlin,
    /// Swift.
    Swift,
    /// Scala.
    Scala,
    /// Haskell.
    Haskell,
    /// Elixir.
    Elixir,
    /// Clojure.
    Clojure,
    /// OCaml.
    OCaml,
    /// F#.
    FSharp,
    /// Lua.
    Lua,
    /// R.
    R,
}
36
37impl Language {
38 #[must_use]
40 pub fn from_extension(ext: &str) -> Option<Self> {
41 match ext.to_lowercase().as_str() {
42 "py" | "pyw" => Some(Self::Python),
43 "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
44 "ts" | "tsx" => Some(Self::TypeScript),
45 "rs" => Some(Self::Rust),
46 "go" => Some(Self::Go),
47 "java" => Some(Self::Java),
48 "c" | "h" => Some(Self::C),
49 "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
50 "cs" => Some(Self::CSharp),
51 "rb" | "rake" | "gemspec" => Some(Self::Ruby),
52 "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
53 "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
54 "kt" | "kts" => Some(Self::Kotlin),
55 "swift" => Some(Self::Swift),
56 "scala" | "sc" => Some(Self::Scala),
57 "hs" | "lhs" => Some(Self::Haskell),
58 "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
59 "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
60 "ml" | "mli" => Some(Self::OCaml),
61 "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
62 "lua" => Some(Self::Lua),
63 "r" | "rmd" => Some(Self::R),
64 _ => None,
65 }
66 }
67
68 #[must_use]
70 pub const fn name(self) -> &'static str {
71 match self {
72 Self::Python => "python",
73 Self::JavaScript => "javascript",
74 Self::TypeScript => "typescript",
75 Self::Rust => "rust",
76 Self::Go => "go",
77 Self::Java => "java",
78 Self::C => "c",
79 Self::Cpp => "cpp",
80 Self::CSharp => "csharp",
81 Self::Ruby => "ruby",
82 Self::Bash => "bash",
83 Self::Php => "php",
84 Self::Kotlin => "kotlin",
85 Self::Swift => "swift",
86 Self::Scala => "scala",
87 Self::Haskell => "haskell",
88 Self::Elixir => "elixir",
89 Self::Clojure => "clojure",
90 Self::OCaml => "ocaml",
91 Self::FSharp => "fsharp",
92 Self::Lua => "lua",
93 Self::R => "r",
94 }
95 }
96
97 #[must_use]
99 pub const fn display_name(self) -> &'static str {
100 match self {
101 Self::Python => "Python",
102 Self::JavaScript => "JavaScript",
103 Self::TypeScript => "TypeScript",
104 Self::Rust => "Rust",
105 Self::Go => "Go",
106 Self::Java => "Java",
107 Self::C => "C",
108 Self::Cpp => "C++",
109 Self::CSharp => "C#",
110 Self::Ruby => "Ruby",
111 Self::Bash => "Bash",
112 Self::Php => "PHP",
113 Self::Kotlin => "Kotlin",
114 Self::Swift => "Swift",
115 Self::Scala => "Scala",
116 Self::Haskell => "Haskell",
117 Self::Elixir => "Elixir",
118 Self::Clojure => "Clojure",
119 Self::OCaml => "OCaml",
120 Self::FSharp => "F#",
121 Self::Lua => "Lua",
122 Self::R => "R",
123 }
124 }
125
126 #[must_use]
128 pub const fn has_parser_support(self) -> bool {
129 !matches!(self, Self::FSharp)
130 }
131
132 pub fn tree_sitter_language(self) -> Option<TSLanguage> {
134 Some(match self {
135 Self::Python => tree_sitter_python::LANGUAGE.into(),
136 Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
137 Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
138 Self::Rust => tree_sitter_rust::LANGUAGE.into(),
139 Self::Go => tree_sitter_go::LANGUAGE.into(),
140 Self::Java => tree_sitter_java::LANGUAGE.into(),
141 Self::C => tree_sitter_c::LANGUAGE.into(),
142 Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
143 Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
144 Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
145 Self::Bash => tree_sitter_bash::LANGUAGE.into(),
146 Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
147 Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
148 Self::Swift => tree_sitter_swift::LANGUAGE.into(),
149 Self::Scala => tree_sitter_scala::LANGUAGE.into(),
150 Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
151 Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
152 Self::Clojure => tree_sitter_clojure::LANGUAGE.into(),
153 Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
154 Self::Lua => tree_sitter_lua::LANGUAGE.into(),
155 Self::R => tree_sitter_r::LANGUAGE.into(),
156 Self::FSharp => return None,
157 })
158 }
159
160 #[must_use]
162 pub const fn query_string(self) -> Option<&'static str> {
163 Some(match self {
164 Self::Python => queries::PYTHON,
165 Self::JavaScript => queries::JAVASCRIPT,
166 Self::TypeScript => queries::TYPESCRIPT,
167 Self::Rust => queries::RUST,
168 Self::Go => queries::GO,
169 Self::Java => queries::JAVA,
170 Self::C => queries::C,
171 Self::Cpp => queries::CPP,
172 Self::CSharp => queries::CSHARP,
173 Self::Ruby => queries::RUBY,
174 Self::Bash => queries::BASH,
175 Self::Php => queries::PHP,
176 Self::Kotlin => queries::KOTLIN,
177 Self::Swift => queries::SWIFT,
178 Self::Scala => queries::SCALA,
179 Self::Haskell => queries::HASKELL,
180 Self::Elixir => queries::ELIXIR,
181 Self::Clojure => queries::CLOJURE,
182 Self::OCaml => queries::OCAML,
183 Self::Lua => queries::LUA,
184 Self::R => queries::R,
185 Self::FSharp => return None,
186 })
187 }
188
189 pub fn init_parser(self) -> Result<TSParser, ParserError> {
191 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
192 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
193 })?;
194
195 let mut parser = TSParser::new();
196 parser
197 .set_language(&ts_lang)
198 .map_err(|e| ParserError::ParseError(e.to_string()))?;
199 Ok(parser)
200 }
201
202 pub fn create_query(self) -> Result<Query, ParserError> {
204 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
205 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
206 })?;
207
208 let query_str = self.query_string().ok_or_else(|| {
209 ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
210 })?;
211
212 Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
213 }
214
215 #[must_use]
217 pub const fn all() -> &'static [Self] {
218 &[
219 Self::Python,
220 Self::JavaScript,
221 Self::TypeScript,
222 Self::Rust,
223 Self::Go,
224 Self::Java,
225 Self::C,
226 Self::Cpp,
227 Self::CSharp,
228 Self::Ruby,
229 Self::Bash,
230 Self::Php,
231 Self::Kotlin,
232 Self::Swift,
233 Self::Scala,
234 Self::Haskell,
235 Self::Elixir,
236 Self::Clojure,
237 Self::OCaml,
238 Self::FSharp,
239 Self::Lua,
240 Self::R,
241 ]
242 }
243
244 #[must_use]
246 pub fn all_with_parser_support() -> Vec<Self> {
247 Self::all()
248 .iter()
249 .copied()
250 .filter(|l| l.has_parser_support())
251 .collect()
252 }
253
254 #[must_use]
256 pub const fn uses_indentation_blocks(self) -> bool {
257 matches!(self, Self::Python | Self::Haskell)
258 }
259
260 #[must_use]
262 pub const fn is_c_family(self) -> bool {
263 matches!(
264 self,
265 Self::C
266 | Self::Cpp
267 | Self::CSharp
268 | Self::Java
269 | Self::JavaScript
270 | Self::TypeScript
271 | Self::Go
272 | Self::Rust
273 | Self::Kotlin
274 | Self::Swift
275 | Self::Scala
276 | Self::Php
277 )
278 }
279
280 #[must_use]
282 pub const fn is_functional(self) -> bool {
283 matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
284 }
285}
286
287impl std::fmt::Display for Language {
288 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289 write!(f, "{}", self.display_name())
290 }
291}
292
/// Guesses a language label for `path` from its file name and extension.
///
/// Resolution order:
/// 1. Well-known file names (`Dockerfile`, `Makefile`, `Gemfile`, `.bashrc`, ...)
///    and dotted Dockerfile/Makefile variants such as `Dockerfile.dev` or
///    `Makefile.am`.
/// 2. The file extension.
/// 3. As a fallback only, any other name that merely starts with `dockerfile`
///    or `makefile` (e.g. `Dockerfile-dev`).
///
/// The bare-prefix rule is deliberately tried last: otherwise ordinary source
/// files such as `dockerfile_utils.py` or `makefile_parser.rs` would be
/// labelled `dockerfile`/`make` instead of by their extension.
///
/// All comparisons are case-insensitive. Returns `None` when nothing matches
/// or when the relevant path component is not valid UTF-8.
#[must_use]
pub fn detect_file_language(path: &std::path::Path) -> Option<String> {
    let lower_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_lowercase);
    let lower_name = lower_name.as_deref();

    let label = lower_name
        .and_then(language_for_special_filename)
        .or_else(|| {
            path.extension()
                .and_then(|ext| ext.to_str())
                .and_then(|ext| language_for_file_extension(&ext.to_lowercase()))
        })
        .or_else(|| lower_name.and_then(language_for_filename_prefix));

    label.map(str::to_owned)
}

/// Maps a lowercased, well-known file name (or dotted Dockerfile/Makefile
/// variant) to its language label.
fn language_for_special_filename(name: &str) -> Option<&'static str> {
    let label = match name {
        "dockerfile" | "dockerfile.dev" | "dockerfile.prod" | "dockerfile.test" => "dockerfile",
        "makefile" | "gnumakefile" | "bsdmakefile" => "make",
        "gemfile" | "rakefile" | "guardfile" | "vagrantfile" | "berksfile" | "podfile"
        | "fastfile" | "appfile" | "matchfile" | "deliverfile" | "snapfile" | "brewfile" => "ruby",
        ".bashrc" | ".bash_profile" | ".zshrc" | ".zprofile" | ".profile" | ".bash_aliases" => {
            "shell"
        }
        ".gitignore" | ".gitattributes" | ".gitmodules" => "gitignore",
        ".editorconfig" => "editorconfig",
        "procfile" => "procfile",
        "justfile" => "just",
        "caddyfile" => "caddyfile",
        // `Dockerfile.<anything>` / `Makefile.<anything>` name the variant after
        // the dot, so the "extension" is not a language hint.
        _ if name.starts_with("dockerfile.") => "dockerfile",
        _ if name.starts_with("makefile.") => "make",
        _ => return None,
    };
    Some(label)
}

/// Maps a lowercased file extension (without the dot) to its language label.
fn language_for_file_extension(ext: &str) -> Option<&'static str> {
    let label = match ext {
        "py" | "pyi" | "pyx" => "python",
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "jsx",
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "tsx",
        "rs" => "rust",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "scala" => "scala",
        "groovy" => "groovy",
        "clj" | "cljs" | "cljc" => "clojure",
        "c" | "h" => "c",
        "cpp" | "hpp" | "cc" | "cxx" | "hxx" => "cpp",
        "cs" => "csharp",
        "rb" | "rake" | "gemspec" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "sh" | "bash" => "bash",
        "zsh" => "zsh",
        "fish" => "fish",
        "ps1" | "psm1" => "powershell",
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "sass" => "sass",
        "less" => "less",
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "ini" | "cfg" => "ini",
        "md" | "markdown" => "markdown",
        "mdx" => "mdx",
        "rst" => "rst",
        "txt" => "text",
        "zig" => "zig",
        "lua" => "lua",
        "sql" => "sql",
        "ex" | "exs" => "elixir",
        "erl" | "hrl" => "erlang",
        "hs" | "lhs" => "haskell",
        "ml" | "mli" => "ocaml",
        "fs" | "fsi" | "fsx" => "fsharp",
        "vue" => "vue",
        "svelte" => "svelte",
        "dockerfile" => "dockerfile",
        "tf" | "tfvars" => "terraform",
        "makefile" | "mk" => "make",
        "cmake" => "cmake",
        "nix" => "nix",
        "jl" => "julia",
        "r" | "rmd" => "r",
        "dart" => "dart",
        "nim" => "nim",
        "v" => "vlang",
        "cr" => "crystal",
        _ => return None,
    };
    Some(label)
}

/// Last-resort match for lowercased names that merely begin with
/// `dockerfile` or `makefile` (e.g. `dockerfile-dev`).
fn language_for_filename_prefix(name: &str) -> Option<&'static str> {
    if name.starts_with("dockerfile") {
        Some("dockerfile")
    } else if name.starts_with("makefile") {
        Some("make")
    } else {
        None
    }
}
436
437impl std::str::FromStr for Language {
438 type Err = ParserError;
439
440 fn from_str(s: &str) -> Result<Self, Self::Err> {
441 match s.to_lowercase().as_str() {
442 "python" | "py" => Ok(Self::Python),
443 "javascript" | "js" => Ok(Self::JavaScript),
444 "typescript" | "ts" => Ok(Self::TypeScript),
445 "rust" | "rs" => Ok(Self::Rust),
446 "go" | "golang" => Ok(Self::Go),
447 "java" => Ok(Self::Java),
448 "c" => Ok(Self::C),
449 "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
450 "csharp" | "c#" | "cs" => Ok(Self::CSharp),
451 "ruby" | "rb" => Ok(Self::Ruby),
452 "bash" | "shell" | "sh" => Ok(Self::Bash),
453 "php" => Ok(Self::Php),
454 "kotlin" | "kt" => Ok(Self::Kotlin),
455 "swift" => Ok(Self::Swift),
456 "scala" => Ok(Self::Scala),
457 "haskell" | "hs" => Ok(Self::Haskell),
458 "elixir" | "ex" => Ok(Self::Elixir),
459 "clojure" | "clj" => Ok(Self::Clojure),
460 "ocaml" | "ml" => Ok(Self::OCaml),
461 "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
462 "lua" => Ok(Self::Lua),
463 "r" => Ok(Self::R),
464 _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
465 }
466 }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension lookup for a few known extensions and one unknown one.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("py", Some(Language::Python)),
            ("rs", Some(Language::Rust)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("unknown", None),
        ];
        for (ext, expected) in cases.iter().copied() {
            assert_eq!(Language::from_extension(ext), expected, "extension {:?}", ext);
        }
    }

    /// Machine identifiers are lowercase.
    #[test]
    fn test_language_name() {
        let cases = [
            (Language::Python, "python"),
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.name(), expected);
        }
    }

    /// Display names keep their conventional spelling and symbols.
    #[test]
    fn test_language_display_name() {
        let cases = [
            (Language::Python, "Python"),
            (Language::Cpp, "C++"),
            (Language::CSharp, "C#"),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.display_name(), expected);
        }
    }

    /// Only F# lacks parser support among the sampled languages.
    #[test]
    fn test_parser_support() {
        let cases = [
            (Language::Python, true),
            (Language::Rust, true),
            (Language::FSharp, false),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.has_parser_support(), expected, "{:?}", language);
        }
    }

    /// Names and symbolic aliases parse; unknown input is rejected.
    #[test]
    fn test_language_from_str() {
        let cases = [
            ("python", Language::Python),
            ("c++", Language::Cpp),
            ("c#", Language::CSharp),
        ];
        for (input, expected) in cases.iter().copied() {
            assert_eq!(input.parse::<Language>().ok(), Some(expected), "input {:?}", input);
        }
        assert!("invalid".parse::<Language>().is_err());
    }

    /// `all()` lists every variant.
    #[test]
    fn test_all_languages() {
        let every = Language::all();
        assert_eq!(every.len(), 22);
        for wanted in [Language::Python, Language::Rust].iter() {
            assert!(every.contains(wanted), "missing {:?}", wanted);
        }
    }

    /// Grammars exist for supported languages and not for F#.
    #[test]
    fn test_tree_sitter_language() {
        let cases = [
            (Language::Python, true),
            (Language::Rust, true),
            (Language::FSharp, false),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.tree_sitter_language().is_some(), expected, "{:?}", language);
        }
    }

    /// Query strings exist for supported languages and not for F#.
    #[test]
    fn test_query_string() {
        let cases = [
            (Language::Python, true),
            (Language::Rust, true),
            (Language::FSharp, false),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.query_string().is_some(), expected, "{:?}", language);
        }
    }

    /// Parsers can be built for supported languages and not for F#.
    #[test]
    fn test_init_parser() {
        let cases = [
            (Language::Python, true),
            (Language::Rust, true),
            (Language::FSharp, false),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.init_parser().is_ok(), expected, "{:?}", language);
        }
    }

    /// Queries compile for supported languages and fail for F#.
    #[test]
    fn test_create_query() {
        let cases = [
            (Language::Python, true),
            (Language::Rust, true),
            (Language::FSharp, false),
        ];
        for (language, expected) in cases.iter().copied() {
            assert_eq!(language.create_query().is_ok(), expected, "{:?}", language);
        }
    }

    /// Spot checks for the three category predicates.
    #[test]
    fn test_language_categories() {
        let python = Language::Python;
        let rust = Language::Rust;

        assert!(python.uses_indentation_blocks());
        assert!(!rust.uses_indentation_blocks());

        assert!(rust.is_c_family());
        assert!(!python.is_c_family());

        assert!(Language::Haskell.is_functional());
        assert!(!python.is_functional());
    }
}