1use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Programming languages known to the parser layer.
///
/// Variants carrying `#[deprecated]` are still recognized by name and by file
/// extension, but no tree-sitter grammar is wired up for them.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Language {
    /// Python.
    Python,
    /// JavaScript (including JSX).
    JavaScript,
    /// TypeScript (including TSX).
    TypeScript,
    /// Rust.
    Rust,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C.
    C,
    /// C++.
    Cpp,
    /// C#.
    CSharp,
    /// Ruby.
    Ruby,
    /// Bash and other shell scripts.
    Bash,
    /// PHP.
    Php,
    /// Kotlin.
    Kotlin,
    /// Swift.
    Swift,
    /// Scala.
    Scala,
    /// Haskell.
    Haskell,
    /// Elixir.
    Elixir,
    /// Clojure (recognized, but not parseable).
    #[deprecated(note = "No compatible tree-sitter grammar available")]
    Clojure,
    /// OCaml.
    OCaml,
    /// F# (recognized, but not parseable).
    #[deprecated(note = "No compatible tree-sitter grammar available")]
    FSharp,
    /// Lua.
    Lua,
    /// R.
    R,
    /// HCL, the configuration language also used by Terraform files.
    Hcl,
    /// Zig.
    Zig,
    /// Dart.
    Dart,
}
49
50#[allow(deprecated)]
51impl Language {
52 #[must_use]
54 pub fn from_extension(ext: &str) -> Option<Self> {
55 match ext.to_lowercase().as_str() {
56 "py" | "pyw" => Some(Self::Python),
57 "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
58 "ts" | "tsx" => Some(Self::TypeScript),
59 "rs" => Some(Self::Rust),
60 "go" => Some(Self::Go),
61 "java" => Some(Self::Java),
62 "c" | "h" => Some(Self::C),
63 "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
64 "cs" => Some(Self::CSharp),
65 "rb" | "rake" | "gemspec" => Some(Self::Ruby),
66 "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
67 "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
68 "kt" | "kts" => Some(Self::Kotlin),
69 "swift" => Some(Self::Swift),
70 "scala" | "sc" => Some(Self::Scala),
71 "hs" | "lhs" => Some(Self::Haskell),
72 "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
73 "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
74 "ml" | "mli" => Some(Self::OCaml),
75 "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
76 "lua" => Some(Self::Lua),
77 "r" | "rmd" => Some(Self::R),
78 "tf" | "hcl" | "tfvars" => Some(Self::Hcl),
79 "zig" | "zon" => Some(Self::Zig),
80 "dart" => Some(Self::Dart),
81 _ => None,
82 }
83 }
84
85 #[must_use]
87 pub const fn name(self) -> &'static str {
88 match self {
89 Self::Python => "python",
90 Self::JavaScript => "javascript",
91 Self::TypeScript => "typescript",
92 Self::Rust => "rust",
93 Self::Go => "go",
94 Self::Java => "java",
95 Self::C => "c",
96 Self::Cpp => "cpp",
97 Self::CSharp => "csharp",
98 Self::Ruby => "ruby",
99 Self::Bash => "bash",
100 Self::Php => "php",
101 Self::Kotlin => "kotlin",
102 Self::Swift => "swift",
103 Self::Scala => "scala",
104 Self::Haskell => "haskell",
105 Self::Elixir => "elixir",
106 Self::Clojure => "clojure",
107 Self::OCaml => "ocaml",
108 Self::FSharp => "fsharp",
109 Self::Lua => "lua",
110 Self::R => "r",
111 Self::Hcl => "hcl",
112 Self::Zig => "zig",
113 Self::Dart => "dart",
114 }
115 }
116
117 #[must_use]
119 pub const fn display_name(self) -> &'static str {
120 match self {
121 Self::Python => "Python",
122 Self::JavaScript => "JavaScript",
123 Self::TypeScript => "TypeScript",
124 Self::Rust => "Rust",
125 Self::Go => "Go",
126 Self::Java => "Java",
127 Self::C => "C",
128 Self::Cpp => "C++",
129 Self::CSharp => "C#",
130 Self::Ruby => "Ruby",
131 Self::Bash => "Bash",
132 Self::Php => "PHP",
133 Self::Kotlin => "Kotlin",
134 Self::Swift => "Swift",
135 Self::Scala => "Scala",
136 Self::Haskell => "Haskell",
137 Self::Elixir => "Elixir",
138 Self::Clojure => "Clojure",
139 Self::OCaml => "OCaml",
140 Self::FSharp => "F#",
141 Self::Lua => "Lua",
142 Self::R => "R",
143 Self::Hcl => "HCL",
144 Self::Zig => "Zig",
145 Self::Dart => "Dart",
146 }
147 }
148
149 #[must_use]
151 pub const fn has_parser_support(self) -> bool {
152 !matches!(self, Self::FSharp | Self::Clojure)
155 }
156
157 pub fn tree_sitter_language(self) -> Option<TSLanguage> {
159 Some(match self {
160 Self::Python => tree_sitter_python::LANGUAGE.into(),
161 Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
162 Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
163 Self::Rust => tree_sitter_rust::LANGUAGE.into(),
164 Self::Go => tree_sitter_go::LANGUAGE.into(),
165 Self::Java => tree_sitter_java::LANGUAGE.into(),
166 Self::C => tree_sitter_c::LANGUAGE.into(),
167 Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
168 Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
169 Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
170 Self::Bash => tree_sitter_bash::LANGUAGE.into(),
171 Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
172 Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
173 Self::Swift => tree_sitter_swift::LANGUAGE.into(),
174 Self::Scala => tree_sitter_scala::LANGUAGE.into(),
175 Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
176 Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
177 Self::Clojure => return None, Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
179 Self::Lua => tree_sitter_lua::LANGUAGE.into(),
180 Self::R => tree_sitter_r::LANGUAGE.into(),
181 Self::Hcl => tree_sitter_hcl::LANGUAGE.into(),
182 Self::Zig => tree_sitter_zig::LANGUAGE.into(),
183 Self::Dart => tree_sitter_dart_orchard::LANGUAGE.into(),
184 Self::FSharp => return None,
185 })
186 }
187
188 #[must_use]
190 pub const fn query_string(self) -> Option<&'static str> {
191 Some(match self {
192 Self::Python => queries::PYTHON,
193 Self::JavaScript => queries::JAVASCRIPT,
194 Self::TypeScript => queries::TYPESCRIPT,
195 Self::Rust => queries::RUST,
196 Self::Go => queries::GO,
197 Self::Java => queries::JAVA,
198 Self::C => queries::C,
199 Self::Cpp => queries::CPP,
200 Self::CSharp => queries::CSHARP,
201 Self::Ruby => queries::RUBY,
202 Self::Bash => queries::BASH,
203 Self::Php => queries::PHP,
204 Self::Kotlin => queries::KOTLIN,
205 Self::Swift => queries::SWIFT,
206 Self::Scala => queries::SCALA,
207 Self::Haskell => queries::HASKELL,
208 Self::Elixir => queries::ELIXIR,
209 Self::Clojure => return None, Self::OCaml => queries::OCAML,
211 Self::Lua => queries::LUA,
212 Self::R => queries::R,
213 Self::Hcl => queries::HCL,
214 Self::Zig => queries::ZIG,
215 Self::Dart => queries::DART,
216 Self::FSharp => return None,
217 })
218 }
219
220 pub fn init_parser(self) -> Result<TSParser, ParserError> {
222 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
223 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
224 })?;
225
226 let mut parser = TSParser::new();
227 parser
228 .set_language(&ts_lang)
229 .map_err(|e| ParserError::ParseError(e.to_string()))?;
230 Ok(parser)
231 }
232
233 pub fn create_query(self) -> Result<Query, ParserError> {
235 let ts_lang = self.tree_sitter_language().ok_or_else(|| {
236 ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
237 })?;
238
239 let query_str = self.query_string().ok_or_else(|| {
240 ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
241 })?;
242
243 Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
244 }
245
246 #[must_use]
248 pub const fn all() -> &'static [Self] {
249 &[
250 Self::Python,
251 Self::JavaScript,
252 Self::TypeScript,
253 Self::Rust,
254 Self::Go,
255 Self::Java,
256 Self::C,
257 Self::Cpp,
258 Self::CSharp,
259 Self::Ruby,
260 Self::Bash,
261 Self::Php,
262 Self::Kotlin,
263 Self::Swift,
264 Self::Scala,
265 Self::Haskell,
266 Self::Elixir,
267 Self::Clojure,
268 Self::OCaml,
269 Self::FSharp,
270 Self::Lua,
271 Self::R,
272 Self::Hcl,
273 Self::Zig,
274 Self::Dart,
275 ]
276 }
277
278 #[must_use]
280 pub fn all_with_parser_support() -> Vec<Self> {
281 Self::all()
282 .iter()
283 .copied()
284 .filter(|l| l.has_parser_support())
285 .collect()
286 }
287
288 #[must_use]
290 pub const fn uses_indentation_blocks(self) -> bool {
291 matches!(self, Self::Python | Self::Haskell)
292 }
293
294 #[must_use]
296 pub const fn is_c_family(self) -> bool {
297 matches!(
298 self,
299 Self::C
300 | Self::Cpp
301 | Self::CSharp
302 | Self::Java
303 | Self::JavaScript
304 | Self::TypeScript
305 | Self::Go
306 | Self::Rust
307 | Self::Kotlin
308 | Self::Swift
309 | Self::Scala
310 | Self::Php
311 | Self::Dart
312 )
313 }
314
315 #[must_use]
317 pub const fn is_functional(self) -> bool {
318 matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
319 }
320}
321
322#[allow(deprecated)]
323impl std::fmt::Display for Language {
324 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
325 write!(f, "{}", self.display_name())
326 }
327}
328
/// Guesses a language label for `path` from its file name or extension.
///
/// Well-known file names (`Dockerfile`, `Makefile`, `Gemfile`, `.bashrc`, ...)
/// are checked first; otherwise the extension decides. Both comparisons are
/// case-insensitive. Returns `None` when neither the name nor the extension is
/// recognized, or when the relevant path component is not valid UTF-8.
///
/// The returned labels are plain strings and cover more file types than the
/// `Language` enum (for example `"json"`, `"markdown"` or `"dockerfile"`).
///
/// NOTE(review): any file name that starts with `dockerfile` or `makefile`
/// wins over its extension, so `dockerfile_parser.rs` is labelled
/// `"dockerfile"`. This precedence is kept as-is; confirm it is intended.
#[must_use]
pub fn detect_file_language(path: &std::path::Path) -> Option<String> {
    let by_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| language_for_file_name(&name.to_lowercase()));
    if let Some(label) = by_name {
        return Some(label.to_owned());
    }

    let ext = path.extension()?.to_str()?;
    language_for_extension(&ext.to_lowercase()).map(str::to_owned)
}

/// Looks up the label for an already lowercased, well-known file name.
fn language_for_file_name(lower_name: &str) -> Option<&'static str> {
    let label = match lower_name {
        "gnumakefile" | "bsdmakefile" => "make",
        "gemfile" | "rakefile" | "guardfile" | "vagrantfile" | "berksfile" | "podfile"
        | "fastfile" | "appfile" | "matchfile" | "deliverfile" | "snapfile" | "brewfile" => "ruby",
        ".bashrc" | ".bash_profile" | ".zshrc" | ".zprofile" | ".profile" | ".bash_aliases" => {
            "shell"
        }
        ".gitignore" | ".gitattributes" | ".gitmodules" => "gitignore",
        ".editorconfig" => "editorconfig",
        "procfile" => "procfile",
        "justfile" => "just",
        "caddyfile" => "caddyfile",
        // Prefix rules also cover the bare names and variants such as
        // `Dockerfile.dev` or `Makefile.am`.
        name if name.starts_with("dockerfile") => "dockerfile",
        name if name.starts_with("makefile") => "make",
        _ => return None,
    };
    Some(label)
}

/// Looks up the label for an already lowercased extension (no leading dot).
fn language_for_extension(lower_ext: &str) -> Option<&'static str> {
    let label = match lower_ext {
        "py" | "pyi" | "pyx" => "python",
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "jsx",
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "tsx",
        "rs" => "rust",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "scala" => "scala",
        "groovy" => "groovy",
        "clj" | "cljs" | "cljc" => "clojure",
        "c" | "h" => "c",
        "cpp" | "hpp" | "cc" | "cxx" | "hxx" => "cpp",
        "cs" => "csharp",
        "rb" | "rake" | "gemspec" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "sh" | "bash" => "bash",
        "zsh" => "zsh",
        "fish" => "fish",
        "ps1" | "psm1" => "powershell",
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "sass" => "sass",
        "less" => "less",
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "ini" | "cfg" => "ini",
        "md" | "markdown" => "markdown",
        "mdx" => "mdx",
        "rst" => "rst",
        "txt" => "text",
        "zig" => "zig",
        "lua" => "lua",
        "sql" => "sql",
        "ex" | "exs" => "elixir",
        "erl" | "hrl" => "erlang",
        "hs" | "lhs" => "haskell",
        "ml" | "mli" => "ocaml",
        "fs" | "fsi" | "fsx" => "fsharp",
        "vue" => "vue",
        "svelte" => "svelte",
        "dockerfile" => "dockerfile",
        "tf" | "tfvars" | "hcl" => "hcl",
        "makefile" | "mk" => "make",
        "cmake" => "cmake",
        "nix" => "nix",
        "jl" => "julia",
        "r" | "rmd" => "r",
        "dart" => "dart",
        "nim" => "nim",
        "v" => "vlang",
        "cr" => "crystal",
        _ => return None,
    };
    Some(label)
}
472
473#[allow(deprecated)]
474impl std::str::FromStr for Language {
475 type Err = ParserError;
476
477 fn from_str(s: &str) -> Result<Self, Self::Err> {
478 match s.to_lowercase().as_str() {
479 "python" | "py" => Ok(Self::Python),
480 "javascript" | "js" => Ok(Self::JavaScript),
481 "typescript" | "ts" => Ok(Self::TypeScript),
482 "rust" | "rs" => Ok(Self::Rust),
483 "go" | "golang" => Ok(Self::Go),
484 "java" => Ok(Self::Java),
485 "c" => Ok(Self::C),
486 "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
487 "csharp" | "c#" | "cs" => Ok(Self::CSharp),
488 "ruby" | "rb" => Ok(Self::Ruby),
489 "bash" | "shell" | "sh" => Ok(Self::Bash),
490 "php" => Ok(Self::Php),
491 "kotlin" | "kt" => Ok(Self::Kotlin),
492 "swift" => Ok(Self::Swift),
493 "scala" => Ok(Self::Scala),
494 "haskell" | "hs" => Ok(Self::Haskell),
495 "elixir" | "ex" => Ok(Self::Elixir),
496 "clojure" | "clj" => Ok(Self::Clojure),
497 "ocaml" | "ml" => Ok(Self::OCaml),
498 "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
499 "lua" => Ok(Self::Lua),
500 "r" => Ok(Self::R),
501 "hcl" | "terraform" | "tf" => Ok(Self::Hcl),
502 "zig" => Ok(Self::Zig),
503 "dart" => Ok(Self::Dart),
504 _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
505 }
506 }
507}
508
// The tests deliberately exercise the deprecated `Clojure` and `FSharp`
// variants to pin down the "no parser support" behavior.
#[cfg(test)]
#[allow(deprecated)]
mod tests {
    use super::*;

    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("unknown"), None);
    }

    #[test]
    fn test_language_from_extension_ignores_case() {
        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn test_language_name() {
        assert_eq!(Language::Python.name(), "python");
        assert_eq!(Language::Rust.name(), "rust");
        assert_eq!(Language::TypeScript.name(), "typescript");
    }

    #[test]
    fn test_language_display_name() {
        assert_eq!(Language::Python.display_name(), "Python");
        assert_eq!(Language::Cpp.display_name(), "C++");
        assert_eq!(Language::CSharp.display_name(), "C#");
    }

    #[test]
    fn test_display_matches_display_name() {
        for &language in Language::all() {
            assert_eq!(language.to_string(), language.display_name());
        }
    }

    #[test]
    fn test_parser_support() {
        assert!(Language::Python.has_parser_support());
        assert!(Language::Rust.has_parser_support());
        assert!(!Language::FSharp.has_parser_support());
        assert!(!Language::Clojure.has_parser_support());
    }

    #[test]
    fn test_parser_support_is_consistent() {
        // Grammar and query availability must agree with `has_parser_support`.
        for &language in Language::all() {
            let supported = language.has_parser_support();
            assert_eq!(
                language.tree_sitter_language().is_some(),
                supported,
                "grammar mismatch for {}",
                language.name()
            );
            assert_eq!(
                language.query_string().is_some(),
                supported,
                "query mismatch for {}",
                language.name()
            );
        }

        let supported = Language::all_with_parser_support();
        assert_eq!(supported.len(), 23);
        assert!(!supported.contains(&Language::Clojure));
        assert!(!supported.contains(&Language::FSharp));
    }

    #[test]
    fn test_language_from_str() {
        assert_eq!("python".parse::<Language>().unwrap(), Language::Python);
        assert_eq!("c++".parse::<Language>().unwrap(), Language::Cpp);
        assert_eq!("c#".parse::<Language>().unwrap(), Language::CSharp);
        assert!("invalid".parse::<Language>().is_err());
    }

    #[test]
    fn test_names_round_trip_through_from_str() {
        for &language in Language::all() {
            assert_eq!(language.name().parse::<Language>().ok(), Some(language));
            assert_eq!(language.display_name().parse::<Language>().ok(), Some(language));
        }
    }

    #[test]
    fn test_all_languages() {
        let all = Language::all();
        assert_eq!(all.len(), 25);
        assert!(all.contains(&Language::Python));
        assert!(all.contains(&Language::Rust));
    }

    #[test]
    fn test_tree_sitter_language() {
        assert!(Language::Python.tree_sitter_language().is_some());
        assert!(Language::Rust.tree_sitter_language().is_some());
        assert!(Language::FSharp.tree_sitter_language().is_none());
        assert!(Language::Clojure.tree_sitter_language().is_none());
    }

    #[test]
    fn test_query_string() {
        assert!(Language::Python.query_string().is_some());
        assert!(Language::Rust.query_string().is_some());
        assert!(Language::FSharp.query_string().is_none());
        assert!(Language::Clojure.query_string().is_none());
    }

    #[test]
    fn test_init_parser() {
        assert!(Language::Python.init_parser().is_ok());
        assert!(Language::Rust.init_parser().is_ok());
        assert!(Language::FSharp.init_parser().is_err());
        assert!(Language::Clojure.init_parser().is_err());
    }

    #[test]
    fn test_create_query() {
        assert!(Language::Python.create_query().is_ok());
        assert!(Language::Rust.create_query().is_ok());
        assert!(Language::FSharp.create_query().is_err());
        assert!(Language::Clojure.create_query().is_err());
    }

    #[test]
    fn test_language_categories() {
        assert!(Language::Python.uses_indentation_blocks());
        assert!(!Language::Rust.uses_indentation_blocks());

        assert!(Language::Rust.is_c_family());
        assert!(!Language::Python.is_c_family());

        assert!(Language::Haskell.is_functional());
        assert!(!Language::Python.is_functional());
    }

    #[test]
    fn test_hcl_language() {
        assert_eq!(Language::from_extension("tf"), Some(Language::Hcl));
        assert_eq!(Language::from_extension("hcl"), Some(Language::Hcl));
        assert_eq!(Language::from_extension("tfvars"), Some(Language::Hcl));
        assert_eq!(Language::Hcl.name(), "hcl");
        assert_eq!(Language::Hcl.display_name(), "HCL");
        assert!(Language::Hcl.has_parser_support());
        assert!(Language::Hcl.tree_sitter_language().is_some());
        assert!(Language::Hcl.query_string().is_some());
        assert!(Language::Hcl.init_parser().is_ok());
        assert!(Language::Hcl.create_query().is_ok());
        assert_eq!("hcl".parse::<Language>().unwrap(), Language::Hcl);
        assert_eq!("terraform".parse::<Language>().unwrap(), Language::Hcl);
        assert_eq!("tf".parse::<Language>().unwrap(), Language::Hcl);
    }

    #[test]
    fn test_detect_file_language_by_name() {
        let detect = |p: &str| detect_file_language(std::path::Path::new(p));
        assert_eq!(detect("Dockerfile").as_deref(), Some("dockerfile"));
        assert_eq!(detect("Dockerfile.dev").as_deref(), Some("dockerfile"));
        assert_eq!(detect("Makefile").as_deref(), Some("make"));
        assert_eq!(detect("GNUmakefile").as_deref(), Some("make"));
        assert_eq!(detect("Gemfile").as_deref(), Some("ruby"));
        assert_eq!(detect(".bashrc").as_deref(), Some("shell"));
        assert_eq!(detect(".gitignore").as_deref(), Some("gitignore"));
        assert_eq!(detect("justfile").as_deref(), Some("just"));
    }

    #[test]
    fn test_detect_file_language_by_extension() {
        let detect = |p: &str| detect_file_language(std::path::Path::new(p));
        assert_eq!(detect("src/main.rs").as_deref(), Some("rust"));
        assert_eq!(detect("App.TSX").as_deref(), Some("tsx"));
        assert_eq!(detect("rules.mk").as_deref(), Some("make"));
        assert_eq!(detect("main.tf").as_deref(), Some("hcl"));
        assert_eq!(detect("README"), None);
        assert_eq!(detect("archive.unknownext"), None);
    }
}