Skip to main content

infiniloom_engine/parser/
language.rs

//! Language definitions and support traits
//!
//! This module defines the supported programming languages and provides
//! a uniform interface for language-specific operations.
5
6use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Programming languages recognized by the parser layer.
///
/// Most variants are backed by a tree-sitter grammar. [`Language::Clojure`]
/// and [`Language::FSharp`] are detection-only: their file extensions and
/// display names are recognized, but they have no working tree-sitter parser
/// and therefore cannot be used for AST-based symbol extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    Python,
    JavaScript,
    TypeScript,
    Rust,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Bash,
    Php,
    Kotlin,
    Swift,
    Scala,
    Haskell,
    Elixir,
    /// Deprecated: no compatible tree-sitter grammar is available (the
    /// published grammar requires tree-sitter ^0.25).
    /// Detection-only -- file extension recognition works, but no AST parsing.
    #[deprecated(note = "No compatible tree-sitter grammar available")]
    Clojure,
    OCaml,
    /// Deprecated: no tree-sitter grammar crate exists for F#.
    /// Detection-only -- file extension recognition works, but no AST parsing.
    #[deprecated(note = "No compatible tree-sitter grammar available")]
    FSharp,
    Lua,
    R,
    Hcl,
    Zig,
    Dart,
}
49
50#[allow(deprecated)]
51impl Language {
52    /// Detect language from file extension
53    #[must_use]
54    pub fn from_extension(ext: &str) -> Option<Self> {
55        match ext.to_lowercase().as_str() {
56            "py" | "pyw" => Some(Self::Python),
57            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
58            "ts" | "tsx" => Some(Self::TypeScript),
59            "rs" => Some(Self::Rust),
60            "go" => Some(Self::Go),
61            "java" => Some(Self::Java),
62            "c" | "h" => Some(Self::C),
63            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
64            "cs" => Some(Self::CSharp),
65            "rb" | "rake" | "gemspec" => Some(Self::Ruby),
66            "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
67            "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
68            "kt" | "kts" => Some(Self::Kotlin),
69            "swift" => Some(Self::Swift),
70            "scala" | "sc" => Some(Self::Scala),
71            "hs" | "lhs" => Some(Self::Haskell),
72            "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
73            "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
74            "ml" | "mli" => Some(Self::OCaml),
75            "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
76            "lua" => Some(Self::Lua),
77            "r" | "rmd" => Some(Self::R),
78            "tf" | "hcl" | "tfvars" => Some(Self::Hcl),
79            "zig" | "zon" => Some(Self::Zig),
80            "dart" => Some(Self::Dart),
81            _ => None,
82        }
83    }
84
85    /// Get language name as string
86    #[must_use]
87    pub const fn name(self) -> &'static str {
88        match self {
89            Self::Python => "python",
90            Self::JavaScript => "javascript",
91            Self::TypeScript => "typescript",
92            Self::Rust => "rust",
93            Self::Go => "go",
94            Self::Java => "java",
95            Self::C => "c",
96            Self::Cpp => "cpp",
97            Self::CSharp => "csharp",
98            Self::Ruby => "ruby",
99            Self::Bash => "bash",
100            Self::Php => "php",
101            Self::Kotlin => "kotlin",
102            Self::Swift => "swift",
103            Self::Scala => "scala",
104            Self::Haskell => "haskell",
105            Self::Elixir => "elixir",
106            Self::Clojure => "clojure",
107            Self::OCaml => "ocaml",
108            Self::FSharp => "fsharp",
109            Self::Lua => "lua",
110            Self::R => "r",
111            Self::Hcl => "hcl",
112            Self::Zig => "zig",
113            Self::Dart => "dart",
114        }
115    }
116
117    /// Get display name for pretty printing
118    #[must_use]
119    pub const fn display_name(self) -> &'static str {
120        match self {
121            Self::Python => "Python",
122            Self::JavaScript => "JavaScript",
123            Self::TypeScript => "TypeScript",
124            Self::Rust => "Rust",
125            Self::Go => "Go",
126            Self::Java => "Java",
127            Self::C => "C",
128            Self::Cpp => "C++",
129            Self::CSharp => "C#",
130            Self::Ruby => "Ruby",
131            Self::Bash => "Bash",
132            Self::Php => "PHP",
133            Self::Kotlin => "Kotlin",
134            Self::Swift => "Swift",
135            Self::Scala => "Scala",
136            Self::Haskell => "Haskell",
137            Self::Elixir => "Elixir",
138            Self::Clojure => "Clojure",
139            Self::OCaml => "OCaml",
140            Self::FSharp => "F#",
141            Self::Lua => "Lua",
142            Self::R => "R",
143            Self::Hcl => "HCL",
144            Self::Zig => "Zig",
145            Self::Dart => "Dart",
146        }
147    }
148
149    /// Check if this language has full tree-sitter support
150    #[must_use]
151    pub const fn has_parser_support(self) -> bool {
152        // Clojure: tree-sitter-clojure 0.1.0 depends on tree-sitter ^0.25 (normal dep),
153        // which is incompatible with tree-sitter 0.26. No compatible version exists.
154        !matches!(self, Self::FSharp | Self::Clojure)
155    }
156
157    /// Get the tree-sitter language for this language
158    pub fn tree_sitter_language(self) -> Option<TSLanguage> {
159        Some(match self {
160            Self::Python => tree_sitter_python::LANGUAGE.into(),
161            Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
162            Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
163            Self::Rust => tree_sitter_rust::LANGUAGE.into(),
164            Self::Go => tree_sitter_go::LANGUAGE.into(),
165            Self::Java => tree_sitter_java::LANGUAGE.into(),
166            Self::C => tree_sitter_c::LANGUAGE.into(),
167            Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
168            Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
169            Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
170            Self::Bash => tree_sitter_bash::LANGUAGE.into(),
171            Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
172            Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
173            Self::Swift => tree_sitter_swift::LANGUAGE.into(),
174            Self::Scala => tree_sitter_scala::LANGUAGE.into(),
175            Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
176            Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
177            Self::Clojure => return None, // tree-sitter-clojure incompatible with tree-sitter 0.26
178            Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
179            Self::Lua => tree_sitter_lua::LANGUAGE.into(),
180            Self::R => tree_sitter_r::LANGUAGE.into(),
181            Self::Hcl => tree_sitter_hcl::LANGUAGE.into(),
182            Self::Zig => tree_sitter_zig::LANGUAGE.into(),
183            Self::Dart => tree_sitter_dart_orchard::LANGUAGE.into(),
184            Self::FSharp => return None,
185        })
186    }
187
188    /// Get the query string for symbol extraction
189    #[must_use]
190    pub const fn query_string(self) -> Option<&'static str> {
191        Some(match self {
192            Self::Python => queries::PYTHON,
193            Self::JavaScript => queries::JAVASCRIPT,
194            Self::TypeScript => queries::TYPESCRIPT,
195            Self::Rust => queries::RUST,
196            Self::Go => queries::GO,
197            Self::Java => queries::JAVA,
198            Self::C => queries::C,
199            Self::Cpp => queries::CPP,
200            Self::CSharp => queries::CSHARP,
201            Self::Ruby => queries::RUBY,
202            Self::Bash => queries::BASH,
203            Self::Php => queries::PHP,
204            Self::Kotlin => queries::KOTLIN,
205            Self::Swift => queries::SWIFT,
206            Self::Scala => queries::SCALA,
207            Self::Haskell => queries::HASKELL,
208            Self::Elixir => queries::ELIXIR,
209            Self::Clojure => return None, // tree-sitter-clojure incompatible with tree-sitter 0.26
210            Self::OCaml => queries::OCAML,
211            Self::Lua => queries::LUA,
212            Self::R => queries::R,
213            Self::Hcl => queries::HCL,
214            Self::Zig => queries::ZIG,
215            Self::Dart => queries::DART,
216            Self::FSharp => return None,
217        })
218    }
219
220    /// Initialize a tree-sitter parser for this language
221    pub fn init_parser(self) -> Result<TSParser, ParserError> {
222        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
223            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
224        })?;
225
226        let mut parser = TSParser::new();
227        parser
228            .set_language(&ts_lang)
229            .map_err(|e| ParserError::ParseError(e.to_string()))?;
230        Ok(parser)
231    }
232
233    /// Create a tree-sitter query for symbol extraction
234    pub fn create_query(self) -> Result<Query, ParserError> {
235        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
236            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
237        })?;
238
239        let query_str = self.query_string().ok_or_else(|| {
240            ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
241        })?;
242
243        Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
244    }
245
246    /// Get all supported languages
247    #[must_use]
248    pub const fn all() -> &'static [Self] {
249        &[
250            Self::Python,
251            Self::JavaScript,
252            Self::TypeScript,
253            Self::Rust,
254            Self::Go,
255            Self::Java,
256            Self::C,
257            Self::Cpp,
258            Self::CSharp,
259            Self::Ruby,
260            Self::Bash,
261            Self::Php,
262            Self::Kotlin,
263            Self::Swift,
264            Self::Scala,
265            Self::Haskell,
266            Self::Elixir,
267            Self::Clojure,
268            Self::OCaml,
269            Self::FSharp,
270            Self::Lua,
271            Self::R,
272            Self::Hcl,
273            Self::Zig,
274            Self::Dart,
275        ]
276    }
277
278    /// Get all languages with full parser support
279    #[must_use]
280    pub fn all_with_parser_support() -> Vec<Self> {
281        Self::all()
282            .iter()
283            .copied()
284            .filter(|l| l.has_parser_support())
285            .collect()
286    }
287
288    /// Check if this language uses indentation for blocks (like Python)
289    #[must_use]
290    pub const fn uses_indentation_blocks(self) -> bool {
291        matches!(self, Self::Python | Self::Haskell)
292    }
293
294    /// Check if this is a C-family language (uses braces for blocks)
295    #[must_use]
296    pub const fn is_c_family(self) -> bool {
297        matches!(
298            self,
299            Self::C
300                | Self::Cpp
301                | Self::CSharp
302                | Self::Java
303                | Self::JavaScript
304                | Self::TypeScript
305                | Self::Go
306                | Self::Rust
307                | Self::Kotlin
308                | Self::Swift
309                | Self::Scala
310                | Self::Php
311                | Self::Dart
312        )
313    }
314
315    /// Check if this is a functional language
316    #[must_use]
317    pub const fn is_functional(self) -> bool {
318        matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
319    }
320}
321
322#[allow(deprecated)]
323impl std::fmt::Display for Language {
324    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
325        write!(f, "{}", self.display_name())
326    }
327}
328
/// Detect programming language from file path (filename or extension).
///
/// Returns a language name as a string, supporting many more formats than the
/// `Language` enum. This is useful for display purposes and handling
/// non-parseable file types.
///
/// Resolution order:
/// 1. Well-known file names (`Dockerfile`, `Makefile`, `Gemfile`, `.bashrc`, ...),
///    compared case-insensitively.
/// 2. `Dockerfile.<suffix>` / `Makefile.<suffix>` style names.
/// 3. The file extension, compared case-insensitively.
/// 4. Any remaining name that merely starts with `dockerfile` / `makefile`
///    (e.g. `Dockerfile-dev`), but only when the extension was not recognized,
///    so that `dockerfile_parser.rs` is reported as `rust` rather than
///    `dockerfile`.
///
/// Returns `None` when nothing matches or the relevant path component is not
/// valid UTF-8.
#[must_use]
pub fn detect_file_language(path: &std::path::Path) -> Option<String> {
    let lower_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_lowercase);

    if let Some(name) = lower_name.as_deref() {
        // First, check for well-known filenames without extensions
        let by_name = match name {
            // Docker
            "dockerfile" | "dockerfile.dev" | "dockerfile.prod" | "dockerfile.test" => {
                Some("dockerfile")
            },
            // Make
            "makefile" | "gnumakefile" | "bsdmakefile" => Some("make"),
            // Ruby
            "gemfile" | "rakefile" | "guardfile" | "vagrantfile" | "berksfile" | "podfile"
            | "fastfile" | "appfile" | "matchfile" | "deliverfile" | "snapfile"
            | "brewfile" => Some("ruby"),
            // Shell
            ".bashrc" | ".bash_profile" | ".zshrc" | ".zprofile" | ".profile"
            | ".bash_aliases" => Some("shell"),
            // Git
            ".gitignore" | ".gitattributes" | ".gitmodules" => Some("gitignore"),
            // Editor config
            ".editorconfig" => Some("editorconfig"),
            // Procfile (Heroku)
            "procfile" => Some("procfile"),
            // Justfile
            "justfile" => Some("just"),
            // Caddyfile
            "caddyfile" => Some("caddyfile"),
            _ => None,
        };
        if let Some(lang) = by_name {
            return Some(lang.to_owned());
        }

        // Patterns like `Dockerfile.something` take precedence over the suffix
        // being interpreted as an extension.
        if name.starts_with("dockerfile.") {
            return Some("dockerfile".to_owned());
        }
        if name.starts_with("makefile.") {
            return Some("make".to_owned());
        }
    }

    // Then check extensions
    let by_extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(|ext| {
            let lang = match ext.to_lowercase().as_str() {
                // Python
                "py" | "pyi" | "pyx" | "pyw" => "python",
                // JavaScript/TypeScript
                "js" | "mjs" | "cjs" => "javascript",
                "jsx" => "jsx",
                "ts" | "mts" | "cts" => "typescript",
                "tsx" => "tsx",
                // Rust
                "rs" => "rust",
                // Go
                "go" => "go",
                // Java/JVM
                "java" => "java",
                "kt" | "kts" => "kotlin",
                "scala" | "sc" => "scala",
                "groovy" => "groovy",
                "clj" | "cljs" | "cljc" | "edn" => "clojure",
                // C/C++
                "c" | "h" => "c",
                "cpp" | "hpp" | "cc" | "cxx" | "hxx" | "hh" => "cpp",
                // C#
                "cs" => "csharp",
                // Ruby
                "rb" | "rake" | "gemspec" => "ruby",
                // PHP
                "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => "php",
                // Swift
                "swift" => "swift",
                // Shell
                "sh" | "bash" => "bash",
                "zsh" => "zsh",
                "fish" => "fish",
                "ps1" | "psm1" => "powershell",
                // Web
                "html" | "htm" => "html",
                "css" => "css",
                "scss" => "scss",
                "sass" => "sass",
                "less" => "less",
                // Data/Config
                "json" => "json",
                "yaml" | "yml" => "yaml",
                "toml" => "toml",
                "xml" => "xml",
                "ini" | "cfg" => "ini",
                // Documentation
                "md" | "markdown" => "markdown",
                "mdx" => "mdx",
                "rst" => "rst",
                "txt" => "text",
                // Zig
                "zig" | "zon" => "zig",
                // Lua
                "lua" => "lua",
                // SQL
                "sql" => "sql",
                // Elixir/Erlang
                "ex" | "exs" | "eex" | "heex" | "leex" => "elixir",
                "erl" | "hrl" => "erlang",
                // Haskell
                "hs" | "lhs" => "haskell",
                // OCaml/F#
                "ml" | "mli" => "ocaml",
                "fs" | "fsi" | "fsx" | "fsscript" => "fsharp",
                // Vue/Svelte
                "vue" => "vue",
                "svelte" => "svelte",
                // Docker
                "dockerfile" => "dockerfile",
                // Terraform/HCL
                "tf" | "tfvars" | "hcl" => "hcl",
                // Makefile-like
                "makefile" | "mk" => "make",
                "cmake" => "cmake",
                // Nix
                "nix" => "nix",
                // Julia
                "jl" => "julia",
                // R
                "r" | "rmd" => "r",
                // Dart
                "dart" => "dart",
                // Nim
                "nim" => "nim",
                // V
                "v" => "vlang",
                // Crystal
                "cr" => "crystal",
                _ => return None,
            };
            Some(lang)
        });
    if let Some(lang) = by_extension {
        return Some(lang.to_owned());
    }

    // Last resort: names such as `Dockerfile-dev` or `makefile_linux` whose
    // extension (if any) was not recognized above.
    let name = lower_name.as_deref()?;
    if name.starts_with("dockerfile") {
        Some("dockerfile".to_owned())
    } else if name.starts_with("makefile") {
        Some("make".to_owned())
    } else {
        None
    }
}
472
473#[allow(deprecated)]
474impl std::str::FromStr for Language {
475    type Err = ParserError;
476
477    fn from_str(s: &str) -> Result<Self, Self::Err> {
478        match s.to_lowercase().as_str() {
479            "python" | "py" => Ok(Self::Python),
480            "javascript" | "js" => Ok(Self::JavaScript),
481            "typescript" | "ts" => Ok(Self::TypeScript),
482            "rust" | "rs" => Ok(Self::Rust),
483            "go" | "golang" => Ok(Self::Go),
484            "java" => Ok(Self::Java),
485            "c" => Ok(Self::C),
486            "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
487            "csharp" | "c#" | "cs" => Ok(Self::CSharp),
488            "ruby" | "rb" => Ok(Self::Ruby),
489            "bash" | "shell" | "sh" => Ok(Self::Bash),
490            "php" => Ok(Self::Php),
491            "kotlin" | "kt" => Ok(Self::Kotlin),
492            "swift" => Ok(Self::Swift),
493            "scala" => Ok(Self::Scala),
494            "haskell" | "hs" => Ok(Self::Haskell),
495            "elixir" | "ex" => Ok(Self::Elixir),
496            "clojure" | "clj" => Ok(Self::Clojure),
497            "ocaml" | "ml" => Ok(Self::OCaml),
498            "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
499            "lua" => Ok(Self::Lua),
500            "r" => Ok(Self::R),
501            "hcl" | "terraform" | "tf" => Ok(Self::Hcl),
502            "zig" => Ok(Self::Zig),
503            "dart" => Ok(Self::Dart),
504            _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
505        }
506    }
507}
508
// Unit tests for language detection, naming, parser wiring and categories.
#[cfg(test)]
#[allow(deprecated)] // the detection-only variants are exercised on purpose
mod tests {
    use super::*;

    #[test]
    fn test_language_from_extension() {
        assert_eq!(Language::from_extension("py"), Some(Language::Python));
        assert_eq!(Language::from_extension("rs"), Some(Language::Rust));
        assert_eq!(Language::from_extension("ts"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("tsx"), Some(Language::TypeScript));
        assert_eq!(Language::from_extension("unknown"), None);
    }

    #[test]
    fn test_language_from_extension_is_case_insensitive() {
        assert_eq!(Language::from_extension("PY"), Some(Language::Python));
        assert_eq!(Language::from_extension("Rs"), Some(Language::Rust));
    }

    #[test]
    fn test_detection_only_extensions() {
        assert_eq!(Language::from_extension("clj"), Some(Language::Clojure));
        assert_eq!(Language::from_extension("fs"), Some(Language::FSharp));
    }

    #[test]
    fn test_language_name() {
        assert_eq!(Language::Python.name(), "python");
        assert_eq!(Language::Rust.name(), "rust");
        assert_eq!(Language::TypeScript.name(), "typescript");
    }

    #[test]
    fn test_language_display_name() {
        assert_eq!(Language::Python.display_name(), "Python");
        assert_eq!(Language::Cpp.display_name(), "C++");
        assert_eq!(Language::CSharp.display_name(), "C#");
    }

    #[test]
    fn test_language_display() {
        assert_eq!(Language::Cpp.to_string(), "C++");
        assert_eq!(Language::FSharp.to_string(), "F#");
    }

    #[test]
    fn test_parser_support() {
        assert!(Language::Python.has_parser_support());
        assert!(Language::Rust.has_parser_support());
        assert!(!Language::FSharp.has_parser_support());
        assert!(!Language::Clojure.has_parser_support());
    }

    #[test]
    fn test_language_from_str() {
        assert_eq!("python".parse::<Language>().unwrap(), Language::Python);
        assert_eq!("c++".parse::<Language>().unwrap(), Language::Cpp);
        assert_eq!("c#".parse::<Language>().unwrap(), Language::CSharp);
        assert!("invalid".parse::<Language>().is_err());
    }

    #[test]
    fn test_name_round_trips_through_from_str() {
        for language in Language::all() {
            assert_eq!(language.name().parse::<Language>().unwrap(), *language);
        }
    }

    #[test]
    fn test_all_languages() {
        let all = Language::all();
        assert_eq!(all.len(), 25);
        assert!(all.contains(&Language::Python));
        assert!(all.contains(&Language::Rust));
    }

    #[test]
    fn test_all_with_parser_support() {
        let supported = Language::all_with_parser_support();
        assert_eq!(supported.len(), 23);
        assert!(supported.contains(&Language::Python));
        assert!(!supported.contains(&Language::Clojure));
        assert!(!supported.contains(&Language::FSharp));
    }

    #[test]
    fn test_tree_sitter_language() {
        assert!(Language::Python.tree_sitter_language().is_some());
        assert!(Language::Rust.tree_sitter_language().is_some());
        assert!(Language::FSharp.tree_sitter_language().is_none());
        assert!(Language::Clojure.tree_sitter_language().is_none());
    }

    #[test]
    fn test_query_string() {
        assert!(Language::Python.query_string().is_some());
        assert!(Language::Rust.query_string().is_some());
        assert!(Language::FSharp.query_string().is_none());
        assert!(Language::Clojure.query_string().is_none());
    }

    #[test]
    fn test_init_parser() {
        assert!(Language::Python.init_parser().is_ok());
        assert!(Language::Rust.init_parser().is_ok());
        assert!(Language::FSharp.init_parser().is_err());
        assert!(Language::Clojure.init_parser().is_err());
    }

    #[test]
    fn test_create_query() {
        assert!(Language::Python.create_query().is_ok());
        assert!(Language::Rust.create_query().is_ok());
        assert!(Language::FSharp.create_query().is_err());
        assert!(Language::Clojure.create_query().is_err());
    }

    #[test]
    fn test_language_categories() {
        assert!(Language::Python.uses_indentation_blocks());
        assert!(!Language::Rust.uses_indentation_blocks());

        assert!(Language::Rust.is_c_family());
        assert!(!Language::Python.is_c_family());

        assert!(Language::Haskell.is_functional());
        assert!(!Language::Python.is_functional());
    }

    #[test]
    fn test_hcl_language() {
        assert_eq!(Language::from_extension("tf"), Some(Language::Hcl));
        assert_eq!(Language::from_extension("hcl"), Some(Language::Hcl));
        assert_eq!(Language::from_extension("tfvars"), Some(Language::Hcl));
        assert_eq!(Language::Hcl.name(), "hcl");
        assert_eq!(Language::Hcl.display_name(), "HCL");
        assert!(Language::Hcl.has_parser_support());
        assert!(Language::Hcl.tree_sitter_language().is_some());
        assert!(Language::Hcl.query_string().is_some());
        assert!(Language::Hcl.init_parser().is_ok());
        assert!(Language::Hcl.create_query().is_ok());
        assert_eq!("hcl".parse::<Language>().unwrap(), Language::Hcl);
        assert_eq!("terraform".parse::<Language>().unwrap(), Language::Hcl);
        assert_eq!("tf".parse::<Language>().unwrap(), Language::Hcl);
    }

    #[test]
    fn test_detect_file_language() {
        use std::path::Path;

        assert_eq!(detect_file_language(Path::new("Dockerfile")), Some("dockerfile".to_owned()));
        assert_eq!(detect_file_language(Path::new("Makefile")), Some("make".to_owned()));
        assert_eq!(detect_file_language(Path::new("Gemfile")), Some("ruby".to_owned()));
        assert_eq!(detect_file_language(Path::new("src/main.rs")), Some("rust".to_owned()));
        assert_eq!(detect_file_language(Path::new("config.YAML")), Some("yaml".to_owned()));
        assert_eq!(detect_file_language(Path::new("file.unknownext")), None);
    }
}