infiniloom_engine/parser/
language.rs

//! Language definitions and detection helpers
//!
//! This module defines the supported programming languages, provides a
//! uniform interface for language-specific operations (parsers, queries,
//! classification), and offers path-based language detection.
5
6use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Programming languages known to the parser layer.
///
/// Every variant is a plain, copyable tag; the behavior attached to a
/// language (names, grammar, queries) lives in the inherent methods.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Language {
    /// Python
    Python,
    /// JavaScript
    JavaScript,
    /// TypeScript
    TypeScript,
    /// Rust
    Rust,
    /// Go
    Go,
    /// Java
    Java,
    /// C
    C,
    /// C++
    Cpp,
    /// C#
    CSharp,
    /// Ruby
    Ruby,
    /// Bash (shell scripts)
    Bash,
    /// PHP
    Php,
    /// Kotlin
    Kotlin,
    /// Swift
    Swift,
    /// Scala
    Scala,
    /// Haskell
    Haskell,
    /// Elixir
    Elixir,
    /// Clojure
    Clojure,
    /// OCaml
    OCaml,
    /// F# (recognized, but no tree-sitter grammar is wired up for it)
    FSharp,
    /// Lua
    Lua,
    /// R
    R,
}
36
37impl Language {
38    /// Detect language from file extension
39    #[must_use]
40    pub fn from_extension(ext: &str) -> Option<Self> {
41        match ext.to_lowercase().as_str() {
42            "py" | "pyw" => Some(Self::Python),
43            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
44            "ts" | "tsx" => Some(Self::TypeScript),
45            "rs" => Some(Self::Rust),
46            "go" => Some(Self::Go),
47            "java" => Some(Self::Java),
48            "c" | "h" => Some(Self::C),
49            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
50            "cs" => Some(Self::CSharp),
51            "rb" | "rake" | "gemspec" => Some(Self::Ruby),
52            "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
53            "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
54            "kt" | "kts" => Some(Self::Kotlin),
55            "swift" => Some(Self::Swift),
56            "scala" | "sc" => Some(Self::Scala),
57            "hs" | "lhs" => Some(Self::Haskell),
58            "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
59            "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
60            "ml" | "mli" => Some(Self::OCaml),
61            "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
62            "lua" => Some(Self::Lua),
63            "r" | "rmd" => Some(Self::R),
64            _ => None,
65        }
66    }
67
68    /// Get language name as string
69    #[must_use]
70    pub const fn name(self) -> &'static str {
71        match self {
72            Self::Python => "python",
73            Self::JavaScript => "javascript",
74            Self::TypeScript => "typescript",
75            Self::Rust => "rust",
76            Self::Go => "go",
77            Self::Java => "java",
78            Self::C => "c",
79            Self::Cpp => "cpp",
80            Self::CSharp => "csharp",
81            Self::Ruby => "ruby",
82            Self::Bash => "bash",
83            Self::Php => "php",
84            Self::Kotlin => "kotlin",
85            Self::Swift => "swift",
86            Self::Scala => "scala",
87            Self::Haskell => "haskell",
88            Self::Elixir => "elixir",
89            Self::Clojure => "clojure",
90            Self::OCaml => "ocaml",
91            Self::FSharp => "fsharp",
92            Self::Lua => "lua",
93            Self::R => "r",
94        }
95    }
96
97    /// Get display name for pretty printing
98    #[must_use]
99    pub const fn display_name(self) -> &'static str {
100        match self {
101            Self::Python => "Python",
102            Self::JavaScript => "JavaScript",
103            Self::TypeScript => "TypeScript",
104            Self::Rust => "Rust",
105            Self::Go => "Go",
106            Self::Java => "Java",
107            Self::C => "C",
108            Self::Cpp => "C++",
109            Self::CSharp => "C#",
110            Self::Ruby => "Ruby",
111            Self::Bash => "Bash",
112            Self::Php => "PHP",
113            Self::Kotlin => "Kotlin",
114            Self::Swift => "Swift",
115            Self::Scala => "Scala",
116            Self::Haskell => "Haskell",
117            Self::Elixir => "Elixir",
118            Self::Clojure => "Clojure",
119            Self::OCaml => "OCaml",
120            Self::FSharp => "F#",
121            Self::Lua => "Lua",
122            Self::R => "R",
123        }
124    }
125
126    /// Check if this language has full tree-sitter support
127    #[must_use]
128    pub const fn has_parser_support(self) -> bool {
129        !matches!(self, Self::FSharp)
130    }
131
132    /// Get the tree-sitter language for this language
133    pub fn tree_sitter_language(self) -> Option<TSLanguage> {
134        Some(match self {
135            Self::Python => tree_sitter_python::LANGUAGE.into(),
136            Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
137            Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
138            Self::Rust => tree_sitter_rust::LANGUAGE.into(),
139            Self::Go => tree_sitter_go::LANGUAGE.into(),
140            Self::Java => tree_sitter_java::LANGUAGE.into(),
141            Self::C => tree_sitter_c::LANGUAGE.into(),
142            Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
143            Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
144            Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
145            Self::Bash => tree_sitter_bash::LANGUAGE.into(),
146            Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
147            Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
148            Self::Swift => tree_sitter_swift::LANGUAGE.into(),
149            Self::Scala => tree_sitter_scala::LANGUAGE.into(),
150            Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
151            Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
152            Self::Clojure => tree_sitter_clojure::LANGUAGE.into(),
153            Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
154            Self::Lua => tree_sitter_lua::LANGUAGE.into(),
155            Self::R => tree_sitter_r::LANGUAGE.into(),
156            Self::FSharp => return None,
157        })
158    }
159
160    /// Get the query string for symbol extraction
161    #[must_use]
162    pub const fn query_string(self) -> Option<&'static str> {
163        Some(match self {
164            Self::Python => queries::PYTHON,
165            Self::JavaScript => queries::JAVASCRIPT,
166            Self::TypeScript => queries::TYPESCRIPT,
167            Self::Rust => queries::RUST,
168            Self::Go => queries::GO,
169            Self::Java => queries::JAVA,
170            Self::C => queries::C,
171            Self::Cpp => queries::CPP,
172            Self::CSharp => queries::CSHARP,
173            Self::Ruby => queries::RUBY,
174            Self::Bash => queries::BASH,
175            Self::Php => queries::PHP,
176            Self::Kotlin => queries::KOTLIN,
177            Self::Swift => queries::SWIFT,
178            Self::Scala => queries::SCALA,
179            Self::Haskell => queries::HASKELL,
180            Self::Elixir => queries::ELIXIR,
181            Self::Clojure => queries::CLOJURE,
182            Self::OCaml => queries::OCAML,
183            Self::Lua => queries::LUA,
184            Self::R => queries::R,
185            Self::FSharp => return None,
186        })
187    }
188
189    /// Initialize a tree-sitter parser for this language
190    pub fn init_parser(self) -> Result<TSParser, ParserError> {
191        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
192            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
193        })?;
194
195        let mut parser = TSParser::new();
196        parser
197            .set_language(&ts_lang)
198            .map_err(|e| ParserError::ParseError(e.to_string()))?;
199        Ok(parser)
200    }
201
202    /// Create a tree-sitter query for symbol extraction
203    pub fn create_query(self) -> Result<Query, ParserError> {
204        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
205            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
206        })?;
207
208        let query_str = self.query_string().ok_or_else(|| {
209            ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
210        })?;
211
212        Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
213    }
214
215    /// Get all supported languages
216    #[must_use]
217    pub const fn all() -> &'static [Self] {
218        &[
219            Self::Python,
220            Self::JavaScript,
221            Self::TypeScript,
222            Self::Rust,
223            Self::Go,
224            Self::Java,
225            Self::C,
226            Self::Cpp,
227            Self::CSharp,
228            Self::Ruby,
229            Self::Bash,
230            Self::Php,
231            Self::Kotlin,
232            Self::Swift,
233            Self::Scala,
234            Self::Haskell,
235            Self::Elixir,
236            Self::Clojure,
237            Self::OCaml,
238            Self::FSharp,
239            Self::Lua,
240            Self::R,
241        ]
242    }
243
244    /// Get all languages with full parser support
245    #[must_use]
246    pub fn all_with_parser_support() -> Vec<Self> {
247        Self::all()
248            .iter()
249            .copied()
250            .filter(|l| l.has_parser_support())
251            .collect()
252    }
253
254    /// Check if this language uses indentation for blocks (like Python)
255    #[must_use]
256    pub const fn uses_indentation_blocks(self) -> bool {
257        matches!(self, Self::Python | Self::Haskell)
258    }
259
260    /// Check if this is a C-family language (uses braces for blocks)
261    #[must_use]
262    pub const fn is_c_family(self) -> bool {
263        matches!(
264            self,
265            Self::C
266                | Self::Cpp
267                | Self::CSharp
268                | Self::Java
269                | Self::JavaScript
270                | Self::TypeScript
271                | Self::Go
272                | Self::Rust
273                | Self::Kotlin
274                | Self::Swift
275                | Self::Scala
276                | Self::Php
277        )
278    }
279
280    /// Check if this is a functional language
281    #[must_use]
282    pub const fn is_functional(self) -> bool {
283        matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
284    }
285}
286
287impl std::fmt::Display for Language {
288    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289        write!(f, "{}", self.display_name())
290    }
291}
292
/// Detect programming language from file path (filename or extension).
/// Returns a language name as a string, supporting many more formats than the `Language` enum.
/// This is useful for display purposes and handling non-parseable file types.
///
/// Resolution order (all comparisons are case-insensitive):
///
/// 1. Well-known filenames such as `Dockerfile`, `Makefile` or `.gitignore`.
/// 2. Dotted variants of those names (`Dockerfile.<suffix>`, `Makefile.<suffix>`).
/// 3. The file extension.
/// 4. Any other name that merely starts with `dockerfile` / `makefile`
///    (for example `Dockerfile-alpine`). This runs last so that ordinary
///    source files such as `dockerfile_utils.py` keep the language of their
///    extension.
///
/// Returns `None` when nothing matches or the relevant path component is not
/// valid UTF-8.
#[must_use]
pub fn detect_file_language(path: &std::path::Path) -> Option<String> {
    let lower_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_lowercase);

    if let Some(name) = lower_name.as_deref() {
        if let Some(lang) = language_for_special_filename(name) {
            return Some(lang.to_owned());
        }
        // Check for patterns like Dockerfile.something
        if is_dotted_variant(name, "dockerfile") {
            return Some("dockerfile".to_owned());
        }
        if is_dotted_variant(name, "makefile") {
            return Some("make".to_owned());
        }
    }

    // Then check extensions
    let by_extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(|ext| language_for_extension(&ext.to_lowercase()));
    if let Some(lang) = by_extension {
        return Some(lang.to_owned());
    }

    // Loose prefix fallback, only reached when the extension told us nothing.
    let name = lower_name?;
    if name.starts_with("dockerfile") {
        Some("dockerfile".to_owned())
    } else if name.starts_with("makefile") {
        Some("make".to_owned())
    } else {
        None
    }
}

/// Returns true when `name` is `prefix` immediately followed by a dot,
/// e.g. `dockerfile.ci` for the prefix `dockerfile`.
fn is_dotted_variant(name: &str, prefix: &str) -> bool {
    name.strip_prefix(prefix)
        .map_or(false, |rest| rest.starts_with('.'))
}

/// Maps an already-lowercased, well-known filename to its language name.
fn language_for_special_filename(name: &str) -> Option<&'static str> {
    let lang = match name {
        // Docker
        "dockerfile" | "dockerfile.dev" | "dockerfile.prod" | "dockerfile.test" => "dockerfile",
        // Make
        "makefile" | "gnumakefile" | "bsdmakefile" => "make",
        // Ruby
        "gemfile" | "rakefile" | "guardfile" | "vagrantfile" | "berksfile" | "podfile"
        | "fastfile" | "appfile" | "matchfile" | "deliverfile" | "snapfile" | "brewfile" => "ruby",
        // Shell
        ".bashrc" | ".bash_profile" | ".zshrc" | ".zprofile" | ".profile" | ".bash_aliases" => {
            "shell"
        },
        // Git
        ".gitignore" | ".gitattributes" | ".gitmodules" => "gitignore",
        // Editor config
        ".editorconfig" => "editorconfig",
        // Procfile (Heroku)
        "procfile" => "procfile",
        // Justfile
        "justfile" => "just",
        // Caddyfile
        "caddyfile" => "caddyfile",
        _ => return None,
    };
    Some(lang)
}

/// Maps an already-lowercased file extension (without the dot) to its
/// language name. Covers every extension `Language::from_extension` knows,
/// plus many formats that have no `Language` variant.
fn language_for_extension(ext: &str) -> Option<&'static str> {
    let lang = match ext {
        // Python
        "py" | "pyi" | "pyx" | "pyw" => "python",
        // JavaScript/TypeScript
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "jsx",
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "tsx",
        // Rust
        "rs" => "rust",
        // Go
        "go" => "go",
        // Java/JVM
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "scala" | "sc" => "scala",
        "groovy" => "groovy",
        "clj" | "cljs" | "cljc" | "edn" => "clojure",
        // C/C++
        "c" | "h" => "c",
        "cpp" | "hpp" | "cc" | "cxx" | "hxx" | "hh" => "cpp",
        // C#
        "cs" => "csharp",
        // Ruby
        "rb" | "rake" | "gemspec" => "ruby",
        // PHP
        "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => "php",
        // Swift
        "swift" => "swift",
        // Shell
        "sh" | "bash" => "bash",
        "zsh" => "zsh",
        "fish" => "fish",
        "ps1" | "psm1" => "powershell",
        // Web
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "sass" => "sass",
        "less" => "less",
        // Data/Config
        "json" => "json",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "xml" => "xml",
        "ini" | "cfg" => "ini",
        // Documentation
        "md" | "markdown" => "markdown",
        "mdx" => "mdx",
        "rst" => "rst",
        "txt" => "text",
        // Zig
        "zig" => "zig",
        // Lua
        "lua" => "lua",
        // SQL
        "sql" => "sql",
        // Elixir/Erlang
        "ex" | "exs" | "eex" | "heex" | "leex" => "elixir",
        "erl" | "hrl" => "erlang",
        // Haskell
        "hs" | "lhs" => "haskell",
        // OCaml/F#
        "ml" | "mli" => "ocaml",
        "fs" | "fsi" | "fsx" | "fsscript" => "fsharp",
        // Vue/Svelte
        "vue" => "vue",
        "svelte" => "svelte",
        // Docker
        "dockerfile" => "dockerfile",
        // Terraform
        "tf" | "tfvars" => "terraform",
        // Makefile-like
        "makefile" | "mk" => "make",
        "cmake" => "cmake",
        // Nix
        "nix" => "nix",
        // Julia
        "jl" => "julia",
        // R
        "r" | "rmd" => "r",
        // Dart
        "dart" => "dart",
        // Nim
        "nim" => "nim",
        // V
        "v" => "vlang",
        // Crystal
        "cr" => "crystal",
        _ => return None,
    };
    Some(lang)
}
436
437impl std::str::FromStr for Language {
438    type Err = ParserError;
439
440    fn from_str(s: &str) -> Result<Self, Self::Err> {
441        match s.to_lowercase().as_str() {
442            "python" | "py" => Ok(Self::Python),
443            "javascript" | "js" => Ok(Self::JavaScript),
444            "typescript" | "ts" => Ok(Self::TypeScript),
445            "rust" | "rs" => Ok(Self::Rust),
446            "go" | "golang" => Ok(Self::Go),
447            "java" => Ok(Self::Java),
448            "c" => Ok(Self::C),
449            "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
450            "csharp" | "c#" | "cs" => Ok(Self::CSharp),
451            "ruby" | "rb" => Ok(Self::Ruby),
452            "bash" | "shell" | "sh" => Ok(Self::Bash),
453            "php" => Ok(Self::Php),
454            "kotlin" | "kt" => Ok(Self::Kotlin),
455            "swift" => Ok(Self::Swift),
456            "scala" => Ok(Self::Scala),
457            "haskell" | "hs" => Ok(Self::Haskell),
458            "elixir" | "ex" => Ok(Self::Elixir),
459            "clojure" | "clj" => Ok(Self::Clojure),
460            "ocaml" | "ml" => Ok(Self::OCaml),
461            "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
462            "lua" => Ok(Self::Lua),
463            "r" => Ok(Self::R),
464            _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
465        }
466    }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their language; unknown ones map to `None`.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("py", Some(Language::Python)),
            ("rs", Some(Language::Rust)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("unknown", None),
        ];
        for &(ext, expected) in &cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    /// Canonical names are lowercase identifiers.
    #[test]
    fn test_language_name() {
        let cases = [
            (Language::Python, "python"),
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
        ];
        for &(lang, expected) in &cases {
            assert_eq!(lang.name(), expected);
        }
    }

    /// Display names use the conventional spelling, symbols included.
    #[test]
    fn test_language_display_name() {
        let cases = [
            (Language::Python, "Python"),
            (Language::Cpp, "C++"),
            (Language::CSharp, "C#"),
        ];
        for &(lang, expected) in &cases {
            assert_eq!(lang.display_name(), expected);
        }
    }

    /// Only F# is reported as lacking parser support.
    #[test]
    fn test_parser_support() {
        for lang in &[Language::Python, Language::Rust] {
            assert!(lang.has_parser_support());
        }
        assert!(!Language::FSharp.has_parser_support());
    }

    /// Names and aliases parse; anything else is an error.
    #[test]
    fn test_language_from_str() {
        let python: Language = "python".parse().unwrap();
        let cpp: Language = "c++".parse().unwrap();
        let csharp: Language = "c#".parse().unwrap();
        assert_eq!(python, Language::Python);
        assert_eq!(cpp, Language::Cpp);
        assert_eq!(csharp, Language::CSharp);

        let invalid = "invalid".parse::<Language>();
        assert!(invalid.is_err());
    }

    /// `all()` enumerates every variant.
    #[test]
    fn test_all_languages() {
        let every = Language::all();
        assert_eq!(every.len(), 22);
        for lang in &[Language::Python, Language::Rust] {
            assert!(every.contains(lang));
        }
    }

    /// Grammars exist for supported languages and are absent for F#.
    #[test]
    fn test_tree_sitter_language() {
        for lang in &[Language::Python, Language::Rust] {
            assert!(lang.tree_sitter_language().is_some());
        }
        assert!(Language::FSharp.tree_sitter_language().is_none());
    }

    /// Query strings exist for supported languages and are absent for F#.
    #[test]
    fn test_query_string() {
        for lang in &[Language::Python, Language::Rust] {
            assert!(lang.query_string().is_some());
        }
        assert!(Language::FSharp.query_string().is_none());
    }

    /// Parser construction succeeds exactly where a grammar is available.
    #[test]
    fn test_init_parser() {
        for lang in &[Language::Python, Language::Rust] {
            assert!(lang.init_parser().is_ok());
        }
        assert!(Language::FSharp.init_parser().is_err());
    }

    /// Query construction succeeds exactly where grammar and query exist.
    #[test]
    fn test_create_query() {
        for lang in &[Language::Python, Language::Rust] {
            assert!(lang.create_query().is_ok());
        }
        assert!(Language::FSharp.create_query().is_err());
    }

    /// Spot-check the classification predicates.
    #[test]
    fn test_language_categories() {
        let python = Language::Python;
        let rust = Language::Rust;

        assert!(python.uses_indentation_blocks());
        assert!(!rust.uses_indentation_blocks());

        assert!(rust.is_c_family());
        assert!(!python.is_c_family());

        assert!(Language::Haskell.is_functional());
        assert!(!python.is_functional());
    }
}
558}