infiniloom_engine/parser/
language.rs

1//! Language definitions and support traits
2//!
3//! This module defines the supported programming languages and provides
4//! a uniform interface for language-specific operations.
5
6use super::core::ParserError;
7use super::queries;
8use tree_sitter::{Language as TSLanguage, Parser as TSParser, Query};
9
/// Every programming language the parser layer knows about.
///
/// Variants are plain unit tags, so values are cheap to copy, compare and use
/// as hash-map keys. The declaration order is significant for anything that
/// relies on discriminant values, so new variants should be appended.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Language {
    /// Python
    Python,
    /// JavaScript
    JavaScript,
    /// TypeScript
    TypeScript,
    /// Rust
    Rust,
    /// Go
    Go,
    /// Java
    Java,
    /// C
    C,
    /// C++
    Cpp,
    /// C#
    CSharp,
    /// Ruby
    Ruby,
    /// Bash (also used for other shell dialects detected by extension)
    Bash,
    /// PHP
    Php,
    /// Kotlin
    Kotlin,
    /// Swift
    Swift,
    /// Scala
    Scala,
    /// Haskell
    Haskell,
    /// Elixir
    Elixir,
    /// Clojure
    Clojure,
    /// OCaml
    OCaml,
    /// F# — recognised by name and extension, but no tree-sitter grammar or
    /// query is wired up for it.
    FSharp,
    /// Lua
    Lua,
    /// R
    R,
}
36
37impl Language {
38    /// Detect language from file extension
39    #[must_use]
40    pub fn from_extension(ext: &str) -> Option<Self> {
41        match ext.to_lowercase().as_str() {
42            "py" | "pyw" => Some(Self::Python),
43            "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript),
44            "ts" | "tsx" => Some(Self::TypeScript),
45            "rs" => Some(Self::Rust),
46            "go" => Some(Self::Go),
47            "java" => Some(Self::Java),
48            "c" | "h" => Some(Self::C),
49            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(Self::Cpp),
50            "cs" => Some(Self::CSharp),
51            "rb" | "rake" | "gemspec" => Some(Self::Ruby),
52            "sh" | "bash" | "zsh" | "fish" => Some(Self::Bash),
53            "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Self::Php),
54            "kt" | "kts" => Some(Self::Kotlin),
55            "swift" => Some(Self::Swift),
56            "scala" | "sc" => Some(Self::Scala),
57            "hs" | "lhs" => Some(Self::Haskell),
58            "ex" | "exs" | "eex" | "heex" | "leex" => Some(Self::Elixir),
59            "clj" | "cljs" | "cljc" | "edn" => Some(Self::Clojure),
60            "ml" | "mli" => Some(Self::OCaml),
61            "fs" | "fsi" | "fsx" | "fsscript" => Some(Self::FSharp),
62            "lua" => Some(Self::Lua),
63            "r" | "rmd" => Some(Self::R),
64            _ => None,
65        }
66    }
67
68    /// Get language name as string
69    #[must_use]
70    pub const fn name(self) -> &'static str {
71        match self {
72            Self::Python => "python",
73            Self::JavaScript => "javascript",
74            Self::TypeScript => "typescript",
75            Self::Rust => "rust",
76            Self::Go => "go",
77            Self::Java => "java",
78            Self::C => "c",
79            Self::Cpp => "cpp",
80            Self::CSharp => "csharp",
81            Self::Ruby => "ruby",
82            Self::Bash => "bash",
83            Self::Php => "php",
84            Self::Kotlin => "kotlin",
85            Self::Swift => "swift",
86            Self::Scala => "scala",
87            Self::Haskell => "haskell",
88            Self::Elixir => "elixir",
89            Self::Clojure => "clojure",
90            Self::OCaml => "ocaml",
91            Self::FSharp => "fsharp",
92            Self::Lua => "lua",
93            Self::R => "r",
94        }
95    }
96
97    /// Get display name for pretty printing
98    #[must_use]
99    pub const fn display_name(self) -> &'static str {
100        match self {
101            Self::Python => "Python",
102            Self::JavaScript => "JavaScript",
103            Self::TypeScript => "TypeScript",
104            Self::Rust => "Rust",
105            Self::Go => "Go",
106            Self::Java => "Java",
107            Self::C => "C",
108            Self::Cpp => "C++",
109            Self::CSharp => "C#",
110            Self::Ruby => "Ruby",
111            Self::Bash => "Bash",
112            Self::Php => "PHP",
113            Self::Kotlin => "Kotlin",
114            Self::Swift => "Swift",
115            Self::Scala => "Scala",
116            Self::Haskell => "Haskell",
117            Self::Elixir => "Elixir",
118            Self::Clojure => "Clojure",
119            Self::OCaml => "OCaml",
120            Self::FSharp => "F#",
121            Self::Lua => "Lua",
122            Self::R => "R",
123        }
124    }
125
126    /// Check if this language has full tree-sitter support
127    #[must_use]
128    pub const fn has_parser_support(self) -> bool {
129        !matches!(self, Self::FSharp)
130    }
131
132    /// Get the tree-sitter language for this language
133    pub fn tree_sitter_language(self) -> Option<TSLanguage> {
134        Some(match self {
135            Self::Python => tree_sitter_python::LANGUAGE.into(),
136            Self::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
137            Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
138            Self::Rust => tree_sitter_rust::LANGUAGE.into(),
139            Self::Go => tree_sitter_go::LANGUAGE.into(),
140            Self::Java => tree_sitter_java::LANGUAGE.into(),
141            Self::C => tree_sitter_c::LANGUAGE.into(),
142            Self::Cpp => tree_sitter_cpp::LANGUAGE.into(),
143            Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
144            Self::Ruby => tree_sitter_ruby::LANGUAGE.into(),
145            Self::Bash => tree_sitter_bash::LANGUAGE.into(),
146            Self::Php => tree_sitter_php::LANGUAGE_PHP.into(),
147            Self::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
148            Self::Swift => tree_sitter_swift::LANGUAGE.into(),
149            Self::Scala => tree_sitter_scala::LANGUAGE.into(),
150            Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
151            Self::Elixir => tree_sitter_elixir::LANGUAGE.into(),
152            Self::Clojure => tree_sitter_clojure::LANGUAGE.into(),
153            Self::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
154            Self::Lua => tree_sitter_lua::LANGUAGE.into(),
155            Self::R => tree_sitter_r::LANGUAGE.into(),
156            Self::FSharp => return None,
157        })
158    }
159
160    /// Get the query string for symbol extraction
161    #[must_use]
162    pub const fn query_string(self) -> Option<&'static str> {
163        Some(match self {
164            Self::Python => queries::PYTHON,
165            Self::JavaScript => queries::JAVASCRIPT,
166            Self::TypeScript => queries::TYPESCRIPT,
167            Self::Rust => queries::RUST,
168            Self::Go => queries::GO,
169            Self::Java => queries::JAVA,
170            Self::C => queries::C,
171            Self::Cpp => queries::CPP,
172            Self::CSharp => queries::CSHARP,
173            Self::Ruby => queries::RUBY,
174            Self::Bash => queries::BASH,
175            Self::Php => queries::PHP,
176            Self::Kotlin => queries::KOTLIN,
177            Self::Swift => queries::SWIFT,
178            Self::Scala => queries::SCALA,
179            Self::Haskell => queries::HASKELL,
180            Self::Elixir => queries::ELIXIR,
181            Self::Clojure => queries::CLOJURE,
182            Self::OCaml => queries::OCAML,
183            Self::Lua => queries::LUA,
184            Self::R => queries::R,
185            Self::FSharp => return None,
186        })
187    }
188
189    /// Initialize a tree-sitter parser for this language
190    pub fn init_parser(self) -> Result<TSParser, ParserError> {
191        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
192            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
193        })?;
194
195        let mut parser = TSParser::new();
196        parser
197            .set_language(&ts_lang)
198            .map_err(|e| ParserError::ParseError(e.to_string()))?;
199        Ok(parser)
200    }
201
202    /// Create a tree-sitter query for symbol extraction
203    pub fn create_query(self) -> Result<Query, ParserError> {
204        let ts_lang = self.tree_sitter_language().ok_or_else(|| {
205            ParserError::UnsupportedLanguage(format!("{} has no parser support", self.name()))
206        })?;
207
208        let query_str = self.query_string().ok_or_else(|| {
209            ParserError::UnsupportedLanguage(format!("{} has no query defined", self.name()))
210        })?;
211
212        Query::new(&ts_lang, query_str).map_err(|e| ParserError::QueryError(e.to_string()))
213    }
214
215    /// Get all supported languages
216    #[must_use]
217    pub const fn all() -> &'static [Self] {
218        &[
219            Self::Python,
220            Self::JavaScript,
221            Self::TypeScript,
222            Self::Rust,
223            Self::Go,
224            Self::Java,
225            Self::C,
226            Self::Cpp,
227            Self::CSharp,
228            Self::Ruby,
229            Self::Bash,
230            Self::Php,
231            Self::Kotlin,
232            Self::Swift,
233            Self::Scala,
234            Self::Haskell,
235            Self::Elixir,
236            Self::Clojure,
237            Self::OCaml,
238            Self::FSharp,
239            Self::Lua,
240            Self::R,
241        ]
242    }
243
244    /// Get all languages with full parser support
245    #[must_use]
246    pub fn all_with_parser_support() -> Vec<Self> {
247        Self::all()
248            .iter()
249            .copied()
250            .filter(|l| l.has_parser_support())
251            .collect()
252    }
253
254    /// Check if this language uses indentation for blocks (like Python)
255    #[must_use]
256    pub const fn uses_indentation_blocks(self) -> bool {
257        matches!(self, Self::Python | Self::Haskell)
258    }
259
260    /// Check if this is a C-family language (uses braces for blocks)
261    #[must_use]
262    pub const fn is_c_family(self) -> bool {
263        matches!(
264            self,
265            Self::C
266                | Self::Cpp
267                | Self::CSharp
268                | Self::Java
269                | Self::JavaScript
270                | Self::TypeScript
271                | Self::Go
272                | Self::Rust
273                | Self::Kotlin
274                | Self::Swift
275                | Self::Scala
276                | Self::Php
277        )
278    }
279
280    /// Check if this is a functional language
281    #[must_use]
282    pub const fn is_functional(self) -> bool {
283        matches!(self, Self::Haskell | Self::OCaml | Self::Elixir | Self::Clojure | Self::Scala)
284    }
285}
286
287impl std::fmt::Display for Language {
288    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
289        write!(f, "{}", self.display_name())
290    }
291}
292
293impl std::str::FromStr for Language {
294    type Err = ParserError;
295
296    fn from_str(s: &str) -> Result<Self, Self::Err> {
297        match s.to_lowercase().as_str() {
298            "python" | "py" => Ok(Self::Python),
299            "javascript" | "js" => Ok(Self::JavaScript),
300            "typescript" | "ts" => Ok(Self::TypeScript),
301            "rust" | "rs" => Ok(Self::Rust),
302            "go" | "golang" => Ok(Self::Go),
303            "java" => Ok(Self::Java),
304            "c" => Ok(Self::C),
305            "cpp" | "c++" | "cxx" => Ok(Self::Cpp),
306            "csharp" | "c#" | "cs" => Ok(Self::CSharp),
307            "ruby" | "rb" => Ok(Self::Ruby),
308            "bash" | "shell" | "sh" => Ok(Self::Bash),
309            "php" => Ok(Self::Php),
310            "kotlin" | "kt" => Ok(Self::Kotlin),
311            "swift" => Ok(Self::Swift),
312            "scala" => Ok(Self::Scala),
313            "haskell" | "hs" => Ok(Self::Haskell),
314            "elixir" | "ex" => Ok(Self::Elixir),
315            "clojure" | "clj" => Ok(Self::Clojure),
316            "ocaml" | "ml" => Ok(Self::OCaml),
317            "fsharp" | "f#" | "fs" => Ok(Self::FSharp),
318            "lua" => Ok(Self::Lua),
319            "r" => Ok(Self::R),
320            _ => Err(ParserError::UnsupportedLanguage(s.to_owned())),
321        }
322    }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions resolve to their language; unknown ones yield `None`.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("py", Some(Language::Python)),
            ("rs", Some(Language::Rust)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("unknown", None),
        ];

        for &(ext, expected) in &cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    /// `name` returns the lower-case identifier.
    #[test]
    fn test_language_name() {
        let cases = [
            (Language::Python, "python"),
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
        ];

        for &(language, expected) in &cases {
            assert_eq!(language.name(), expected);
        }
    }

    /// `display_name` returns the conventional human-readable spelling.
    #[test]
    fn test_language_display_name() {
        let cases = [
            (Language::Python, "Python"),
            (Language::Cpp, "C++"),
            (Language::CSharp, "C#"),
        ];

        for &(language, expected) in &cases {
            assert_eq!(language.display_name(), expected);
        }
    }

    /// Parser support is reported for Python and Rust but not for F#.
    #[test]
    fn test_parser_support() {
        for &language in &[Language::Python, Language::Rust] {
            assert!(language.has_parser_support());
        }
        assert!(!Language::FSharp.has_parser_support());
    }

    /// Names and aliases parse; unknown strings are rejected.
    #[test]
    fn test_language_from_str() {
        let accepted = [
            ("python", Language::Python),
            ("c++", Language::Cpp),
            ("c#", Language::CSharp),
        ];

        for &(text, expected) in &accepted {
            let parsed: Language = text.parse().unwrap();
            assert_eq!(parsed, expected);
        }

        let rejected: Result<Language, _> = "invalid".parse();
        assert!(rejected.is_err());
    }

    /// `all` lists every variant.
    #[test]
    fn test_all_languages() {
        let languages = Language::all();
        assert_eq!(languages.len(), 22);

        for expected in &[Language::Python, Language::Rust] {
            assert!(languages.contains(expected));
        }
    }

    /// A grammar exists for Python and Rust but not for F#.
    #[test]
    fn test_tree_sitter_language() {
        for &language in &[Language::Python, Language::Rust] {
            assert!(language.tree_sitter_language().is_some());
        }
        assert!(Language::FSharp.tree_sitter_language().is_none());
    }

    /// Query text exists for Python and Rust but not for F#.
    #[test]
    fn test_query_string() {
        for &language in &[Language::Python, Language::Rust] {
            assert!(language.query_string().is_some());
        }
        assert!(Language::FSharp.query_string().is_none());
    }

    /// Parser construction succeeds for Python and Rust and fails for F#.
    #[test]
    fn test_init_parser() {
        for &language in &[Language::Python, Language::Rust] {
            assert!(language.init_parser().is_ok());
        }
        assert!(Language::FSharp.init_parser().is_err());
    }

    /// Query compilation succeeds for Python and Rust and fails for F#.
    #[test]
    fn test_create_query() {
        for &language in &[Language::Python, Language::Rust] {
            assert!(language.create_query().is_ok());
        }
        assert!(Language::FSharp.create_query().is_err());
    }

    /// Spot-check each of the category predicates with one positive and one
    /// negative example.
    #[test]
    fn test_language_categories() {
        let python = Language::Python;
        let rust = Language::Rust;
        let haskell = Language::Haskell;

        assert!(python.uses_indentation_blocks());
        assert!(!rust.uses_indentation_blocks());

        assert!(rust.is_c_family());
        assert!(!python.is_c_family());

        assert!(haskell.is_functional());
        assert!(!python.is_functional());
    }
}