scribe_analysis/language_support/
ast_language.rs

//! # AST Language Support Definitions
//!
//! Defines language support tiers and capabilities for the 11 languages listed in
//! `AstLanguage`: six with tree-sitter grammars wired in today and five planned.
//! Replaces the basic regex-based approach with proper AST analysis using tree-sitter.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// Programming language support (focused on tree-sitter languages)
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum AstLanguage {
12    // Currently supported with tree-sitter
13    Python,
14    JavaScript,
15    TypeScript,
16    Go,
17    Rust,
18    Html,
19
20    // Future tree-sitter support (when dependencies added)
21    Java,
22    C,
23    Cpp,
24    Ruby,
25    CSharp,
26}
27
28/// Language support tier indicating analysis depth
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
30pub enum LanguageTier {
31    /// Full AST parsing with tree-sitter
32    FullAst,
33    /// Syntax-aware parsing for markup languages
34    SyntaxAware,
35    /// Future support (not yet implemented)
36    Future,
37}
38
39/// Language-specific features and capabilities
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct LanguageFeatures {
42    /// Support tier
43    pub tier: LanguageTier,
44    /// Can extract functions/methods
45    pub has_functions: bool,
46    /// Can extract classes/types
47    pub has_classes: bool,
48    /// Has documentation conventions
49    pub has_documentation: bool,
50    /// Has import/dependency statements
51    pub has_imports: bool,
52    /// Language-specific complexity factors
53    pub complexity_factors: Vec<String>,
54    /// Common file extensions
55    pub extensions: Vec<String>,
56}
57
58impl AstLanguage {
59    /// Get the tree-sitter language for this language (Tier 1 and 2 only)
60    #[cfg(feature = "tree-sitter")]
61    pub fn tree_sitter_language(&self) -> Option<tree_sitter::Language> {
62        match self {
63            // Tier 1: Full AST languages
64            AstLanguage::Python => Some(tree_sitter_python::language()),
65            AstLanguage::JavaScript => Some(tree_sitter_javascript::language()),
66            AstLanguage::TypeScript => Some(tree_sitter_typescript::language_typescript()),
67            AstLanguage::Go => Some(tree_sitter_go::language()),
68            AstLanguage::Rust => Some(tree_sitter_rust::language()),
69            AstLanguage::Html => Some(tree_sitter_html::language()),
70
71            // Future tree-sitter languages (when dependencies are added)
72            AstLanguage::Java => None, // tree_sitter_java::language() when added
73            AstLanguage::CSharp => None, // tree_sitter_c_sharp::language() when added
74            AstLanguage::C => None,    // tree_sitter_c::language() when added
75            AstLanguage::Cpp => None,  // tree_sitter_cpp::language() when added
76            AstLanguage::Ruby => None, // tree_sitter_ruby::language() when added
77        }
78    }
79
80    /// Detect language from file extension
81    pub fn from_extension(ext: &str) -> Option<Self> {
82        match ext.to_lowercase().as_str() {
83            // Currently supported
84            "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
85            "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
86            "ts" | "mts" | "cts" | "tsx" => Some(AstLanguage::TypeScript),
87            "go" => Some(AstLanguage::Go),
88            "rs" => Some(AstLanguage::Rust),
89            "html" | "htm" => Some(AstLanguage::Html),
90
91            // Future support
92            "java" => Some(AstLanguage::Java),
93            "c" => Some(AstLanguage::C),
94            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" => Some(AstLanguage::Cpp),
95            "rb" | "ruby" => Some(AstLanguage::Ruby),
96            "cs" => Some(AstLanguage::CSharp),
97
98            _ => None,
99        }
100    }
101
102    /// Get language tier
103    pub fn tier(&self) -> LanguageTier {
104        match self {
105            // Currently supported with tree-sitter
106            AstLanguage::Python
107            | AstLanguage::JavaScript
108            | AstLanguage::TypeScript
109            | AstLanguage::Go
110            | AstLanguage::Rust => LanguageTier::FullAst,
111
112            // Syntax-aware (markup)
113            AstLanguage::Html => LanguageTier::SyntaxAware,
114
115            // Future support
116            AstLanguage::Java
117            | AstLanguage::C
118            | AstLanguage::Cpp
119            | AstLanguage::Ruby
120            | AstLanguage::CSharp => LanguageTier::Future,
121        }
122    }
123
124    /// Get language features and capabilities
125    pub fn features(&self) -> LanguageFeatures {
126        match self {
127            AstLanguage::Python => LanguageFeatures {
128                tier: LanguageTier::FullAst,
129                has_functions: true,
130                has_classes: true,
131                has_documentation: true,
132                has_imports: true,
133                complexity_factors: vec![
134                    "list_comprehensions".to_string(),
135                    "decorators".to_string(),
136                    "async_await".to_string(),
137                    "generators".to_string(),
138                ],
139                extensions: vec!["py".to_string(), "pyi".to_string(), "pyw".to_string()],
140            },
141
142            AstLanguage::JavaScript => LanguageFeatures {
143                tier: LanguageTier::FullAst,
144                has_functions: true,
145                has_classes: true,
146                has_documentation: true,
147                has_imports: true,
148                complexity_factors: vec![
149                    "closures".to_string(),
150                    "promises".to_string(),
151                    "async_await".to_string(),
152                    "prototypal_inheritance".to_string(),
153                ],
154                extensions: vec!["js".to_string(), "mjs".to_string(), "cjs".to_string()],
155            },
156
157            AstLanguage::TypeScript => LanguageFeatures {
158                tier: LanguageTier::FullAst,
159                has_functions: true,
160                has_classes: true,
161                has_documentation: true,
162                has_imports: true,
163                complexity_factors: vec![
164                    "generic_types".to_string(),
165                    "type_guards".to_string(),
166                    "conditional_types".to_string(),
167                    "mapped_types".to_string(),
168                ],
169                extensions: vec!["ts".to_string(), "tsx".to_string(), "mts".to_string()],
170            },
171
172            AstLanguage::Rust => LanguageFeatures {
173                tier: LanguageTier::FullAst,
174                has_functions: true,
175                has_classes: false, // Rust has structs/traits instead
176                has_documentation: true,
177                has_imports: true,
178                complexity_factors: vec![
179                    "lifetimes".to_string(),
180                    "borrowing".to_string(),
181                    "pattern_matching".to_string(),
182                    "macros".to_string(),
183                ],
184                extensions: vec!["rs".to_string()],
185            },
186
187            AstLanguage::Go => LanguageFeatures {
188                tier: LanguageTier::FullAst,
189                has_functions: true,
190                has_classes: false, // Go has structs/interfaces instead
191                has_documentation: true,
192                has_imports: true,
193                complexity_factors: vec![
194                    "goroutines".to_string(),
195                    "channels".to_string(),
196                    "interfaces".to_string(),
197                    "defer_statements".to_string(),
198                ],
199                extensions: vec!["go".to_string()],
200            },
201
202            AstLanguage::Java => LanguageFeatures {
203                tier: LanguageTier::FullAst,
204                has_functions: true,
205                has_classes: true,
206                has_documentation: true,
207                has_imports: true,
208                complexity_factors: vec![
209                    "inheritance".to_string(),
210                    "generics".to_string(),
211                    "reflection".to_string(),
212                    "annotations".to_string(),
213                ],
214                extensions: vec!["java".to_string()],
215            },
216
217            // Add more language features as needed...
218            _ => LanguageFeatures {
219                tier: self.tier(),
220                has_functions: false,
221                has_classes: false,
222                has_documentation: false,
223                has_imports: false,
224                complexity_factors: vec![],
225                extensions: vec![],
226            },
227        }
228    }
229
230    /// Get all supported languages
231    pub fn all_supported() -> Vec<Self> {
232        vec![
233            // Currently supported
234            AstLanguage::Python,
235            AstLanguage::JavaScript,
236            AstLanguage::TypeScript,
237            AstLanguage::Go,
238            AstLanguage::Rust,
239            AstLanguage::Html,
240            // Future support
241            AstLanguage::Java,
242            AstLanguage::C,
243            AstLanguage::Cpp,
244            AstLanguage::Ruby,
245            AstLanguage::CSharp,
246        ]
247    }
248
249    /// Get language name as string
250    pub fn name(&self) -> &'static str {
251        match self {
252            AstLanguage::Python => "Python",
253            AstLanguage::JavaScript => "JavaScript",
254            AstLanguage::TypeScript => "TypeScript",
255            AstLanguage::Go => "Go",
256            AstLanguage::Rust => "Rust",
257            AstLanguage::Html => "HTML",
258            AstLanguage::Java => "Java",
259            AstLanguage::C => "C",
260            AstLanguage::Cpp => "C++",
261            AstLanguage::Ruby => "Ruby",
262            AstLanguage::CSharp => "C#",
263        }
264    }
265}
266
267/// Language statistics for reporting
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct LanguageStats {
270    /// Total supported languages
271    pub total_languages: usize,
272    /// Languages by tier
273    pub by_tier: HashMap<LanguageTier, usize>,
274    /// Languages with AST support
275    pub ast_supported: usize,
276    /// Languages with tree-sitter support
277    pub tree_sitter_available: usize,
278}
279
280impl LanguageStats {
281    /// Calculate statistics for current language support
282    pub fn calculate() -> Self {
283        let all_languages = AstLanguage::all_supported();
284        let total_languages = all_languages.len();
285
286        let mut by_tier = HashMap::new();
287        let mut ast_supported = 0;
288        let mut tree_sitter_available = 0;
289
290        for language in &all_languages {
291            let tier = language.tier();
292            *by_tier.entry(tier).or_insert(0) += 1;
293
294            if tier == LanguageTier::FullAst || tier == LanguageTier::SyntaxAware {
295                ast_supported += 1;
296            }
297
298            #[cfg(feature = "tree-sitter")]
299            if language.tree_sitter_language().is_some() {
300                tree_sitter_available += 1;
301            }
302        }
303
304        Self {
305            total_languages,
306            by_tier,
307            ast_supported,
308            tree_sitter_available,
309        }
310    }
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions resolve to their language; unknown ones resolve to `None`.
    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("rs", Some(AstLanguage::Rust)),
            ("go", Some(AstLanguage::Go)),
            ("unknown", None),
        ];

        for &(extension, expected) in &expectations {
            assert_eq!(AstLanguage::from_extension(extension), expected);
        }
    }

    /// One representative language per tier reports the expected tier.
    #[test]
    fn test_language_tiers() {
        let expectations = [
            (AstLanguage::Python, LanguageTier::FullAst),
            (AstLanguage::Html, LanguageTier::SyntaxAware),
            (AstLanguage::Java, LanguageTier::Future),
        ];

        for &(language, expected) in &expectations {
            assert_eq!(language.tier(), expected);
        }
    }

    /// Python exposes every capability flag and at least one complexity factor.
    #[test]
    fn test_language_features() {
        let features = AstLanguage::Python.features();

        assert!(features.has_functions);
        assert!(features.has_classes);
        assert!(features.has_documentation);
        assert!(features.has_imports);
        assert!(!features.complexity_factors.is_empty());
    }

    /// The language list contains 11 entries (6 current + 5 future).
    #[test]
    fn test_language_count() {
        let count = AstLanguage::all_supported().len();

        assert_eq!(count, 11, "Expected 11 languages, got {}", count);
    }

    /// Statistics cover all languages and contain an entry for every tier.
    #[test]
    fn test_language_stats() {
        let stats = LanguageStats::calculate();

        assert_eq!(stats.total_languages, 11);
        for tier in &[
            LanguageTier::FullAst,
            LanguageTier::SyntaxAware,
            LanguageTier::Future,
        ] {
            assert!(stats.by_tier.contains_key(tier));
        }
    }
}