scribe_analysis/language_support/
ast_language.rs

//! # AST Language Support Definitions
//!
//! Defines language support tiers and capabilities for the 11 languages listed in
//! `AstLanguage` (six parsed today, five reserved for future support).
//! Replaces the basic regex-based approach with proper AST analysis using tree-sitter.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// Programming language support (focused on tree-sitter languages)
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum AstLanguage {
12    // Currently supported with tree-sitter
13    Python,
14    JavaScript,
15    TypeScript,
16    Go,
17    Rust,
18    Html,
19    
20    // Future tree-sitter support (when dependencies added)
21    Java,
22    C,
23    Cpp,
24    Ruby,
25    CSharp,
26}
27
28/// Language support tier indicating analysis depth
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
30pub enum LanguageTier {
31    /// Full AST parsing with tree-sitter
32    FullAst,
33    /// Syntax-aware parsing for markup languages
34    SyntaxAware,
35    /// Future support (not yet implemented)
36    Future,
37}
38
39/// Language-specific features and capabilities
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct LanguageFeatures {
42    /// Support tier
43    pub tier: LanguageTier,
44    /// Can extract functions/methods
45    pub has_functions: bool,
46    /// Can extract classes/types
47    pub has_classes: bool,
48    /// Has documentation conventions
49    pub has_documentation: bool,
50    /// Has import/dependency statements
51    pub has_imports: bool,
52    /// Language-specific complexity factors
53    pub complexity_factors: Vec<String>,
54    /// Common file extensions
55    pub extensions: Vec<String>,
56}
57
58impl AstLanguage {
59    /// Get the tree-sitter language for this language (Tier 1 and 2 only)
60    #[cfg(feature = "tree-sitter")]
61    pub fn tree_sitter_language(&self) -> Option<tree_sitter::Language> {
62        match self {
63            // Tier 1: Full AST languages
64            AstLanguage::Python => Some(tree_sitter_python::language()),
65            AstLanguage::JavaScript => Some(tree_sitter_javascript::language()),
66            AstLanguage::TypeScript => Some(tree_sitter_typescript::language_typescript()),
67            AstLanguage::Go => Some(tree_sitter_go::language()),
68            AstLanguage::Rust => Some(tree_sitter_rust::language()),
69            AstLanguage::Html => Some(tree_sitter_html::language()),
70            
71            // Future tree-sitter languages (when dependencies are added)
72            AstLanguage::Java => None, // tree_sitter_java::language() when added
73            AstLanguage::CSharp => None, // tree_sitter_c_sharp::language() when added
74            AstLanguage::C => None, // tree_sitter_c::language() when added
75            AstLanguage::Cpp => None, // tree_sitter_cpp::language() when added
76            AstLanguage::PHP => None, // tree_sitter_php::language() when added
77            AstLanguage::Ruby => None, // tree_sitter_ruby::language() when added
78            AstLanguage::Swift => None, // tree_sitter_swift::language() when added
79            AstLanguage::Kotlin => None, // tree_sitter_kotlin::language() when added
80            
81            // Tier 2: Syntax-aware languages
82            AstLanguage::Css => None, // tree_sitter_css::language() when added
83            AstLanguage::Json => None, // tree_sitter_json::language() when added
84            AstLanguage::Yaml => None, // tree_sitter_yaml::language() when added
85            AstLanguage::Xml => None, // tree_sitter_xml::language() when added
86            AstLanguage::Markdown => None, // tree_sitter_markdown::language() when added
87            AstLanguage::Sql => None, // tree_sitter_sql::language() when added
88            AstLanguage::Bash => None, // tree_sitter_bash::language() when added
89            AstLanguage::PowerShell => None, // tree_sitter_powershell::language() when added
90            AstLanguage::Dockerfile => None, // tree_sitter_dockerfile::language() when added
91            
92            // Tier 3: Basic structure languages (no tree-sitter)
93            _ => None,
94        }
95    }
96    
97    /// Detect language from file extension
98    pub fn from_extension(ext: &str) -> Option<Self> {
99        match ext.to_lowercase().as_str() {
100            // Currently supported
101            "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
102            "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
103            "ts" | "mts" | "cts" | "tsx" => Some(AstLanguage::TypeScript),
104            "go" => Some(AstLanguage::Go),
105            "rs" => Some(AstLanguage::Rust),
106            "html" | "htm" => Some(AstLanguage::Html),
107            
108            // Future support
109            "java" => Some(AstLanguage::Java),
110            "c" => Some(AstLanguage::C),
111            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" => Some(AstLanguage::Cpp),
112            "rb" | "ruby" => Some(AstLanguage::Ruby),
113            "cs" => Some(AstLanguage::CSharp),
114            
115            _ => None,
116        }
117    }
118    
119    /// Get language tier
120    pub fn tier(&self) -> LanguageTier {
121        match self {
122            // Currently supported with tree-sitter
123            AstLanguage::Python | AstLanguage::JavaScript | AstLanguage::TypeScript
124            | AstLanguage::Go | AstLanguage::Rust => LanguageTier::FullAst,
125            
126            // Syntax-aware (markup)
127            AstLanguage::Html => LanguageTier::SyntaxAware,
128            
129            // Future support
130            AstLanguage::Java | AstLanguage::C | AstLanguage::Cpp 
131            | AstLanguage::Ruby | AstLanguage::CSharp => LanguageTier::Future,
132        }
133    }
134    
135    /// Get language features and capabilities
136    pub fn features(&self) -> LanguageFeatures {
137        match self {
138            AstLanguage::Python => LanguageFeatures {
139                tier: LanguageTier::FullAst,
140                has_functions: true,
141                has_classes: true,
142                has_documentation: true,
143                has_imports: true,
144                complexity_factors: vec![
145                    "list_comprehensions".to_string(),
146                    "decorators".to_string(),
147                    "async_await".to_string(),
148                    "generators".to_string(),
149                ],
150                extensions: vec!["py".to_string(), "pyi".to_string(), "pyw".to_string()],
151            },
152            
153            AstLanguage::JavaScript => LanguageFeatures {
154                tier: LanguageTier::FullAst,
155                has_functions: true,
156                has_classes: true,
157                has_documentation: true,
158                has_imports: true,
159                complexity_factors: vec![
160                    "closures".to_string(),
161                    "promises".to_string(),
162                    "async_await".to_string(),
163                    "prototypal_inheritance".to_string(),
164                ],
165                extensions: vec!["js".to_string(), "mjs".to_string(), "cjs".to_string()],
166            },
167            
168            AstLanguage::TypeScript => LanguageFeatures {
169                tier: LanguageTier::FullAst,
170                has_functions: true,
171                has_classes: true,
172                has_documentation: true,
173                has_imports: true,
174                complexity_factors: vec![
175                    "generic_types".to_string(),
176                    "type_guards".to_string(),
177                    "conditional_types".to_string(),
178                    "mapped_types".to_string(),
179                ],
180                extensions: vec!["ts".to_string(), "tsx".to_string(), "mts".to_string()],
181            },
182            
183            AstLanguage::Rust => LanguageFeatures {
184                tier: LanguageTier::FullAst,
185                has_functions: true,
186                has_classes: false, // Rust has structs/traits instead
187                has_documentation: true,
188                has_imports: true,
189                complexity_factors: vec![
190                    "lifetimes".to_string(),
191                    "borrowing".to_string(),
192                    "pattern_matching".to_string(),
193                    "macros".to_string(),
194                ],
195                extensions: vec!["rs".to_string()],
196            },
197            
198            AstLanguage::Go => LanguageFeatures {
199                tier: LanguageTier::FullAst,
200                has_functions: true,
201                has_classes: false, // Go has structs/interfaces instead
202                has_documentation: true,
203                has_imports: true,
204                complexity_factors: vec![
205                    "goroutines".to_string(),
206                    "channels".to_string(),
207                    "interfaces".to_string(),
208                    "defer_statements".to_string(),
209                ],
210                extensions: vec!["go".to_string()],
211            },
212            
213            AstLanguage::Java => LanguageFeatures {
214                tier: LanguageTier::FullAst,
215                has_functions: true,
216                has_classes: true,
217                has_documentation: true,
218                has_imports: true,
219                complexity_factors: vec![
220                    "inheritance".to_string(),
221                    "generics".to_string(),
222                    "reflection".to_string(),
223                    "annotations".to_string(),
224                ],
225                extensions: vec!["java".to_string()],
226            },
227            
228            // Add more language features as needed...
229            _ => LanguageFeatures {
230                tier: self.tier(),
231                has_functions: false,
232                has_classes: false,
233                has_documentation: false,
234                has_imports: false,
235                complexity_factors: vec![],
236                extensions: vec![],
237            },
238        }
239    }
240    
241    /// Get all supported languages
242    pub fn all_supported() -> Vec<Self> {
243        vec![
244            // Currently supported
245            AstLanguage::Python,
246            AstLanguage::JavaScript,
247            AstLanguage::TypeScript,
248            AstLanguage::Go,
249            AstLanguage::Rust,
250            AstLanguage::Html,
251            
252            // Future support
253            AstLanguage::Java,
254            AstLanguage::C,
255            AstLanguage::Cpp,
256            AstLanguage::Ruby,
257            AstLanguage::CSharp,
258        ]
259    }
260    
261    /// Get language name as string
262    pub fn name(&self) -> &'static str {
263        match self {
264            AstLanguage::Python => "Python",
265            AstLanguage::JavaScript => "JavaScript",
266            AstLanguage::TypeScript => "TypeScript",
267            AstLanguage::Go => "Go",
268            AstLanguage::Rust => "Rust",
269            AstLanguage::Html => "HTML",
270            AstLanguage::Java => "Java",
271            AstLanguage::C => "C",
272            AstLanguage::Cpp => "C++",
273            AstLanguage::Ruby => "Ruby",
274            AstLanguage::CSharp => "C#",
275        }
276    }
277}
278
279/// Language statistics for reporting
280#[derive(Debug, Clone, Serialize, Deserialize)]
281pub struct LanguageStats {
282    /// Total supported languages
283    pub total_languages: usize,
284    /// Languages by tier
285    pub by_tier: HashMap<LanguageTier, usize>,
286    /// Languages with AST support
287    pub ast_supported: usize,
288    /// Languages with tree-sitter support
289    pub tree_sitter_available: usize,
290}
291
292impl LanguageStats {
293    /// Calculate statistics for current language support
294    pub fn calculate() -> Self {
295        let all_languages = AstLanguage::all_supported();
296        let total_languages = all_languages.len();
297        
298        let mut by_tier = HashMap::new();
299        let mut ast_supported = 0;
300        let mut tree_sitter_available = 0;
301        
302        for language in &all_languages {
303            let tier = language.tier();
304            *by_tier.entry(tier).or_insert(0) += 1;
305            
306            if tier == LanguageTier::FullAst || tier == LanguageTier::SyntaxAware {
307                ast_supported += 1;
308            }
309            
310            #[cfg(feature = "tree-sitter")]
311            if language.tree_sitter_language().is_some() {
312                tree_sitter_available += 1;
313            }
314        }
315        
316        Self {
317            total_languages,
318            by_tier,
319            ast_supported,
320            tree_sitter_available,
321        }
322    }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions resolve to their language; unknown ones resolve to `None`.
    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("rs", Some(AstLanguage::Rust)),
            ("go", Some(AstLanguage::Go)),
            ("unknown", None),
        ];

        for &(extension, expected) in &expectations {
            assert_eq!(AstLanguage::from_extension(extension), expected);
        }
    }

    /// One representative language per tier reports the expected tier.
    #[test]
    fn test_language_tiers() {
        let expectations = [
            (AstLanguage::Python, LanguageTier::FullAst),
            (AstLanguage::Html, LanguageTier::SyntaxAware),
            (AstLanguage::Java, LanguageTier::Future),
        ];

        for &(language, expected) in &expectations {
            assert_eq!(language.tier(), expected);
        }
    }

    /// Python advertises every capability and at least one complexity factor.
    #[test]
    fn test_language_features() {
        let LanguageFeatures {
            has_functions,
            has_classes,
            has_documentation,
            has_imports,
            complexity_factors,
            ..
        } = AstLanguage::Python.features();

        assert!(has_functions);
        assert!(has_classes);
        assert!(has_documentation);
        assert!(has_imports);
        assert!(!complexity_factors.is_empty());
    }

    /// The language list has 11 entries (6 current + 5 future).
    #[test]
    fn test_language_count() {
        let count = AstLanguage::all_supported().len();
        assert_eq!(count, 11, "Expected 11 languages, got {}", count);
    }

    /// Statistics cover all languages and mention every tier.
    #[test]
    fn test_language_stats() {
        let stats = LanguageStats::calculate();
        assert_eq!(stats.total_languages, 11);

        let tiers = [
            LanguageTier::FullAst,
            LanguageTier::SyntaxAware,
            LanguageTier::Future,
        ];
        for tier in &tiers {
            assert!(stats.by_tier.contains_key(tier));
        }
    }
}