scribe_analysis/language_support/
ast_language.rs

1//! # AST Language Support Definitions
2//!
//! Defines language support tiers and capabilities for the 11 languages in `AstLanguage` (6 backed by tree-sitter today, 5 planned).
4//! Replaces the basic regex-based approach with proper AST analysis using tree-sitter.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// Programming language support (focused on tree-sitter languages)
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub enum AstLanguage {
12    // Currently supported with tree-sitter
13    Python,
14    JavaScript,
15    TypeScript,
16    Go,
17    Rust,
18    Html,
19
20    // Future tree-sitter support (when dependencies added)
21    Java,
22    C,
23    Cpp,
24    Ruby,
25    CSharp,
26}
27
28/// Language support tier indicating analysis depth
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
30pub enum LanguageTier {
31    /// Full AST parsing with tree-sitter
32    FullAst,
33    /// Syntax-aware parsing for markup languages
34    SyntaxAware,
35    /// Future support (not yet implemented)
36    Future,
37}
38
39/// Language-specific features and capabilities
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct LanguageFeatures {
42    /// Support tier
43    pub tier: LanguageTier,
44    /// Can extract functions/methods
45    pub has_functions: bool,
46    /// Can extract classes/types
47    pub has_classes: bool,
48    /// Has documentation conventions
49    pub has_documentation: bool,
50    /// Has import/dependency statements
51    pub has_imports: bool,
52    /// Language-specific complexity factors
53    pub complexity_factors: Vec<String>,
54    /// Common file extensions
55    pub extensions: Vec<String>,
56}
57
58impl AstLanguage {
59    /// Get the tree-sitter language for this language (Tier 1 and 2 only)
60    #[cfg(feature = "tree-sitter")]
61    pub fn tree_sitter_language(&self) -> Option<tree_sitter::Language> {
62        match self {
63            // Tier 1: Full AST languages
64            AstLanguage::Python => Some(tree_sitter_python::language()),
65            AstLanguage::JavaScript => Some(tree_sitter_javascript::language()),
66            AstLanguage::TypeScript => Some(tree_sitter_typescript::language_typescript()),
67            AstLanguage::Go => Some(tree_sitter_go::language()),
68            AstLanguage::Rust => Some(tree_sitter_rust::language()),
69            AstLanguage::Html => Some(tree_sitter_html::language()),
70
71            // Future tree-sitter languages (when dependencies are added)
72            AstLanguage::Java => None, // tree_sitter_java::language() when added
73            AstLanguage::CSharp => None, // tree_sitter_c_sharp::language() when added
74            AstLanguage::C => None,    // tree_sitter_c::language() when added
75            AstLanguage::Cpp => None,  // tree_sitter_cpp::language() when added
76            AstLanguage::PHP => None,  // tree_sitter_php::language() when added
77            AstLanguage::Ruby => None, // tree_sitter_ruby::language() when added
78            AstLanguage::Swift => None, // tree_sitter_swift::language() when added
79            AstLanguage::Kotlin => None, // tree_sitter_kotlin::language() when added
80
81            // Tier 2: Syntax-aware languages
82            AstLanguage::Css => None, // tree_sitter_css::language() when added
83            AstLanguage::Json => None, // tree_sitter_json::language() when added
84            AstLanguage::Yaml => None, // tree_sitter_yaml::language() when added
85            AstLanguage::Xml => None, // tree_sitter_xml::language() when added
86            AstLanguage::Markdown => None, // tree_sitter_markdown::language() when added
87            AstLanguage::Sql => None, // tree_sitter_sql::language() when added
88            AstLanguage::Bash => None, // tree_sitter_bash::language() when added
89            AstLanguage::PowerShell => None, // tree_sitter_powershell::language() when added
90            AstLanguage::Dockerfile => None, // tree_sitter_dockerfile::language() when added
91
92            // Tier 3: Basic structure languages (no tree-sitter)
93            _ => None,
94        }
95    }
96
97    /// Detect language from file extension
98    pub fn from_extension(ext: &str) -> Option<Self> {
99        match ext.to_lowercase().as_str() {
100            // Currently supported
101            "py" | "pyi" | "pyw" => Some(AstLanguage::Python),
102            "js" | "mjs" | "cjs" => Some(AstLanguage::JavaScript),
103            "ts" | "mts" | "cts" | "tsx" => Some(AstLanguage::TypeScript),
104            "go" => Some(AstLanguage::Go),
105            "rs" => Some(AstLanguage::Rust),
106            "html" | "htm" => Some(AstLanguage::Html),
107
108            // Future support
109            "java" => Some(AstLanguage::Java),
110            "c" => Some(AstLanguage::C),
111            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "h" => Some(AstLanguage::Cpp),
112            "rb" | "ruby" => Some(AstLanguage::Ruby),
113            "cs" => Some(AstLanguage::CSharp),
114
115            _ => None,
116        }
117    }
118
119    /// Get language tier
120    pub fn tier(&self) -> LanguageTier {
121        match self {
122            // Currently supported with tree-sitter
123            AstLanguage::Python
124            | AstLanguage::JavaScript
125            | AstLanguage::TypeScript
126            | AstLanguage::Go
127            | AstLanguage::Rust => LanguageTier::FullAst,
128
129            // Syntax-aware (markup)
130            AstLanguage::Html => LanguageTier::SyntaxAware,
131
132            // Future support
133            AstLanguage::Java
134            | AstLanguage::C
135            | AstLanguage::Cpp
136            | AstLanguage::Ruby
137            | AstLanguage::CSharp => LanguageTier::Future,
138        }
139    }
140
141    /// Get language features and capabilities
142    pub fn features(&self) -> LanguageFeatures {
143        match self {
144            AstLanguage::Python => LanguageFeatures {
145                tier: LanguageTier::FullAst,
146                has_functions: true,
147                has_classes: true,
148                has_documentation: true,
149                has_imports: true,
150                complexity_factors: vec![
151                    "list_comprehensions".to_string(),
152                    "decorators".to_string(),
153                    "async_await".to_string(),
154                    "generators".to_string(),
155                ],
156                extensions: vec!["py".to_string(), "pyi".to_string(), "pyw".to_string()],
157            },
158
159            AstLanguage::JavaScript => LanguageFeatures {
160                tier: LanguageTier::FullAst,
161                has_functions: true,
162                has_classes: true,
163                has_documentation: true,
164                has_imports: true,
165                complexity_factors: vec![
166                    "closures".to_string(),
167                    "promises".to_string(),
168                    "async_await".to_string(),
169                    "prototypal_inheritance".to_string(),
170                ],
171                extensions: vec!["js".to_string(), "mjs".to_string(), "cjs".to_string()],
172            },
173
174            AstLanguage::TypeScript => LanguageFeatures {
175                tier: LanguageTier::FullAst,
176                has_functions: true,
177                has_classes: true,
178                has_documentation: true,
179                has_imports: true,
180                complexity_factors: vec![
181                    "generic_types".to_string(),
182                    "type_guards".to_string(),
183                    "conditional_types".to_string(),
184                    "mapped_types".to_string(),
185                ],
186                extensions: vec!["ts".to_string(), "tsx".to_string(), "mts".to_string()],
187            },
188
189            AstLanguage::Rust => LanguageFeatures {
190                tier: LanguageTier::FullAst,
191                has_functions: true,
192                has_classes: false, // Rust has structs/traits instead
193                has_documentation: true,
194                has_imports: true,
195                complexity_factors: vec![
196                    "lifetimes".to_string(),
197                    "borrowing".to_string(),
198                    "pattern_matching".to_string(),
199                    "macros".to_string(),
200                ],
201                extensions: vec!["rs".to_string()],
202            },
203
204            AstLanguage::Go => LanguageFeatures {
205                tier: LanguageTier::FullAst,
206                has_functions: true,
207                has_classes: false, // Go has structs/interfaces instead
208                has_documentation: true,
209                has_imports: true,
210                complexity_factors: vec![
211                    "goroutines".to_string(),
212                    "channels".to_string(),
213                    "interfaces".to_string(),
214                    "defer_statements".to_string(),
215                ],
216                extensions: vec!["go".to_string()],
217            },
218
219            AstLanguage::Java => LanguageFeatures {
220                tier: LanguageTier::FullAst,
221                has_functions: true,
222                has_classes: true,
223                has_documentation: true,
224                has_imports: true,
225                complexity_factors: vec![
226                    "inheritance".to_string(),
227                    "generics".to_string(),
228                    "reflection".to_string(),
229                    "annotations".to_string(),
230                ],
231                extensions: vec!["java".to_string()],
232            },
233
234            // Add more language features as needed...
235            _ => LanguageFeatures {
236                tier: self.tier(),
237                has_functions: false,
238                has_classes: false,
239                has_documentation: false,
240                has_imports: false,
241                complexity_factors: vec![],
242                extensions: vec![],
243            },
244        }
245    }
246
247    /// Get all supported languages
248    pub fn all_supported() -> Vec<Self> {
249        vec![
250            // Currently supported
251            AstLanguage::Python,
252            AstLanguage::JavaScript,
253            AstLanguage::TypeScript,
254            AstLanguage::Go,
255            AstLanguage::Rust,
256            AstLanguage::Html,
257            // Future support
258            AstLanguage::Java,
259            AstLanguage::C,
260            AstLanguage::Cpp,
261            AstLanguage::Ruby,
262            AstLanguage::CSharp,
263        ]
264    }
265
266    /// Get language name as string
267    pub fn name(&self) -> &'static str {
268        match self {
269            AstLanguage::Python => "Python",
270            AstLanguage::JavaScript => "JavaScript",
271            AstLanguage::TypeScript => "TypeScript",
272            AstLanguage::Go => "Go",
273            AstLanguage::Rust => "Rust",
274            AstLanguage::Html => "HTML",
275            AstLanguage::Java => "Java",
276            AstLanguage::C => "C",
277            AstLanguage::Cpp => "C++",
278            AstLanguage::Ruby => "Ruby",
279            AstLanguage::CSharp => "C#",
280        }
281    }
282}
283
284/// Language statistics for reporting
285#[derive(Debug, Clone, Serialize, Deserialize)]
286pub struct LanguageStats {
287    /// Total supported languages
288    pub total_languages: usize,
289    /// Languages by tier
290    pub by_tier: HashMap<LanguageTier, usize>,
291    /// Languages with AST support
292    pub ast_supported: usize,
293    /// Languages with tree-sitter support
294    pub tree_sitter_available: usize,
295}
296
297impl LanguageStats {
298    /// Calculate statistics for current language support
299    pub fn calculate() -> Self {
300        let all_languages = AstLanguage::all_supported();
301        let total_languages = all_languages.len();
302
303        let mut by_tier = HashMap::new();
304        let mut ast_supported = 0;
305        let mut tree_sitter_available = 0;
306
307        for language in &all_languages {
308            let tier = language.tier();
309            *by_tier.entry(tier).or_insert(0) += 1;
310
311            if tier == LanguageTier::FullAst || tier == LanguageTier::SyntaxAware {
312                ast_supported += 1;
313            }
314
315            #[cfg(feature = "tree-sitter")]
316            if language.tree_sitter_language().is_some() {
317                tree_sitter_available += 1;
318            }
319        }
320
321        Self {
322            total_languages,
323            by_tier,
324            ast_supported,
325            tree_sitter_available,
326        }
327    }
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions resolve to their language; unknown ones resolve to `None`.
    #[test]
    fn test_language_detection() {
        let expectations = [
            ("py", Some(AstLanguage::Python)),
            ("js", Some(AstLanguage::JavaScript)),
            ("rs", Some(AstLanguage::Rust)),
            ("go", Some(AstLanguage::Go)),
            ("unknown", None),
        ];

        for (extension, expected) in expectations.iter() {
            assert_eq!(AstLanguage::from_extension(extension), *expected);
        }
    }

    /// One representative language per tier.
    #[test]
    fn test_language_tiers() {
        let expectations = [
            (AstLanguage::Python, LanguageTier::FullAst),
            (AstLanguage::Html, LanguageTier::SyntaxAware),
            (AstLanguage::Java, LanguageTier::Future),
        ];

        for (language, expected) in expectations.iter() {
            assert_eq!(language.tier(), *expected);
        }
    }

    /// Python reports every capability and at least one complexity factor.
    #[test]
    fn test_language_features() {
        let features = AstLanguage::Python.features();

        assert!(features.has_functions);
        assert!(features.has_classes);
        assert!(features.has_documentation);
        assert!(features.has_imports);
        assert!(!features.complexity_factors.is_empty());
    }

    /// 6 current + 5 future languages.
    #[test]
    fn test_language_count() {
        let count = AstLanguage::all_supported().len();
        assert_eq!(count, 11, "Expected 11 languages, got {}", count);
    }

    /// Statistics cover all languages and every tier is represented.
    #[test]
    fn test_language_stats() {
        let stats = LanguageStats::calculate();
        assert_eq!(stats.total_languages, 11);

        let tiers = [
            LanguageTier::FullAst,
            LanguageTier::SyntaxAware,
            LanguageTier::Future,
        ];
        for tier in tiers.iter() {
            assert!(stats.by_tier.contains_key(tier));
        }
    }
}
383}