cadi_core/atomizer/
parser.rs

1//! AST Parser using Tree-sitter
2//!
3//! Provides language-aware parsing for supported languages.
4
5use std::collections::HashMap;
6
7#[cfg(feature = "ast-parsing")]
8use tree_sitter::{Parser, Tree};
9
10use crate::error::{CadiError, CadiResult};
11
/// Parser front-end covering several source languages.
///
/// With the `ast-parsing` feature enabled it owns one Tree-sitter parser per
/// language name; with the feature disabled it holds no data at all.
pub struct AstParser {
    /// Zero-sized placeholder used when `ast-parsing` is disabled, so the
    /// struct keeps the same name and shape of API without Tree-sitter.
    #[cfg(not(feature = "ast-parsing"))]
    _phantom: std::marker::PhantomData<()>,

    /// Language name (e.g. `"rust"`) mapped to the parser configured for it.
    #[cfg(feature = "ast-parsing")]
    parsers: HashMap<String, Parser>,
}
20
21impl AstParser {
22    /// Create a new AST parser with all supported languages
23    #[cfg(feature = "ast-parsing")]
24    pub fn new() -> CadiResult<Self> {
25        let mut parsers = HashMap::new();
26        
27        // Initialize Rust parser
28        {
29            let mut parser = Parser::new();
30            parser.set_language(&tree_sitter_rust::language())
31                .map_err(|e| CadiError::AtomizerError(format!("Failed to load Rust grammar: {}", e)))?;
32            parsers.insert("rust".to_string(), parser);
33        }
34        
35        // Initialize TypeScript parser
36        {
37            let mut parser = Parser::new();
38            parser.set_language(&tree_sitter_typescript::language_typescript())
39                .map_err(|e| CadiError::AtomizerError(format!("Failed to load TypeScript grammar: {}", e)))?;
40            parsers.insert("typescript".to_string(), parser);
41        }
42        
43        // Initialize JavaScript parser
44        {
45            let mut parser = Parser::new();
46            parser.set_language(&tree_sitter_javascript::language())
47                .map_err(|e| CadiError::AtomizerError(format!("Failed to load JavaScript grammar: {}", e)))?;
48            parsers.insert("javascript".to_string(), parser);
49        }
50        
51        // Initialize Python parser
52        {
53            let mut parser = Parser::new();
54            parser.set_language(&tree_sitter_python::language())
55                .map_err(|e| CadiError::AtomizerError(format!("Failed to load Python grammar: {}", e)))?;
56            parsers.insert("python".to_string(), parser);
57        }
58        
59        Ok(Self { parsers })
60    }
61    
62    /// Create a new AST parser (stub when feature not enabled)
63    #[cfg(not(feature = "ast-parsing"))]
64    pub fn new() -> CadiResult<Self> {
65        Ok(Self { _phantom: std::marker::PhantomData })
66    }
67    
68    /// Parse source code into an AST
69    #[cfg(feature = "ast-parsing")]
70    pub fn parse(&mut self, language: &str, source: &str) -> CadiResult<ParsedAst> {
71        let parser = self.parsers.get_mut(language)
72            .ok_or_else(|| CadiError::AtomizerError(format!("Unsupported language: {}", language)))?;
73        
74        let tree = parser.parse(source, None)
75            .ok_or_else(|| CadiError::AtomizerError("Failed to parse source".to_string()))?;
76        
77        Ok(ParsedAst {
78            tree,
79            source: source.to_string(),
80            language: language.to_string(),
81        })
82    }
83    
84    /// Parse source code (stub when feature not enabled)
85    #[cfg(not(feature = "ast-parsing"))]
86    pub fn parse(&mut self, language: &str, source: &str) -> CadiResult<ParsedAst> {
87        Ok(ParsedAst {
88            source: source.to_string(),
89            language: language.to_string(),
90        })
91    }
92    
93    /// Check if a language is supported
94    #[cfg(feature = "ast-parsing")]
95    pub fn supports_language(&self, language: &str) -> bool {
96        self.parsers.contains_key(language)
97    }
98    
99    #[cfg(not(feature = "ast-parsing"))]
100    pub fn supports_language(&self, _language: &str) -> bool {
101        false
102    }
103    
104    /// Get list of supported languages
105    #[cfg(feature = "ast-parsing")]
106    pub fn supported_languages(&self) -> Vec<&str> {
107        self.parsers.keys().map(|s| s.as_str()).collect()
108    }
109    
110    #[cfg(not(feature = "ast-parsing"))]
111    pub fn supported_languages(&self) -> Vec<&str> {
112        vec![]
113    }
114    
115    /// Detect language from file extension
116    pub fn detect_language(path: &std::path::Path) -> Option<String> {
117        let ext = path.extension()?.to_str()?;
118        match ext {
119            "rs" => Some("rust".to_string()),
120            "ts" | "tsx" => Some("typescript".to_string()),
121            "js" | "jsx" | "mjs" | "cjs" => Some("javascript".to_string()),
122            "py" | "pyi" => Some("python".to_string()),
123            _ => None,
124        }
125    }
126}
127
128impl Default for AstParser {
129    fn default() -> Self {
130        Self::new().expect("Failed to create parser")
131    }
132}
133
/// A parsed AST with metadata.
///
/// Holds the source text and language name handed to the parser and, when the
/// `ast-parsing` feature is enabled, the Tree-sitter tree built from them.
#[derive(Debug)]
pub struct ParsedAst {
    /// Syntax tree produced by Tree-sitter (only with `ast-parsing`).
    #[cfg(feature = "ast-parsing")]
    pub tree: Tree,

    /// Owned copy of the source text.
    pub source: String,
    /// Language name the source was parsed as (e.g. `"rust"`).
    pub language: String,
}

impl ParsedAst {
    /// Get the root node of the syntax tree.
    #[cfg(feature = "ast-parsing")]
    pub fn root_node(&self) -> tree_sitter::Node<'_> {
        self.tree.root_node()
    }

    /// Create a cursor for walking the tree.
    #[cfg(feature = "ast-parsing")]
    pub fn walk(&self) -> tree_sitter::TreeCursor<'_> {
        self.tree.walk()
    }

    /// Get source text for the byte range `start..end`.
    ///
    /// Returns an empty string instead of panicking when the range is out of
    /// bounds, inverted (`start > end`), or does not fall on UTF-8 character
    /// boundaries.
    pub fn text_for_range(&self, start: usize, end: usize) -> &str {
        // `str::get` performs the bounds and char-boundary checks that direct
        // slicing would turn into a panic.
        self.source.get(start..end).unwrap_or("")
    }

    /// Get the total number of lines in the source, as counted by
    /// `str::lines` (an empty source has zero lines).
    pub fn line_count(&self) -> usize {
        self.source.lines().count()
    }
}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their language name; an unknown one gives `None`.
    #[test]
    fn test_language_detection() {
        let cases = [
            ("foo.rs", Some("rust")),
            ("bar.ts", Some("typescript")),
            ("baz.py", Some("python")),
            ("unknown.xyz", None),
        ];

        for &(file_name, expected) in cases.iter() {
            let detected = AstParser::detect_language(std::path::Path::new(file_name));
            assert_eq!(detected, expected.map(|name| name.to_string()));
        }
    }

    /// A small Rust function parses into a tree rooted at `source_file`.
    #[cfg(feature = "ast-parsing")]
    #[test]
    fn test_rust_parsing() {
        let source = r#"
fn hello() {
    println!("Hello, world!");
}
"#;

        let mut parser = AstParser::new().unwrap();
        let ast = parser.parse("rust", source).unwrap();

        assert_eq!(ast.language, "rust");
        assert_eq!(ast.root_node().kind(), "source_file");
    }
}