Skip to main content

verificar/grammar/
python.rs

//! Python grammar definition
//!
//! Grammar rules for Python code generation, targeting depyler transpilation.
//! Uses tree-sitter for proper AST validation when the `tree-sitter` feature is enabled.
5
6use crate::Language;
7
8use super::Grammar;
9
/// Grammar describing Python source for code generation.
///
/// Validation strategy depends on the `tree-sitter` cargo feature:
///
/// * enabled – code is parsed with tree-sitter-python for real syntax checking;
/// * disabled – only basic heuristics are applied.
pub struct PythonGrammar {
    /// Parser with the Python language loaded. It sits behind a `Mutex`
    /// because parsing needs mutable access while the grammar's methods
    /// take `&self`.
    #[cfg(feature = "tree-sitter")]
    parser: std::sync::Mutex<tree_sitter::Parser>,
}
18
19impl std::fmt::Debug for PythonGrammar {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        f.debug_struct("PythonGrammar")
22            .field("language", &"python")
23            .finish()
24    }
25}
26
27impl Default for PythonGrammar {
28    fn default() -> Self {
29        Self::new()
30    }
31}
32
33impl PythonGrammar {
34    /// Create a new Python grammar
35    ///
36    /// # Panics
37    ///
38    /// Panics if the tree-sitter Python grammar fails to load (should never happen
39    /// with a correctly compiled tree-sitter-python dependency).
40    #[must_use]
41    #[allow(clippy::expect_used)]
42    pub fn new() -> Self {
43        #[cfg(feature = "tree-sitter")]
44        {
45            let mut parser = tree_sitter::Parser::new();
46            parser
47                .set_language(&tree_sitter_python::LANGUAGE.into())
48                .expect("Failed to load Python grammar");
49            Self {
50                parser: std::sync::Mutex::new(parser),
51            }
52        }
53        #[cfg(not(feature = "tree-sitter"))]
54        {
55            Self {}
56        }
57    }
58
59    /// Parse Python code and return the AST tree
60    ///
61    /// Returns `None` if parsing fails or tree-sitter feature is disabled.
62    #[cfg(feature = "tree-sitter")]
63    pub fn parse(&self, code: &str) -> Option<tree_sitter::Tree> {
64        let mut parser = self.parser.lock().ok()?;
65        parser.parse(code, None)
66    }
67
68    /// Get the root node of parsed code
69    #[cfg(feature = "tree-sitter")]
70    pub fn root_node(&self, code: &str) -> Option<String> {
71        self.parse(code)
72            .map(|tree| tree.root_node().kind().to_string())
73    }
74
75    /// Check if the parsed code has any syntax errors
76    #[cfg(feature = "tree-sitter")]
77    pub fn has_errors(&self, code: &str) -> bool {
78        self.parse(code)
79            .map_or(true, |tree| tree.root_node().has_error())
80    }
81
82    /// Get the AST depth of parsed code
83    #[cfg(feature = "tree-sitter")]
84    pub fn ast_depth(&self, code: &str) -> usize {
85        fn max_depth(node: tree_sitter::Node<'_>) -> usize {
86            let child_depths = node
87                .children(&mut node.walk())
88                .map(max_depth)
89                .max()
90                .unwrap_or(0);
91            1 + child_depths
92        }
93
94        self.parse(code)
95            .map_or(0, |tree| max_depth(tree.root_node()))
96    }
97
98    /// Count the number of nodes in the AST
99    #[cfg(feature = "tree-sitter")]
100    pub fn node_count(&self, code: &str) -> usize {
101        fn count_nodes(node: tree_sitter::Node<'_>) -> usize {
102            1 + node
103                .children(&mut node.walk())
104                .map(count_nodes)
105                .sum::<usize>()
106        }
107
108        self.parse(code)
109            .map_or(0, |tree| count_nodes(tree.root_node()))
110    }
111}
112
113impl Grammar for PythonGrammar {
114    fn language(&self) -> Language {
115        Language::Python
116    }
117
118    fn validate(&self, code: &str) -> bool {
119        if code.is_empty() {
120            return false;
121        }
122
123        #[cfg(feature = "tree-sitter")]
124        {
125            !self.has_errors(code)
126        }
127
128        #[cfg(not(feature = "tree-sitter"))]
129        {
130            // Basic fallback validation without tree-sitter
131            // Check for obvious syntax issues
132            let balanced_parens = code.chars().filter(|&c| c == '(').count()
133                == code.chars().filter(|&c| c == ')').count();
134            let balanced_brackets = code.chars().filter(|&c| c == '[').count()
135                == code.chars().filter(|&c| c == ']').count();
136            let balanced_braces = code.chars().filter(|&c| c == '{').count()
137                == code.chars().filter(|&c| c == '}').count();
138
139            balanced_parens && balanced_brackets && balanced_braces
140        }
141    }
142
143    fn max_enumeration_depth(&self) -> usize {
144        5 // Python ASTs get complex quickly
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the grammar accepts every snippet in `snippets`.
    fn assert_all_valid(snippets: &[&str]) {
        let grammar = PythonGrammar::new();
        for snippet in snippets {
            assert!(grammar.validate(snippet));
        }
    }

    /// Asserts that the grammar rejects every snippet in `snippets`.
    fn assert_all_invalid(snippets: &[&str]) {
        let grammar = PythonGrammar::new();
        for snippet in snippets {
            assert!(!grammar.validate(snippet));
        }
    }

    #[test]
    fn test_python_grammar_language() {
        assert_eq!(PythonGrammar::new().language(), Language::Python);
    }

    #[test]
    fn test_python_grammar_validate_basic() {
        assert_all_valid(&["x = 1"]);
        assert_all_invalid(&[""]);
    }

    #[test]
    fn test_python_grammar_validate_function() {
        assert_all_valid(&[
            "def foo():\n    pass",
            "def add(a, b):\n    return a + b",
        ]);
    }

    #[test]
    fn test_python_grammar_validate_class() {
        assert_all_valid(&[
            "class Foo:\n    pass",
            "class Bar:\n    def __init__(self):\n        self.x = 1",
        ]);
    }

    #[test]
    fn test_python_grammar_validate_control_flow() {
        assert_all_valid(&[
            "if x:\n    y = 1",
            "for i in range(10):\n    print(i)",
            "while True:\n    break",
        ]);
    }

    #[test]
    fn test_python_grammar_validate_unbalanced() {
        // An opener without its closer must be rejected.
        assert_all_invalid(&["x = (1 + 2", "x = [1, 2"]);
    }

    #[test]
    fn test_python_grammar_max_depth() {
        assert_eq!(PythonGrammar::new().max_enumeration_depth(), 5);
    }

    #[test]
    fn test_python_grammar_debug() {
        let rendered = format!("{:?}", PythonGrammar::new());
        for expected in ["PythonGrammar", "python"] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn test_python_grammar_default() {
        assert_eq!(PythonGrammar::default().language(), Language::Python);
    }

    #[test]
    fn test_python_grammar_validate_unbalanced_braces() {
        // A set/dict literal missing its closing brace must be rejected.
        assert_all_invalid(&["x = {1, 2"]);
    }

    #[cfg(feature = "tree-sitter")]
    mod tree_sitter_tests {
        use super::*;

        #[test]
        fn test_parse_simple() {
            assert!(PythonGrammar::new().parse("x = 1").is_some());
        }

        #[test]
        fn test_root_node() {
            let kind = PythonGrammar::new().root_node("x = 1");
            assert_eq!(kind, Some("module".to_string()));
        }

        #[test]
        fn test_has_errors_valid() {
            let grammar = PythonGrammar::new();
            for source in ["x = 1", "def foo(): pass"] {
                assert!(!grammar.has_errors(source));
            }
        }

        #[test]
        fn test_has_errors_invalid() {
            let grammar = PythonGrammar::new();
            for source in ["def foo(", "class :"] {
                assert!(grammar.has_errors(source));
            }
        }

        #[test]
        fn test_ast_depth() {
            let grammar = PythonGrammar::new();
            let shallow = grammar.ast_depth("x = 1");
            let nested = grammar.ast_depth("def foo():\n    if x:\n        return y + z");
            assert!(shallow > 0);
            assert!(nested > shallow);
        }

        #[test]
        fn test_node_count() {
            let grammar = PythonGrammar::new();
            let one_statement = grammar.node_count("x = 1");
            let three_statements = grammar.node_count("x = 1\ny = 2\nz = 3");
            assert!(one_statement > 0);
            assert!(three_statements > one_statement);
        }
    }
}