Skip to main content

oak_python/builder/
mod.rs

1use crate::{
2    ast::{BinaryOperator, Expression, Literal, Parameter, Program, PythonRoot, Statement},
3    kind::PythonSyntaxKind,
4    language::PythonLanguage,
5    parser::PythonParser,
6};
7use oak_core::{Builder, BuilderCache, GreenNode, GreenTree, OakDiagnostics, OakError, Parser, SourceText, TextEdit, TokenType, builder::BuildOutput, source::Source};
8
9pub struct PythonBuilder<'config> {
10    config: &'config PythonLanguage,
11}
12
13impl<'config> PythonBuilder<'config> {
14    pub fn new(config: &'config PythonLanguage) -> Self {
15        Self { config }
16    }
17}
18
19impl<'config> Builder<PythonLanguage> for PythonBuilder<'config> {
20    fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<PythonLanguage>) -> BuildOutput<PythonLanguage> {
21        let parser = PythonParser::new(self.config);
22
23        let mut parse_cache = oak_core::parser::session::ParseSession::<PythonLanguage>::default();
24        let parse_result = parser.parse(source, edits, &mut parse_cache);
25
26        match parse_result.result {
27            Ok(green_tree) => {
28                let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
29                match self.build_root(green_tree, &source_text) {
30                    Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
31                    Err(build_error) => {
32                        let mut diagnostics = parse_result.diagnostics;
33                        diagnostics.push(build_error.clone());
34                        OakDiagnostics { result: Err(build_error), diagnostics }
35                    }
36                }
37            }
38            Err(e) => OakDiagnostics { result: Err(e), diagnostics: parse_result.diagnostics },
39        }
40    }
41}
42
43impl<'config> PythonBuilder<'config> {
44    pub fn build_root(&self, green_tree: &GreenNode<PythonLanguage>, source: &SourceText) -> Result<PythonRoot, OakError> {
45        let mut statements = Vec::new();
46        let mut current_offset = 0;
47
48        for child in green_tree.children() {
49            let child_len = child.len() as usize;
50            match child {
51                GreenTree::Node(node) => {
52                    if let Some(stmt) = self.build_statement(node, current_offset, source)? {
53                        statements.push(stmt);
54                    }
55                }
56                GreenTree::Leaf(_) => {}
57            }
58            current_offset += child_len;
59        }
60
61        Ok(PythonRoot { program: Program { statements }, span: (0..green_tree.text_len() as usize).into() })
62    }
63
64    fn build_statement(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Option<Statement>, OakError> {
65        match node.kind {
66            PythonSyntaxKind::FunctionDef => {
67                let mut name = String::new();
68                let mut parameters = Vec::new();
69                let mut body = Vec::new();
70                let mut current_offset = offset;
71
72                for child in node.children() {
73                    let child_len = child.len() as usize;
74                    match child {
75                        GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
76                            name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
77                        }
78                        GreenTree::Node(n) if n.kind == PythonSyntaxKind::Arguments => {
79                            parameters = self.build_parameters(n, current_offset, source)?;
80                        }
81                        GreenTree::Node(n) if n.kind == PythonSyntaxKind::Suite => {
82                            body = self.build_suite(n, current_offset, source)?;
83                        }
84                        _ => {}
85                    }
86                    current_offset += child_len;
87                }
88                Ok(Some(Statement::FunctionDef { name, parameters, return_type: None, body }))
89            }
90            PythonSyntaxKind::ClassDef => {
91                let mut name = String::new();
92                let mut body = Vec::new();
93                let mut current_offset = offset;
94
95                for child in node.children() {
96                    let child_len = child.len() as usize;
97                    match child {
98                        GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
99                            name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
100                        }
101                        GreenTree::Node(n) if n.kind == PythonSyntaxKind::Suite => {
102                            body = self.build_suite(n, current_offset, source)?;
103                        }
104                        _ => {}
105                    }
106                    current_offset += child_len;
107                }
108                Ok(Some(Statement::ClassDef { name, bases: Vec::new(), body }))
109            }
110            PythonSyntaxKind::Return => {
111                let mut value = None;
112                let mut current_offset = offset;
113                for child in node.children() {
114                    let child_len = child.len() as usize;
115                    if let GreenTree::Node(n) = child {
116                        value = Some(self.build_expression(n, current_offset, source)?);
117                    }
118                    current_offset += child_len;
119                }
120                Ok(Some(Statement::Return(value)))
121            }
122            PythonSyntaxKind::AssignStmt => {
123                let mut left = None;
124                let mut right = None;
125                let mut current_offset = offset;
126
127                for child in node.children() {
128                    let child_len = child.len() as usize;
129                    match child {
130                        GreenTree::Node(n) => {
131                            if !n.kind.is_ignored() {
132                                if left.is_none() {
133                                    left = Some(self.build_expression(n, current_offset, source)?);
134                                }
135                                else {
136                                    right = Some(self.build_expression(n, current_offset, source)?);
137                                }
138                            }
139                        }
140                        _ => {}
141                    }
142                    current_offset += child_len;
143                }
144
145                if let (Some(l), Some(r)) = (left, right) { Ok(Some(Statement::Assignment { target: l, value: r })) } else { Ok(None) }
146            }
147            PythonSyntaxKind::Expr => {
148                let mut current_offset = offset;
149                for child in node.children() {
150                    let child_len = child.len() as usize;
151                    match child {
152                        GreenTree::Node(n) => {
153                            if !n.kind.is_ignored() {
154                                if n.kind == PythonSyntaxKind::AssignStmt {
155                                    return self.build_statement(n, current_offset, source);
156                                }
157                                return Ok(Some(Statement::Expression(self.build_expression(n, current_offset, source)?)));
158                            }
159                        }
160                        _ => {}
161                    }
162                    current_offset += child_len;
163                }
164                Ok(None)
165            }
166            PythonSyntaxKind::If => {
167                let mut test = None;
168                let mut body = Vec::new();
169                let mut orelse = Vec::new();
170                let mut current_offset = offset;
171
172                for child in node.children() {
173                    let child_len = child.len() as usize;
174                    if let GreenTree::Node(n) = child {
175                        if test.is_none() && n.kind != PythonSyntaxKind::Suite && !n.kind.is_ignored() {
176                            test = Some(self.build_expression(n, current_offset, source)?);
177                        }
178                        else if body.is_empty() && n.kind == PythonSyntaxKind::Suite {
179                            body = self.build_suite(n, current_offset, source)?;
180                        }
181                        else if n.kind == PythonSyntaxKind::Suite {
182                            orelse = self.build_suite(n, current_offset, source)?;
183                        }
184                    }
185                    current_offset += child_len;
186                }
187
188                Ok(Some(Statement::If { test: test.unwrap_or(Expression::Literal(Literal::Boolean(true))), body, orelse }))
189            }
190            PythonSyntaxKind::While => {
191                let mut test = None;
192                let mut body = Vec::new();
193                let mut current_offset = offset;
194
195                for child in node.children() {
196                    let child_len = child.len() as usize;
197                    if let GreenTree::Node(n) = child {
198                        if test.is_none() && n.kind != PythonSyntaxKind::Suite && !n.kind.is_ignored() {
199                            test = Some(self.build_expression(n, current_offset, source)?);
200                        }
201                        else if n.kind == PythonSyntaxKind::Suite {
202                            body = self.build_suite(n, current_offset, source)?;
203                        }
204                    }
205                    current_offset += child_len;
206                }
207
208                Ok(Some(Statement::While { test: test.unwrap_or(Expression::Literal(Literal::Boolean(true))), body, orelse: Vec::new() }))
209            }
210            PythonSyntaxKind::Pass => Ok(Some(Statement::Pass)),
211            PythonSyntaxKind::Break => Ok(Some(Statement::Break)),
212            PythonSyntaxKind::Continue => Ok(Some(Statement::Continue)),
213            PythonSyntaxKind::Suite => {
214                // Suites are handled by build_suite
215                Ok(None)
216            }
217            _ => Ok(None),
218        }
219    }
220
221    fn build_suite(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Vec<Statement>, OakError> {
222        let mut statements = Vec::new();
223        let mut current_offset = offset;
224
225        for child in node.children() {
226            let child_len = child.len() as usize;
227            match child {
228                GreenTree::Node(n) => {
229                    if let Some(stmt) = self.build_statement(n, current_offset, source)? {
230                        statements.push(stmt);
231                    }
232                }
233                GreenTree::Leaf(_) => {}
234            }
235            current_offset += child_len;
236        }
237        Ok(statements)
238    }
239
240    fn build_expression(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Expression, OakError> {
241        match node.kind {
242            PythonSyntaxKind::Constant => {
243                let mut current_offset = offset;
244                for child in node.children() {
245                    if let GreenTree::Leaf(leaf) = child {
246                        if !leaf.kind.is_ignored() {
247                            let text = source.get_text_in((current_offset..current_offset + leaf.length as usize).into());
248                            if let Ok(val) = text.parse::<i64>() {
249                                return Ok(Expression::Literal(Literal::Integer(val)));
250                            }
251                            else if let Ok(val) = text.parse::<f64>() {
252                                return Ok(Expression::Literal(Literal::Float(val)));
253                            }
254                            else if text == "True" {
255                                return Ok(Expression::Literal(Literal::Boolean(true)));
256                            }
257                            else if text == "False" {
258                                return Ok(Expression::Literal(Literal::Boolean(false)));
259                            }
260                            else if text == "None" {
261                                return Ok(Expression::Literal(Literal::None));
262                            }
263                            else {
264                                let mut s = text.to_string();
265                                if (s.starts_with('"') && s.ends_with('"')) || (s.starts_with('\'') && s.ends_with('\'')) {
266                                    s = s[1..s.len() - 1].to_string();
267                                }
268                                return Ok(Expression::Literal(Literal::String(s)));
269                            }
270                        }
271                    }
272                    current_offset += child.len() as usize;
273                }
274                Ok(Expression::Name("invalid_constant".to_string()))
275            }
276            PythonSyntaxKind::Name => {
277                let mut current_offset = offset;
278                for child in node.children() {
279                    if let GreenTree::Leaf(leaf) = child {
280                        if !leaf.kind.is_ignored() {
281                            let text = source.get_text_in((current_offset..current_offset + leaf.length as usize).into());
282                            return Ok(Expression::Name(text.to_string()));
283                        }
284                    }
285                    current_offset += child.len() as usize;
286                }
287                Ok(Expression::Name("invalid_name".to_string()))
288            }
289            PythonSyntaxKind::BinOp => {
290                let mut left = None;
291                let mut operator = None;
292                let mut right = None;
293                let mut current_offset = offset;
294
295                for child in node.children() {
296                    let child_len = child.len() as usize;
297                    match child {
298                        GreenTree::Node(n) => {
299                            if !n.kind.is_ignored() {
300                                let expr = self.build_expression(n, current_offset, source)?;
301                                if left.is_none() {
302                                    left = Some(Box::new(expr));
303                                }
304                                else if right.is_none() {
305                                    right = Some(Box::new(expr));
306                                }
307                            }
308                        }
309                        GreenTree::Leaf(leaf) => {
310                            if !leaf.kind.is_ignored() {
311                                let op = match leaf.kind {
312                                    PythonSyntaxKind::Add | PythonSyntaxKind::Plus => Some(BinaryOperator::Add),
313                                    PythonSyntaxKind::Sub | PythonSyntaxKind::Minus => Some(BinaryOperator::Sub),
314                                    PythonSyntaxKind::Mult | PythonSyntaxKind::Star => Some(BinaryOperator::Mult),
315                                    PythonSyntaxKind::Div | PythonSyntaxKind::Slash => Some(BinaryOperator::Div),
316                                    PythonSyntaxKind::Mod | PythonSyntaxKind::Percent => Some(BinaryOperator::Mod),
317                                    PythonSyntaxKind::Pow | PythonSyntaxKind::DoubleStar => Some(BinaryOperator::Pow),
318                                    PythonSyntaxKind::LShift | PythonSyntaxKind::LeftShift => Some(BinaryOperator::LShift),
319                                    PythonSyntaxKind::RShift | PythonSyntaxKind::RightShift => Some(BinaryOperator::RShift),
320                                    PythonSyntaxKind::BitOr | PythonSyntaxKind::Pipe => Some(BinaryOperator::BitOr),
321                                    PythonSyntaxKind::BitXor | PythonSyntaxKind::Caret => Some(BinaryOperator::BitXor),
322                                    PythonSyntaxKind::BitAnd | PythonSyntaxKind::Ampersand => Some(BinaryOperator::BitAnd),
323                                    PythonSyntaxKind::FloorDiv | PythonSyntaxKind::DoubleSlash => Some(BinaryOperator::FloorDiv),
324                                    _ => None,
325                                };
326                                if let Some(op) = op {
327                                    operator = Some(op);
328                                }
329                            }
330                        }
331                    }
332                    current_offset += child_len;
333                }
334
335                let l_is = left.is_some();
336                let op_is = operator.is_some();
337                let r_is = right.is_some();
338                if let (Some(l), Some(op), Some(r)) = (left, operator, right) {
339                    Ok(Expression::BinaryOp { left: l, operator: op, right: r })
340                }
341                else {
342                    println!("Warning: Invalid BinOp at {}, left={}, op={}, right={}", offset, l_is, op_is, r_is);
343                    Ok(Expression::Name(format!("invalid_binop_at_{}", offset)))
344                }
345            }
346            PythonSyntaxKind::Call => {
347                let mut func = None;
348                let mut args = Vec::new();
349                let mut current_offset = offset;
350
351                for child in node.children() {
352                    let child_len = child.len() as usize;
353                    if let GreenTree::Node(n) = child {
354                        if !n.kind.is_ignored() {
355                            if func.is_none() {
356                                func = Some(Box::new(self.build_expression(n, current_offset, source)?));
357                            }
358                            else {
359                                args.push(self.build_expression(n, current_offset, source)?);
360                            }
361                        }
362                    }
363                    current_offset += child_len;
364                }
365
366                if let Some(f) = func { Ok(Expression::Call { func: f, args, keywords: Vec::new() }) } else { Ok(Expression::Name("invalid_call".to_string())) }
367            }
368            PythonSyntaxKind::Attribute => {
369                let mut value = None;
370                let mut attr = String::new();
371                let mut current_offset = offset;
372
373                for child in node.children() {
374                    let child_len = child.len() as usize;
375                    match child {
376                        GreenTree::Node(n) => {
377                            if !n.kind.is_ignored() {
378                                value = Some(Box::new(self.build_expression(n, current_offset, source)?));
379                            }
380                        }
381                        GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
382                            attr = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
383                        }
384                        _ => {}
385                    }
386                    current_offset += child_len;
387                }
388
389                if let Some(v) = value { Ok(Expression::Attribute { value: v, attr }) } else { Ok(Expression::Name("invalid_attribute".to_string())) }
390            }
391            _ => Ok(Expression::Name("expr".to_string())),
392        }
393    }
394
395    fn build_parameters(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Vec<Parameter>, OakError> {
396        let mut parameters = Vec::new();
397        let mut current_offset = offset;
398
399        for child in node.children() {
400            let child_len = child.len() as usize;
401            if let GreenTree::Node(n) = child {
402                if n.kind == PythonSyntaxKind::Arg {
403                    parameters.push(self.build_parameter(n, current_offset, source)?);
404                }
405            }
406            current_offset += child_len;
407        }
408        Ok(parameters)
409    }
410
411    fn build_parameter(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Parameter, OakError> {
412        let mut name = String::new();
413        let mut current_offset = offset;
414
415        for child in node.children() {
416            let child_len = child.len() as usize;
417            if let GreenTree::Leaf(leaf) = child {
418                if leaf.kind == PythonSyntaxKind::Identifier {
419                    name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
420                }
421            }
422            current_offset += child_len;
423        }
424
425        Ok(Parameter { name, annotation: None, default: None })
426    }
427}