1use crate::{
2 ast::{BinaryOperator, Expression, Literal, Parameter, Program, PythonRoot, Statement},
3 kind::PythonSyntaxKind,
4 language::PythonLanguage,
5 parser::PythonParser,
6};
7use oak_core::{Builder, BuilderCache, GreenNode, GreenTree, OakDiagnostics, OakError, Parser, SourceText, TextEdit, TokenType, builder::BuildOutput, source::Source};
8
9pub struct PythonBuilder<'config> {
10 config: &'config PythonLanguage,
11}
12
13impl<'config> PythonBuilder<'config> {
14 pub fn new(config: &'config PythonLanguage) -> Self {
15 Self { config }
16 }
17}
18
19impl<'config> Builder<PythonLanguage> for PythonBuilder<'config> {
20 fn build<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], _cache: &'a mut impl BuilderCache<PythonLanguage>) -> BuildOutput<PythonLanguage> {
21 let parser = PythonParser::new(self.config);
22
23 let mut parse_cache = oak_core::parser::session::ParseSession::<PythonLanguage>::default();
24 let parse_result = parser.parse(source, edits, &mut parse_cache);
25
26 match parse_result.result {
27 Ok(green_tree) => {
28 let source_text = SourceText::new(source.get_text_in((0..source.length()).into()).into_owned());
29 match self.build_root(green_tree, &source_text) {
30 Ok(ast_root) => OakDiagnostics { result: Ok(ast_root), diagnostics: parse_result.diagnostics },
31 Err(build_error) => {
32 let mut diagnostics = parse_result.diagnostics;
33 diagnostics.push(build_error.clone());
34 OakDiagnostics { result: Err(build_error), diagnostics }
35 }
36 }
37 }
38 Err(e) => OakDiagnostics { result: Err(e), diagnostics: parse_result.diagnostics },
39 }
40 }
41}
42
43impl<'config> PythonBuilder<'config> {
44 pub fn build_root(&self, green_tree: &GreenNode<PythonLanguage>, source: &SourceText) -> Result<PythonRoot, OakError> {
45 let mut statements = Vec::new();
46 let mut current_offset = 0;
47
48 for child in green_tree.children() {
49 let child_len = child.len() as usize;
50 match child {
51 GreenTree::Node(node) => {
52 if let Some(stmt) = self.build_statement(node, current_offset, source)? {
53 statements.push(stmt);
54 }
55 }
56 GreenTree::Leaf(_) => {}
57 }
58 current_offset += child_len;
59 }
60
61 Ok(PythonRoot { program: Program { statements }, span: (0..green_tree.text_len() as usize).into() })
62 }
63
64 fn build_statement(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Option<Statement>, OakError> {
65 match node.kind {
66 PythonSyntaxKind::FunctionDef => {
67 let mut name = String::new();
68 let mut parameters = Vec::new();
69 let mut body = Vec::new();
70 let mut current_offset = offset;
71
72 for child in node.children() {
73 let child_len = child.len() as usize;
74 match child {
75 GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
76 name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
77 }
78 GreenTree::Node(n) if n.kind == PythonSyntaxKind::Arguments => {
79 parameters = self.build_parameters(n, current_offset, source)?;
80 }
81 GreenTree::Node(n) if n.kind == PythonSyntaxKind::Suite => {
82 body = self.build_suite(n, current_offset, source)?;
83 }
84 _ => {}
85 }
86 current_offset += child_len;
87 }
88 Ok(Some(Statement::FunctionDef { name, parameters, return_type: None, body }))
89 }
90 PythonSyntaxKind::ClassDef => {
91 let mut name = String::new();
92 let mut body = Vec::new();
93 let mut current_offset = offset;
94
95 for child in node.children() {
96 let child_len = child.len() as usize;
97 match child {
98 GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
99 name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
100 }
101 GreenTree::Node(n) if n.kind == PythonSyntaxKind::Suite => {
102 body = self.build_suite(n, current_offset, source)?;
103 }
104 _ => {}
105 }
106 current_offset += child_len;
107 }
108 Ok(Some(Statement::ClassDef { name, bases: Vec::new(), body }))
109 }
110 PythonSyntaxKind::Return => {
111 let mut value = None;
112 let mut current_offset = offset;
113 for child in node.children() {
114 let child_len = child.len() as usize;
115 if let GreenTree::Node(n) = child {
116 value = Some(self.build_expression(n, current_offset, source)?);
117 }
118 current_offset += child_len;
119 }
120 Ok(Some(Statement::Return(value)))
121 }
122 PythonSyntaxKind::AssignStmt => {
123 let mut left = None;
124 let mut right = None;
125 let mut current_offset = offset;
126
127 for child in node.children() {
128 let child_len = child.len() as usize;
129 match child {
130 GreenTree::Node(n) => {
131 if !n.kind.is_ignored() {
132 if left.is_none() {
133 left = Some(self.build_expression(n, current_offset, source)?);
134 }
135 else {
136 right = Some(self.build_expression(n, current_offset, source)?);
137 }
138 }
139 }
140 _ => {}
141 }
142 current_offset += child_len;
143 }
144
145 if let (Some(l), Some(r)) = (left, right) { Ok(Some(Statement::Assignment { target: l, value: r })) } else { Ok(None) }
146 }
147 PythonSyntaxKind::Expr => {
148 let mut current_offset = offset;
149 for child in node.children() {
150 let child_len = child.len() as usize;
151 match child {
152 GreenTree::Node(n) => {
153 if !n.kind.is_ignored() {
154 if n.kind == PythonSyntaxKind::AssignStmt {
155 return self.build_statement(n, current_offset, source);
156 }
157 return Ok(Some(Statement::Expression(self.build_expression(n, current_offset, source)?)));
158 }
159 }
160 _ => {}
161 }
162 current_offset += child_len;
163 }
164 Ok(None)
165 }
166 PythonSyntaxKind::If => {
167 let mut test = None;
168 let mut body = Vec::new();
169 let mut orelse = Vec::new();
170 let mut current_offset = offset;
171
172 for child in node.children() {
173 let child_len = child.len() as usize;
174 if let GreenTree::Node(n) = child {
175 if test.is_none() && n.kind != PythonSyntaxKind::Suite && !n.kind.is_ignored() {
176 test = Some(self.build_expression(n, current_offset, source)?);
177 }
178 else if body.is_empty() && n.kind == PythonSyntaxKind::Suite {
179 body = self.build_suite(n, current_offset, source)?;
180 }
181 else if n.kind == PythonSyntaxKind::Suite {
182 orelse = self.build_suite(n, current_offset, source)?;
183 }
184 }
185 current_offset += child_len;
186 }
187
188 Ok(Some(Statement::If { test: test.unwrap_or(Expression::Literal(Literal::Boolean(true))), body, orelse }))
189 }
190 PythonSyntaxKind::While => {
191 let mut test = None;
192 let mut body = Vec::new();
193 let mut current_offset = offset;
194
195 for child in node.children() {
196 let child_len = child.len() as usize;
197 if let GreenTree::Node(n) = child {
198 if test.is_none() && n.kind != PythonSyntaxKind::Suite && !n.kind.is_ignored() {
199 test = Some(self.build_expression(n, current_offset, source)?);
200 }
201 else if n.kind == PythonSyntaxKind::Suite {
202 body = self.build_suite(n, current_offset, source)?;
203 }
204 }
205 current_offset += child_len;
206 }
207
208 Ok(Some(Statement::While { test: test.unwrap_or(Expression::Literal(Literal::Boolean(true))), body, orelse: Vec::new() }))
209 }
210 PythonSyntaxKind::Pass => Ok(Some(Statement::Pass)),
211 PythonSyntaxKind::Break => Ok(Some(Statement::Break)),
212 PythonSyntaxKind::Continue => Ok(Some(Statement::Continue)),
213 PythonSyntaxKind::Suite => {
214 Ok(None)
216 }
217 _ => Ok(None),
218 }
219 }
220
221 fn build_suite(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Vec<Statement>, OakError> {
222 let mut statements = Vec::new();
223 let mut current_offset = offset;
224
225 for child in node.children() {
226 let child_len = child.len() as usize;
227 match child {
228 GreenTree::Node(n) => {
229 if let Some(stmt) = self.build_statement(n, current_offset, source)? {
230 statements.push(stmt);
231 }
232 }
233 GreenTree::Leaf(_) => {}
234 }
235 current_offset += child_len;
236 }
237 Ok(statements)
238 }
239
240 fn build_expression(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Expression, OakError> {
241 match node.kind {
242 PythonSyntaxKind::Constant => {
243 let mut current_offset = offset;
244 for child in node.children() {
245 if let GreenTree::Leaf(leaf) = child {
246 if !leaf.kind.is_ignored() {
247 let text = source.get_text_in((current_offset..current_offset + leaf.length as usize).into());
248 if let Ok(val) = text.parse::<i64>() {
249 return Ok(Expression::Literal(Literal::Integer(val)));
250 }
251 else if let Ok(val) = text.parse::<f64>() {
252 return Ok(Expression::Literal(Literal::Float(val)));
253 }
254 else if text == "True" {
255 return Ok(Expression::Literal(Literal::Boolean(true)));
256 }
257 else if text == "False" {
258 return Ok(Expression::Literal(Literal::Boolean(false)));
259 }
260 else if text == "None" {
261 return Ok(Expression::Literal(Literal::None));
262 }
263 else {
264 let mut s = text.to_string();
265 if (s.starts_with('"') && s.ends_with('"')) || (s.starts_with('\'') && s.ends_with('\'')) {
266 s = s[1..s.len() - 1].to_string();
267 }
268 return Ok(Expression::Literal(Literal::String(s)));
269 }
270 }
271 }
272 current_offset += child.len() as usize;
273 }
274 Ok(Expression::Name("invalid_constant".to_string()))
275 }
276 PythonSyntaxKind::Name => {
277 let mut current_offset = offset;
278 for child in node.children() {
279 if let GreenTree::Leaf(leaf) = child {
280 if !leaf.kind.is_ignored() {
281 let text = source.get_text_in((current_offset..current_offset + leaf.length as usize).into());
282 return Ok(Expression::Name(text.to_string()));
283 }
284 }
285 current_offset += child.len() as usize;
286 }
287 Ok(Expression::Name("invalid_name".to_string()))
288 }
289 PythonSyntaxKind::BinOp => {
290 let mut left = None;
291 let mut operator = None;
292 let mut right = None;
293 let mut current_offset = offset;
294
295 for child in node.children() {
296 let child_len = child.len() as usize;
297 match child {
298 GreenTree::Node(n) => {
299 if !n.kind.is_ignored() {
300 let expr = self.build_expression(n, current_offset, source)?;
301 if left.is_none() {
302 left = Some(Box::new(expr));
303 }
304 else if right.is_none() {
305 right = Some(Box::new(expr));
306 }
307 }
308 }
309 GreenTree::Leaf(leaf) => {
310 if !leaf.kind.is_ignored() {
311 let op = match leaf.kind {
312 PythonSyntaxKind::Add | PythonSyntaxKind::Plus => Some(BinaryOperator::Add),
313 PythonSyntaxKind::Sub | PythonSyntaxKind::Minus => Some(BinaryOperator::Sub),
314 PythonSyntaxKind::Mult | PythonSyntaxKind::Star => Some(BinaryOperator::Mult),
315 PythonSyntaxKind::Div | PythonSyntaxKind::Slash => Some(BinaryOperator::Div),
316 PythonSyntaxKind::Mod | PythonSyntaxKind::Percent => Some(BinaryOperator::Mod),
317 PythonSyntaxKind::Pow | PythonSyntaxKind::DoubleStar => Some(BinaryOperator::Pow),
318 PythonSyntaxKind::LShift | PythonSyntaxKind::LeftShift => Some(BinaryOperator::LShift),
319 PythonSyntaxKind::RShift | PythonSyntaxKind::RightShift => Some(BinaryOperator::RShift),
320 PythonSyntaxKind::BitOr | PythonSyntaxKind::Pipe => Some(BinaryOperator::BitOr),
321 PythonSyntaxKind::BitXor | PythonSyntaxKind::Caret => Some(BinaryOperator::BitXor),
322 PythonSyntaxKind::BitAnd | PythonSyntaxKind::Ampersand => Some(BinaryOperator::BitAnd),
323 PythonSyntaxKind::FloorDiv | PythonSyntaxKind::DoubleSlash => Some(BinaryOperator::FloorDiv),
324 _ => None,
325 };
326 if let Some(op) = op {
327 operator = Some(op);
328 }
329 }
330 }
331 }
332 current_offset += child_len;
333 }
334
335 let l_is = left.is_some();
336 let op_is = operator.is_some();
337 let r_is = right.is_some();
338 if let (Some(l), Some(op), Some(r)) = (left, operator, right) {
339 Ok(Expression::BinaryOp { left: l, operator: op, right: r })
340 }
341 else {
342 println!("Warning: Invalid BinOp at {}, left={}, op={}, right={}", offset, l_is, op_is, r_is);
343 Ok(Expression::Name(format!("invalid_binop_at_{}", offset)))
344 }
345 }
346 PythonSyntaxKind::Call => {
347 let mut func = None;
348 let mut args = Vec::new();
349 let mut current_offset = offset;
350
351 for child in node.children() {
352 let child_len = child.len() as usize;
353 if let GreenTree::Node(n) = child {
354 if !n.kind.is_ignored() {
355 if func.is_none() {
356 func = Some(Box::new(self.build_expression(n, current_offset, source)?));
357 }
358 else {
359 args.push(self.build_expression(n, current_offset, source)?);
360 }
361 }
362 }
363 current_offset += child_len;
364 }
365
366 if let Some(f) = func { Ok(Expression::Call { func: f, args, keywords: Vec::new() }) } else { Ok(Expression::Name("invalid_call".to_string())) }
367 }
368 PythonSyntaxKind::Attribute => {
369 let mut value = None;
370 let mut attr = String::new();
371 let mut current_offset = offset;
372
373 for child in node.children() {
374 let child_len = child.len() as usize;
375 match child {
376 GreenTree::Node(n) => {
377 if !n.kind.is_ignored() {
378 value = Some(Box::new(self.build_expression(n, current_offset, source)?));
379 }
380 }
381 GreenTree::Leaf(leaf) if leaf.kind == PythonSyntaxKind::Identifier => {
382 attr = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
383 }
384 _ => {}
385 }
386 current_offset += child_len;
387 }
388
389 if let Some(v) = value { Ok(Expression::Attribute { value: v, attr }) } else { Ok(Expression::Name("invalid_attribute".to_string())) }
390 }
391 _ => Ok(Expression::Name("expr".to_string())),
392 }
393 }
394
395 fn build_parameters(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Vec<Parameter>, OakError> {
396 let mut parameters = Vec::new();
397 let mut current_offset = offset;
398
399 for child in node.children() {
400 let child_len = child.len() as usize;
401 if let GreenTree::Node(n) = child {
402 if n.kind == PythonSyntaxKind::Arg {
403 parameters.push(self.build_parameter(n, current_offset, source)?);
404 }
405 }
406 current_offset += child_len;
407 }
408 Ok(parameters)
409 }
410
411 fn build_parameter(&self, node: &GreenNode<PythonLanguage>, offset: usize, source: &SourceText) -> Result<Parameter, OakError> {
412 let mut name = String::new();
413 let mut current_offset = offset;
414
415 for child in node.children() {
416 let child_len = child.len() as usize;
417 if let GreenTree::Leaf(leaf) = child {
418 if leaf.kind == PythonSyntaxKind::Identifier {
419 name = source.get_text_in((current_offset..current_offset + leaf.length as usize).into()).trim().to_string();
420 }
421 }
422 current_offset += child_len;
423 }
424
425 Ok(Parameter { name, annotation: None, default: None })
426 }
427}