1use crate::{kind::PythonSyntaxKind, language::PythonLanguage, lexer::PythonLexer};
2use oak_core::{
3 OakError, TokenType,
4 parser::{
5 ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer,
6 pratt::{Associativity, Pratt, PrattParser},
7 },
8 source::{Source, TextEdit},
9 tree::GreenNode,
10};
11
/// Shorthand for the shared [`ParserState`] specialised to [`PythonLanguage`] over a source `S`.
pub(crate) type State<'a, S> = ParserState<'a, PythonLanguage, S>;
13
/// Parser for Python source that borrows its [`PythonLanguage`] configuration.
pub struct PythonParser<'config> {
    /// Language configuration; it is also handed to the lexer when a parse starts.
    pub(crate) config: &'config PythonLanguage,
}
17
18impl<'config> PythonParser<'config> {
19 pub fn new(config: &'config PythonLanguage) -> Self {
20 Self { config }
21 }
22
23 fn advance_until<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, kind: PythonSyntaxKind) {
24 while state.not_at_end() && !state.at(kind) {
25 state.advance();
26 }
27 }
28
29 fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
30 while state.not_at_end() {
31 if let Some(kind) = state.peek_kind() {
32 if kind.is_ignored() {
33 state.bump();
34 continue;
35 }
36 }
37 break;
38 }
39 }
40
41 fn parse_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, min_precedence: u8) -> &'a GreenNode<'a, PythonLanguage> {
42 let node = PrattParser::parse(state, min_precedence, self);
43 state.push_child(node);
44 node
45 }
46
47 pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
48 use crate::kind::PythonSyntaxKind::*;
49 self.skip_trivia(state);
50
51 while state.eat(Newline) {
53 self.skip_trivia(state);
54 }
55
56 if !state.not_at_end() || state.at(Dedent) {
57 return Ok(());
58 }
59
60 if state.at(DefKeyword) {
61 state.incremental_node(FunctionDef.into(), |state| self.parse_function_def_body(state))
62 }
63 else if state.at(ClassKeyword) {
64 state.incremental_node(ClassDef.into(), |state| self.parse_class_def_body(state))
65 }
66 else if state.at(IfKeyword) {
67 state.incremental_node(If.into(), |state| self.parse_if_stmt_body(state))
68 }
69 else if state.at(WhileKeyword) {
70 state.incremental_node(While.into(), |state| self.parse_while_stmt_body(state))
71 }
72 else if state.at(ForKeyword) {
73 state.incremental_node(For.into(), |state| self.parse_for_stmt_body(state))
74 }
75 else if state.eat(ReturnKeyword) {
76 let cp = state.checkpoint();
77 self.parse_return_stmt_body(state)?;
78 state.finish_at(cp, Return.into());
79 state.eat(Newline);
80 Ok(())
81 }
82 else if state.at(ImportKeyword) || state.at(FromKeyword) {
83 state.incremental_node(Import.into(), |state| self.parse_import_stmt_body(state))
84 }
85 else if state.eat(PassKeyword) {
86 state.incremental_node(Pass.into(), |state| {
87 self.skip_trivia(state);
88 state.eat(Newline);
89 Ok(())
90 })
91 }
92 else if state.eat(BreakKeyword) {
93 state.incremental_node(Break.into(), |state| {
94 self.skip_trivia(state);
95 state.eat(Newline);
96 Ok(())
97 })
98 }
99 else if state.eat(ContinueKeyword) {
100 state.incremental_node(Continue.into(), |state| {
101 self.skip_trivia(state);
102 state.eat(Newline);
103 Ok(())
104 })
105 }
106 else {
107 let cp = state.checkpoint();
108 self.parse_expression(state, 0);
109 state.finish_at(cp, Expr.into());
110 self.skip_trivia(state);
111 state.eat(Newline);
112 Ok(())
113 }
114 }
115
116 fn parse_function_def_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
117 use crate::kind::PythonSyntaxKind::*;
118 state.expect(DefKeyword).ok();
119 self.skip_trivia(state);
120 if !state.expect(Identifier).is_ok() {
121 }
124 self.skip_trivia(state);
125 state.expect(LeftParen).ok();
126 state.incremental_node(Arguments.into(), |state| {
127 while state.not_at_end() && !state.at(RightParen) {
128 self.skip_trivia(state);
129 if state.at(RightParen) {
130 break;
131 }
132 state.incremental_node(Arg.into(), |state| {
133 state.expect(Identifier).ok();
134 self.skip_trivia(state);
135 if state.eat(Colon) {
136 self.skip_trivia(state);
137 while state.not_at_end() && !state.at(Comma) && !state.at(RightParen) {
139 state.advance();
140 }
141 }
142 Ok(())
143 })?;
144 self.skip_trivia(state);
145 if !state.eat(Comma) {
146 break;
147 }
148 }
149 Ok(())
150 })?;
151 self.skip_trivia(state);
152 state.expect(RightParen).ok();
153 self.skip_trivia(state);
154 if state.eat(Arrow) {
155 self.skip_trivia(state);
156 self.advance_until(state, Colon);
157 }
158 state.expect(Colon).ok();
159 self.parse_suite(state)?;
160 Ok(())
161 }
162
163 fn parse_class_def_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
164 use crate::kind::PythonSyntaxKind::*;
165 state.expect(ClassKeyword).ok();
166 self.skip_trivia(state);
167 state.expect(Identifier).ok();
168 self.skip_trivia(state);
169 if state.eat(LeftParen) {
170 self.skip_trivia(state);
171 self.advance_until(state, RightParen);
172 state.expect(RightParen).ok();
173 }
174 self.skip_trivia(state);
175 state.expect(Colon).ok();
176 self.parse_suite(state)?;
177 Ok(())
178 }
179
180 fn parse_if_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
181 use crate::kind::PythonSyntaxKind::*;
182 state.expect(IfKeyword).ok();
183 self.skip_trivia(state);
184 PrattParser::parse(state, 0, self);
185 self.skip_trivia(state);
186 state.expect(Colon).ok();
187 self.parse_suite(state)?;
188 self.skip_trivia(state);
189 while state.eat(ElifKeyword) {
190 self.skip_trivia(state);
191 PrattParser::parse(state, 0, self);
192 self.skip_trivia(state);
193 state.expect(Colon).ok();
194 self.parse_suite(state)?;
195 self.skip_trivia(state);
196 }
197 if state.eat(ElseKeyword) {
198 self.skip_trivia(state);
199 state.expect(Colon).ok();
200 self.parse_suite(state)?;
201 }
202 Ok(())
203 }
204
205 fn parse_while_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
206 use crate::kind::PythonSyntaxKind::*;
207 state.expect(WhileKeyword).ok();
208 self.skip_trivia(state);
209 PrattParser::parse(state, 0, self);
210 self.skip_trivia(state);
211 state.expect(Colon).ok();
212 self.parse_suite(state)?;
213 self.skip_trivia(state);
214 if state.eat(ElseKeyword) {
215 self.skip_trivia(state);
216 state.expect(Colon).ok();
217 self.parse_suite(state)?;
218 }
219 Ok(())
220 }
221
222 fn parse_for_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
223 use crate::kind::PythonSyntaxKind::*;
224 state.expect(ForKeyword).ok();
225 self.skip_trivia(state);
226 PrattParser::parse(state, 0, self);
227 self.skip_trivia(state);
228 state.expect(InKeyword).ok();
229 self.skip_trivia(state);
230 PrattParser::parse(state, 0, self);
231 self.skip_trivia(state);
232 state.expect(Colon).ok();
233 self.parse_suite(state)?;
234 self.skip_trivia(state);
235 if state.eat(ElseKeyword) {
236 self.skip_trivia(state);
237 state.expect(Colon).ok();
238 self.parse_suite(state)?;
239 }
240 Ok(())
241 }
242
243 fn parse_return_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
244 use crate::kind::PythonSyntaxKind::*;
245 self.skip_trivia(state);
246 if state.not_at_end() && !state.at(Newline) && !state.at(Semicolon) {
247 self.parse_expression(state, 0);
248 }
249 Ok(())
250 }
251
252 fn parse_import_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
253 use crate::kind::PythonSyntaxKind::*;
254 if state.eat(ImportKeyword) {
255 self.advance_until(state, Newline);
256 }
257 else if state.eat(FromKeyword) {
258 self.advance_until(state, Newline);
259 }
260 Ok(())
261 }
262
263 fn parse_suite<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
264 use crate::kind::PythonSyntaxKind::*;
265 let cp = state.checkpoint();
266 self.skip_trivia(state);
267 if state.eat(Newline) {
268 self.skip_trivia(state);
269 state.expect(Indent).ok();
270 while state.not_at_end() && !state.at(Dedent) {
271 self.parse_statement(state)?;
272 self.skip_trivia(state);
273 }
274 state.expect(Dedent).ok();
275 }
276 else {
277 self.parse_statement(state)?;
278 }
279 state.finish_at(cp, Suite.into());
280 Ok(())
281 }
282
283 fn parse_root_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<&'a GreenNode<'a, PythonLanguage>, OakError> {
284 let checkpoint = state.checkpoint();
285
286 while state.not_at_end() {
287 self.parse_statement(state)?;
288 }
289 self.skip_trivia(state);
290
291 Ok(state.finish_at(checkpoint, PythonSyntaxKind::ExpressionModule.into()))
292 }
293}
294
295impl<'config> Pratt<PythonLanguage> for PythonParser<'config> {
296 fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, PythonLanguage> {
297 use crate::kind::PythonSyntaxKind::*;
298 self.skip_trivia(state);
299 let cp = state.checkpoint();
300 let kind = state.peek_kind();
301 match kind {
302 Some(Identifier) => {
303 state.bump();
304 state.finish_at(cp, Name.into())
305 }
306 Some(Number) | Some(String) | Some(Bytes) | Some(FString) | Some(TrueKeyword) | Some(FalseKeyword) | Some(NoneKeyword) => {
307 state.bump();
308 state.finish_at(cp, Constant.into())
309 }
310 Some(LeftParen) => {
311 state.bump();
312 let cp_inner = state.checkpoint();
313 let inner = PrattParser::parse(state, 0, self);
314 state.push_child(inner);
315 state.finish_at(cp_inner, Expr.into());
316 self.skip_trivia(state);
317 state.expect(RightParen).ok();
318 state.finish_at(cp, Tuple.into())
319 }
320 Some(LeftBracket) => {
321 state.bump();
322 self.advance_until(state, RightBracket);
323 state.expect(RightBracket).ok();
324 state.finish_at(cp, List.into())
325 }
326 Some(LeftBrace) => {
327 state.bump();
328 self.advance_until(state, RightBrace);
329 state.expect(RightBrace).ok();
330 state.finish_at(cp, Dict.into())
331 }
332 _ => {
333 state.bump();
334 state.finish_at(cp, Error.into())
335 }
336 }
337 }
338
339 fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, PythonLanguage> {
340 use crate::kind::PythonSyntaxKind::*;
341 self.skip_trivia(state);
342 let kind = state.peek_kind().expect("Expected token in prefix");
343 match kind {
344 Plus | Minus | Tilde | NotKeyword => {
345 let cp = state.checkpoint();
346 state.expect(kind).ok();
347 let right = PrattParser::parse(state, 14, self);
348 state.push_child(right);
349 state.finish_at(cp, UnaryOp.into())
350 }
351 _ => self.primary(state),
352 }
353 }
354
355 fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, _left: &'a GreenNode<'a, PythonLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, PythonLanguage>> {
356 use crate::kind::PythonSyntaxKind::*;
357
358 let mut lookahead = 0;
360 let mut kind = None;
361 while let Some(k) = state.peek_kind_at(lookahead) {
362 if k.is_ignored() {
363 lookahead += 1;
364 continue;
365 }
366 kind = Some(k);
367 break;
368 }
369
370 let kind = kind?;
371
372 let (prec, assoc) = match kind {
373 Assign | PlusAssign | MinusAssign | StarAssign | DoubleStarAssign | SlashAssign | DoubleSlashAssign | PercentAssign | AtAssign | AmpersandAssign | PipeAssign | CaretAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
374 OrKeyword => (2, Associativity::Left),
375 AndKeyword => (3, Associativity::Left),
376 NotKeyword => (4, Associativity::Left),
377 Less | Greater | Equal | NotEqual | LessEqual | GreaterEqual | InKeyword | IsKeyword => (5, Associativity::Left),
378 Pipe => (6, Associativity::Left),
379 Caret => (7, Associativity::Left),
380 Ampersand => (8, Associativity::Left),
381 LeftShift | RightShift => (9, Associativity::Left),
382 Plus | Minus => (10, Associativity::Left),
383 Star | Slash | DoubleSlash | Percent | At => (11, Associativity::Left),
384 DoubleStar => (13, Associativity::Right),
385 Dot | LeftParen | LeftBracket => (15, Associativity::Left),
386 _ => return None,
387 };
388
389 if prec < min_precedence {
390 return None;
391 }
392
393 match kind {
394 LeftParen => {
395 let cp = (0, state.sink.checkpoint() - 1);
396 self.skip_trivia(state);
397 state.expect(LeftParen).ok();
398 self.advance_until(state, RightParen);
399 state.expect(RightParen).ok();
400 Some(state.finish_at(cp, Call.into()))
401 }
402 LeftBracket => {
403 let cp = (0, state.sink.checkpoint() - 1);
404 self.skip_trivia(state);
405 state.expect(LeftBracket).ok();
406 self.advance_until(state, RightBracket);
407 state.expect(RightBracket).ok();
408 Some(state.finish_at(cp, Subscript.into()))
409 }
410 Dot => {
411 let cp = (0, state.sink.checkpoint() - 1);
412 self.skip_trivia(state);
413 state.expect(Dot).ok();
414 self.skip_trivia(state);
415 state.expect(Identifier).ok();
416 Some(state.finish_at(cp, Attribute.into()))
417 }
418 _ => {
419 let result_kind = if prec == 1 {
420 AssignStmt
421 }
422 else if prec <= 3 {
423 BoolOp
424 }
425 else if prec == 5 {
426 Compare
427 }
428 else {
429 BinOp
430 };
431
432 let cp = (0, state.sink.checkpoint() - 1);
433 self.skip_trivia(state);
434 state.expect(kind).ok();
435
436 let next_prec = match assoc {
437 Associativity::Left => prec + 1,
438 Associativity::Right => prec,
439 Associativity::None => prec + 1,
440 };
441
442 PrattParser::parse(state, next_prec, self);
443 Some(state.finish_at(cp, result_kind.into()))
444 }
445 }
446 }
447}
448
449impl<'config> Parser<PythonLanguage> for PythonParser<'config> {
450 fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<PythonLanguage>) -> ParseOutput<'a, PythonLanguage> {
451 let lexer = PythonLexer::new(self.config);
452 parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_root_internal(state))
453 }
454}