// oak_python/parser/mod.rs

1use crate::{kind::PythonSyntaxKind, language::PythonLanguage, lexer::PythonLexer};
2use oak_core::{
3    OakError, TokenType,
4    parser::{
5        ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer,
6        pratt::{Associativity, Pratt, PrattParser},
7    },
8    source::{Source, TextEdit},
9    tree::GreenNode,
10};
11
/// Shorthand for the generic `ParserState` fixed to `PythonLanguage`; `S` is the
/// source type the state is parameterised over.
12pub(crate) type State<'a, S> = ParserState<'a, PythonLanguage, S>;
13
/// Parser for Python source that borrows a `PythonLanguage` configuration.
///
/// Statement parsing lives in the inherent `impl`; expression parsing is provided
/// through the `Pratt` implementation for this type.
14pub struct PythonParser<'config> {
    /// Borrowed language configuration; also passed to `PythonLexer::new` when parsing.
15    pub(crate) config: &'config PythonLanguage,
16}
17
18impl<'config> PythonParser<'config> {
19    pub fn new(config: &'config PythonLanguage) -> Self {
20        Self { config }
21    }
22
23    fn advance_until<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, kind: PythonSyntaxKind) {
24        while state.not_at_end() && !state.at(kind) {
25            state.advance();
26        }
27    }
28
29    fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
30        while state.not_at_end() {
31            if let Some(kind) = state.peek_kind() {
32                if kind.is_ignored() {
33                    state.bump();
34                    continue;
35                }
36            }
37            break;
38        }
39    }
40
41    fn parse_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, min_precedence: u8) -> &'a GreenNode<'a, PythonLanguage> {
42        let node = PrattParser::parse(state, min_precedence, self);
43        state.push_child(node);
44        node
45    }
46
47    pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
48        use crate::kind::PythonSyntaxKind::*;
49        self.skip_trivia(state);
50
51        // Skip leading newlines at top level
52        while state.eat(Newline) {
53            self.skip_trivia(state);
54        }
55
56        if !state.not_at_end() || state.at(Dedent) {
57            return Ok(());
58        }
59
60        if state.at(DefKeyword) {
61            state.incremental_node(FunctionDef.into(), |state| self.parse_function_def_body(state))
62        }
63        else if state.at(ClassKeyword) {
64            state.incremental_node(ClassDef.into(), |state| self.parse_class_def_body(state))
65        }
66        else if state.at(IfKeyword) {
67            state.incremental_node(If.into(), |state| self.parse_if_stmt_body(state))
68        }
69        else if state.at(WhileKeyword) {
70            state.incremental_node(While.into(), |state| self.parse_while_stmt_body(state))
71        }
72        else if state.at(ForKeyword) {
73            state.incremental_node(For.into(), |state| self.parse_for_stmt_body(state))
74        }
75        else if state.eat(ReturnKeyword) {
76            let cp = state.checkpoint();
77            self.parse_return_stmt_body(state)?;
78            state.finish_at(cp, Return.into());
79            state.eat(Newline);
80            Ok(())
81        }
82        else if state.at(ImportKeyword) || state.at(FromKeyword) {
83            state.incremental_node(Import.into(), |state| self.parse_import_stmt_body(state))
84        }
85        else if state.eat(PassKeyword) {
86            state.incremental_node(Pass.into(), |state| {
87                self.skip_trivia(state);
88                state.eat(Newline);
89                Ok(())
90            })
91        }
92        else if state.eat(BreakKeyword) {
93            state.incremental_node(Break.into(), |state| {
94                self.skip_trivia(state);
95                state.eat(Newline);
96                Ok(())
97            })
98        }
99        else if state.eat(ContinueKeyword) {
100            state.incremental_node(Continue.into(), |state| {
101                self.skip_trivia(state);
102                state.eat(Newline);
103                Ok(())
104            })
105        }
106        else {
107            let cp = state.checkpoint();
108            self.parse_expression(state, 0);
109            state.finish_at(cp, Expr.into());
110            self.skip_trivia(state);
111            state.eat(Newline);
112            Ok(())
113        }
114    }
115
116    fn parse_function_def_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
117        use crate::kind::PythonSyntaxKind::*;
118        state.expect(DefKeyword).ok();
119        self.skip_trivia(state);
120        if !state.expect(Identifier).is_ok() {
121            // If identifier is missing, we might want to advance to avoid infinite loop
122            // but for now let's just let it be.
123        }
124        self.skip_trivia(state);
125        state.expect(LeftParen).ok();
126        state.incremental_node(Arguments.into(), |state| {
127            while state.not_at_end() && !state.at(RightParen) {
128                self.skip_trivia(state);
129                if state.at(RightParen) {
130                    break;
131                }
132                state.incremental_node(Arg.into(), |state| {
133                    state.expect(Identifier).ok();
134                    self.skip_trivia(state);
135                    if state.eat(Colon) {
136                        self.skip_trivia(state);
137                        // Consume until comma or right paren for simple type annotation
138                        while state.not_at_end() && !state.at(Comma) && !state.at(RightParen) {
139                            state.advance();
140                        }
141                    }
142                    Ok(())
143                })?;
144                self.skip_trivia(state);
145                if !state.eat(Comma) {
146                    break;
147                }
148            }
149            Ok(())
150        })?;
151        self.skip_trivia(state);
152        state.expect(RightParen).ok();
153        self.skip_trivia(state);
154        if state.eat(Arrow) {
155            self.skip_trivia(state);
156            self.advance_until(state, Colon);
157        }
158        state.expect(Colon).ok();
159        self.parse_suite(state)?;
160        Ok(())
161    }
162
163    fn parse_class_def_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
164        use crate::kind::PythonSyntaxKind::*;
165        state.expect(ClassKeyword).ok();
166        self.skip_trivia(state);
167        state.expect(Identifier).ok();
168        self.skip_trivia(state);
169        if state.eat(LeftParen) {
170            self.skip_trivia(state);
171            self.advance_until(state, RightParen);
172            state.expect(RightParen).ok();
173        }
174        self.skip_trivia(state);
175        state.expect(Colon).ok();
176        self.parse_suite(state)?;
177        Ok(())
178    }
179
180    fn parse_if_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
181        use crate::kind::PythonSyntaxKind::*;
182        state.expect(IfKeyword).ok();
183        self.skip_trivia(state);
184        PrattParser::parse(state, 0, self);
185        self.skip_trivia(state);
186        state.expect(Colon).ok();
187        self.parse_suite(state)?;
188        self.skip_trivia(state);
189        while state.eat(ElifKeyword) {
190            self.skip_trivia(state);
191            PrattParser::parse(state, 0, self);
192            self.skip_trivia(state);
193            state.expect(Colon).ok();
194            self.parse_suite(state)?;
195            self.skip_trivia(state);
196        }
197        if state.eat(ElseKeyword) {
198            self.skip_trivia(state);
199            state.expect(Colon).ok();
200            self.parse_suite(state)?;
201        }
202        Ok(())
203    }
204
205    fn parse_while_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
206        use crate::kind::PythonSyntaxKind::*;
207        state.expect(WhileKeyword).ok();
208        self.skip_trivia(state);
209        PrattParser::parse(state, 0, self);
210        self.skip_trivia(state);
211        state.expect(Colon).ok();
212        self.parse_suite(state)?;
213        self.skip_trivia(state);
214        if state.eat(ElseKeyword) {
215            self.skip_trivia(state);
216            state.expect(Colon).ok();
217            self.parse_suite(state)?;
218        }
219        Ok(())
220    }
221
222    fn parse_for_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
223        use crate::kind::PythonSyntaxKind::*;
224        state.expect(ForKeyword).ok();
225        self.skip_trivia(state);
226        PrattParser::parse(state, 0, self);
227        self.skip_trivia(state);
228        state.expect(InKeyword).ok();
229        self.skip_trivia(state);
230        PrattParser::parse(state, 0, self);
231        self.skip_trivia(state);
232        state.expect(Colon).ok();
233        self.parse_suite(state)?;
234        self.skip_trivia(state);
235        if state.eat(ElseKeyword) {
236            self.skip_trivia(state);
237            state.expect(Colon).ok();
238            self.parse_suite(state)?;
239        }
240        Ok(())
241    }
242
243    fn parse_return_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
244        use crate::kind::PythonSyntaxKind::*;
245        self.skip_trivia(state);
246        if state.not_at_end() && !state.at(Newline) && !state.at(Semicolon) {
247            self.parse_expression(state, 0);
248        }
249        Ok(())
250    }
251
252    fn parse_import_stmt_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
253        use crate::kind::PythonSyntaxKind::*;
254        if state.eat(ImportKeyword) {
255            self.advance_until(state, Newline);
256        }
257        else if state.eat(FromKeyword) {
258            self.advance_until(state, Newline);
259        }
260        Ok(())
261    }
262
263    fn parse_suite<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
264        use crate::kind::PythonSyntaxKind::*;
265        let cp = state.checkpoint();
266        self.skip_trivia(state);
267        if state.eat(Newline) {
268            self.skip_trivia(state);
269            state.expect(Indent).ok();
270            while state.not_at_end() && !state.at(Dedent) {
271                self.parse_statement(state)?;
272                self.skip_trivia(state);
273            }
274            state.expect(Dedent).ok();
275        }
276        else {
277            self.parse_statement(state)?;
278        }
279        state.finish_at(cp, Suite.into());
280        Ok(())
281    }
282
283    fn parse_root_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<&'a GreenNode<'a, PythonLanguage>, OakError> {
284        let checkpoint = state.checkpoint();
285
286        while state.not_at_end() {
287            self.parse_statement(state)?;
288        }
289        self.skip_trivia(state);
290
291        Ok(state.finish_at(checkpoint, PythonSyntaxKind::ExpressionModule.into()))
292    }
293}
294
295impl<'config> Pratt<PythonLanguage> for PythonParser<'config> {
296    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, PythonLanguage> {
297        use crate::kind::PythonSyntaxKind::*;
298        self.skip_trivia(state);
299        let cp = state.checkpoint();
300        let kind = state.peek_kind();
301        match kind {
302            Some(Identifier) => {
303                state.bump();
304                state.finish_at(cp, Name.into())
305            }
306            Some(Number) | Some(String) | Some(Bytes) | Some(FString) | Some(TrueKeyword) | Some(FalseKeyword) | Some(NoneKeyword) => {
307                state.bump();
308                state.finish_at(cp, Constant.into())
309            }
310            Some(LeftParen) => {
311                state.bump();
312                let cp_inner = state.checkpoint();
313                let inner = PrattParser::parse(state, 0, self);
314                state.push_child(inner);
315                state.finish_at(cp_inner, Expr.into());
316                self.skip_trivia(state);
317                state.expect(RightParen).ok();
318                state.finish_at(cp, Tuple.into())
319            }
320            Some(LeftBracket) => {
321                state.bump();
322                self.advance_until(state, RightBracket);
323                state.expect(RightBracket).ok();
324                state.finish_at(cp, List.into())
325            }
326            Some(LeftBrace) => {
327                state.bump();
328                self.advance_until(state, RightBrace);
329                state.expect(RightBrace).ok();
330                state.finish_at(cp, Dict.into())
331            }
332            _ => {
333                state.bump();
334                state.finish_at(cp, Error.into())
335            }
336        }
337    }
338
339    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, PythonLanguage> {
340        use crate::kind::PythonSyntaxKind::*;
341        self.skip_trivia(state);
342        let kind = state.peek_kind().expect("Expected token in prefix");
343        match kind {
344            Plus | Minus | Tilde | NotKeyword => {
345                let cp = state.checkpoint();
346                state.expect(kind).ok();
347                let right = PrattParser::parse(state, 14, self);
348                state.push_child(right);
349                state.finish_at(cp, UnaryOp.into())
350            }
351            _ => self.primary(state),
352        }
353    }
354
355    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, _left: &'a GreenNode<'a, PythonLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, PythonLanguage>> {
356        use crate::kind::PythonSyntaxKind::*;
357
358        // Peek kind without consuming trivia yet
359        let mut lookahead = 0;
360        let mut kind = None;
361        while let Some(k) = state.peek_kind_at(lookahead) {
362            if k.is_ignored() {
363                lookahead += 1;
364                continue;
365            }
366            kind = Some(k);
367            break;
368        }
369
370        let kind = kind?;
371
372        let (prec, assoc) = match kind {
373            Assign | PlusAssign | MinusAssign | StarAssign | DoubleStarAssign | SlashAssign | DoubleSlashAssign | PercentAssign | AtAssign | AmpersandAssign | PipeAssign | CaretAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
374            OrKeyword => (2, Associativity::Left),
375            AndKeyword => (3, Associativity::Left),
376            NotKeyword => (4, Associativity::Left),
377            Less | Greater | Equal | NotEqual | LessEqual | GreaterEqual | InKeyword | IsKeyword => (5, Associativity::Left),
378            Pipe => (6, Associativity::Left),
379            Caret => (7, Associativity::Left),
380            Ampersand => (8, Associativity::Left),
381            LeftShift | RightShift => (9, Associativity::Left),
382            Plus | Minus => (10, Associativity::Left),
383            Star | Slash | DoubleSlash | Percent | At => (11, Associativity::Left),
384            DoubleStar => (13, Associativity::Right),
385            Dot | LeftParen | LeftBracket => (15, Associativity::Left),
386            _ => return None,
387        };
388
389        if prec < min_precedence {
390            return None;
391        }
392
393        match kind {
394            LeftParen => {
395                let cp = (0, state.sink.checkpoint() - 1);
396                self.skip_trivia(state);
397                state.expect(LeftParen).ok();
398                self.advance_until(state, RightParen);
399                state.expect(RightParen).ok();
400                Some(state.finish_at(cp, Call.into()))
401            }
402            LeftBracket => {
403                let cp = (0, state.sink.checkpoint() - 1);
404                self.skip_trivia(state);
405                state.expect(LeftBracket).ok();
406                self.advance_until(state, RightBracket);
407                state.expect(RightBracket).ok();
408                Some(state.finish_at(cp, Subscript.into()))
409            }
410            Dot => {
411                let cp = (0, state.sink.checkpoint() - 1);
412                self.skip_trivia(state);
413                state.expect(Dot).ok();
414                self.skip_trivia(state);
415                state.expect(Identifier).ok();
416                Some(state.finish_at(cp, Attribute.into()))
417            }
418            _ => {
419                let result_kind = if prec == 1 {
420                    AssignStmt
421                }
422                else if prec <= 3 {
423                    BoolOp
424                }
425                else if prec == 5 {
426                    Compare
427                }
428                else {
429                    BinOp
430                };
431
432                let cp = (0, state.sink.checkpoint() - 1);
433                self.skip_trivia(state);
434                state.expect(kind).ok();
435
436                let next_prec = match assoc {
437                    Associativity::Left => prec + 1,
438                    Associativity::Right => prec,
439                    Associativity::None => prec + 1,
440                };
441
442                PrattParser::parse(state, next_prec, self);
443                Some(state.finish_at(cp, result_kind.into()))
444            }
445        }
446    }
447}
448
449impl<'config> Parser<PythonLanguage> for PythonParser<'config> {
450    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<PythonLanguage>) -> ParseOutput<'a, PythonLanguage> {
451        let lexer = PythonLexer::new(self.config);
452        parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_root_internal(state))
453    }
454}