Skip to main content

oak_rust/parser/
mod.rs

1use crate::{RustLanguage, lexer::RustLexer};
2use oak_core::{
3    GreenNode, OakError,
4    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer, unary},
5    source::{Source, TextEdit},
6};
7
8mod element_type;
9
10pub use self::element_type::{RustElement, RustElementType};
11
12/// A parser for the Rust programming language.
13#[derive(Clone)]
14pub struct RustParser<'config> {
15    /// Reference to the Rust language configuration
16    #[allow(dead_code)]
17    config: &'config RustLanguage,
18}
19
20impl<'config> RustParser<'config> {
21    pub fn new(config: &'config RustLanguage) -> Self {
22        Self { config }
23    }
24}
25
26impl<'config> Pratt<RustLanguage> for RustParser<'config> {
27    fn primary<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> &'a GreenNode<'a, RustLanguage> {
28        let cp = state.checkpoint();
29        match state.peek_kind() {
30            Some(crate::lexer::RustTokenType::Identifier) => {
31                state.bump();
32                state.finish_at(cp, RustElementType::IdentifierExpression.into())
33            }
34            Some(k) if k.is_literal() => {
35                state.bump();
36                state.finish_at(cp, RustElementType::LiteralExpression.into())
37            }
38            Some(crate::lexer::RustTokenType::LeftParen) => {
39                state.bump();
40                PrattParser::parse(state, 0, self);
41                state.expect(crate::lexer::RustTokenType::RightParen).ok();
42                state.finish_at(cp, RustElementType::ParenthesizedExpression.into())
43            }
44            _ => {
45                state.bump();
46                state.finish_at(cp, RustElementType::Error.into())
47            }
48        }
49    }
50
51    fn prefix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> &'a GreenNode<'a, RustLanguage> {
52        use crate::{lexer::RustTokenType::*, parser::RustElementType::*};
53        let kind = match state.peek_kind() {
54            Some(k) => k,
55            None => return self.primary(state),
56        };
57
58        match kind {
59            Minus | Bang | Ampersand | Star => unary(state, kind, 13, UnaryExpression.into(), |s, p| PrattParser::parse(s, p, self)),
60            _ => self.primary(state),
61        }
62    }
63
64    fn infix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>, left: &'a GreenNode<'a, RustLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, RustLanguage>> {
65        use crate::{lexer::RustTokenType::*, parser::RustElementType::*};
66        let kind = state.peek_kind()?;
67
68        let (prec, assoc) = match kind {
69            Eq | PlusEq | MinusEq | StarEq | SlashEq | PercentEq | AndEq | OrEq | CaretEq | ShlEq | ShrEq => (1, Associativity::Right),
70            DotDot | DotDotEq => (2, Associativity::Left),
71            OrOr => (3, Associativity::Left),
72            AndAnd => (4, Associativity::Left),
73            EqEq | Ne => (5, Associativity::Left),
74            Lt | Le | Gt | Ge => (6, Associativity::Left),
75            Pipe => (7, Associativity::Left),
76            Caret => (8, Associativity::Left),
77            Ampersand => (9, Associativity::Left),
78            Shl | Shr => (10, Associativity::Left),
79            Plus | Minus => (11, Associativity::Left),
80            Star | Slash | Percent => (12, Associativity::Left),
81            LeftParen | LeftBracket | Dot => (14, Associativity::Left),
82            _ => return None,
83        };
84
85        if prec < min_precedence {
86            return None;
87        }
88
89        match kind {
90            LeftParen => {
91                let cp = state.checkpoint();
92                state.push_child(left);
93                state.expect(LeftParen).ok();
94                if !state.at(RightParen) {
95                    loop {
96                        PrattParser::parse(state, 0, self);
97                        if !state.eat(Comma) {
98                            break;
99                        }
100                    }
101                }
102                state.expect(RightParen).ok();
103                Some(state.finish_at(cp, CallExpression.into()))
104            }
105            LeftBracket => {
106                let cp = state.checkpoint();
107                state.push_child(left);
108                state.expect(LeftBracket).ok();
109                PrattParser::parse(state, 0, self);
110                state.expect(RightBracket).ok();
111                Some(state.finish_at(cp, IndexExpression.into()))
112            }
113            Dot => {
114                let cp = state.checkpoint();
115                state.push_child(left);
116                state.expect(Dot).ok();
117                state.expect(crate::lexer::RustTokenType::Identifier).ok();
118                Some(state.finish_at(cp, FieldExpression.into()))
119            }
120            _ => Some(binary(state, left, kind, prec, assoc, BinaryExpression.into(), |s, p| PrattParser::parse(s, p, self))),
121        }
122    }
123}
124
125impl<'config> Parser<RustLanguage> for RustParser<'config> {
126    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<RustLanguage>) -> ParseOutput<'a, RustLanguage> {
127        let lexer = RustLexer::new(self.config);
128        parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_source_file(state))
129    }
130}
131
132impl<'config> RustParser<'config> {
133    pub(crate) fn parse_source_file<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<&'a GreenNode<'a, RustLanguage>, OakError> {
134        let cp = state.checkpoint();
135        while state.not_at_end() {
136            if state.current().map(|t| t.kind.is_ignored()).unwrap_or(false) {
137                state.advance();
138                continue;
139            }
140            self.parse_statement(state)?;
141        }
142        let root = state.finish_at(cp, RustElementType::SourceFile.into());
143        Ok(root)
144    }
145
146    fn parse_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
147        use crate::{lexer::RustTokenType, parser::RustElementType::*};
148
149        let kind = match state.peek_kind() {
150            Some(RustTokenType::Fn) => Some(Function),
151            Some(RustTokenType::Use) => Some(UseItem),
152            Some(RustTokenType::Mod) => Some(ModuleItem),
153            Some(RustTokenType::Struct) => Some(StructItem),
154            Some(RustTokenType::Enum) => Some(EnumItem),
155            Some(RustTokenType::Let) => Some(LetStatement),
156            Some(RustTokenType::If) => Some(IfExpression),
157            Some(RustTokenType::While) => Some(WhileExpression),
158            Some(RustTokenType::Loop) => Some(LoopExpression),
159            Some(RustTokenType::For) => Some(ForExpression),
160            Some(RustTokenType::Return) => Some(ReturnStatement),
161            Some(RustTokenType::LeftBrace) => Some(Block),
162            _ => None,
163        };
164
165        if let Some(k) = kind {
166            state.incremental_node(k.into(), |state| match k {
167                Function => self.parse_function_body(state),
168                UseItem => self.parse_use_item_body(state),
169                ModuleItem => self.parse_mod_item_body(state),
170                StructItem => self.parse_struct_item_body(state),
171                EnumItem => self.parse_enum_item_body(state),
172                LetStatement => self.parse_let_statement_body(state),
173                IfExpression => self.parse_if_expression_body(state),
174                WhileExpression => self.parse_while_expression_body(state),
175                LoopExpression => self.parse_loop_expression_body(state),
176                ForExpression => self.parse_for_expression_body(state),
177                ReturnStatement => self.parse_return_statement_body(state),
178                Block => self.parse_block_body(state),
179                _ => unreachable!(),
180            })
181        }
182        else {
183            PrattParser::parse(state, 0, self);
184            state.eat(RustTokenType::Semicolon);
185            Ok(())
186        }
187    }
188
189    fn parse_function_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
190        self.parse_function(state)
191    }
192
193    fn parse_use_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
194        self.parse_use_item(state)
195    }
196
197    fn parse_mod_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
198        self.parse_mod_item(state)
199    }
200
201    fn parse_struct_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
202        self.parse_struct_item(state)
203    }
204
205    fn parse_enum_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
206        self.parse_enum_item(state)
207    }
208
209    fn parse_let_statement_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
210        self.parse_let_statement(state)
211    }
212
213    fn parse_if_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
214        self.parse_if_expression(state)
215    }
216
217    fn parse_while_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
218        self.parse_while_expression(state)
219    }
220
221    fn parse_loop_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
222        self.parse_loop_expression(state)
223    }
224
225    fn parse_for_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
226        self.parse_for_expression(state)
227    }
228
229    fn parse_return_statement_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
230        self.parse_return_statement(state)
231    }
232
233    fn parse_block_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
234        self.parse_block(state)
235    }
236
237    fn parse_function<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
238        use crate::lexer::RustTokenType;
239        let cp = state.checkpoint();
240        state.expect(RustTokenType::Fn).ok();
241        state.expect(RustTokenType::Identifier).ok();
242        self.parse_param_list(state)?;
243        if state.eat(RustTokenType::Arrow) {
244            while state.not_at_end() && !state.at(RustTokenType::LeftBrace) {
245                state.advance();
246            }
247        }
248        self.parse_block(state)?;
249        state.finish_at(cp, RustElementType::Function.into());
250        Ok(())
251    }
252
253    fn parse_param_list<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
254        use crate::lexer::RustTokenType::*;
255        let cp = state.checkpoint();
256        state.expect(LeftParen).ok();
257        while state.not_at_end() && !state.at(RightParen) {
258            state.advance();
259        }
260        state.expect(RightParen).ok();
261        state.finish_at(cp, RustElementType::ParameterList.into());
262        Ok(())
263    }
264
265    fn parse_block<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
266        use crate::lexer::RustTokenType::*;
267        let cp = state.checkpoint();
268        state.expect(LeftBrace).ok();
269        while state.not_at_end() && !state.at(RightBrace) {
270            self.parse_statement(state)?;
271        }
272        state.expect(RightBrace).ok();
273        state.finish_at(cp, RustElementType::BlockExpression.into());
274        Ok(())
275    }
276
277    fn parse_use_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
278        let cp = state.checkpoint();
279        state.expect(crate::lexer::RustTokenType::Use).ok();
280        // 简化处理路径
281        while !state.at(crate::lexer::RustTokenType::Semicolon) && state.not_at_end() {
282            state.bump();
283        }
284        state.eat(crate::lexer::RustTokenType::Semicolon);
285        state.finish_at(cp, RustElementType::UseItem.into());
286        Ok(())
287    }
288
289    fn parse_mod_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
290        let cp = state.checkpoint();
291        state.bump(); // mod
292        state.expect(crate::lexer::RustTokenType::Identifier).ok();
293        if state.at(crate::lexer::RustTokenType::LeftBrace) {
294            self.parse_block(state)?;
295        }
296        else {
297            state.eat(crate::lexer::RustTokenType::Semicolon);
298        }
299        state.finish_at(cp, RustElementType::ModuleItem.into());
300        Ok(())
301    }
302
303    fn parse_struct_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
304        let cp = state.checkpoint();
305        state.bump(); // struct
306        state.expect(crate::lexer::RustTokenType::Identifier).ok();
307        while state.not_at_end() && !state.at(crate::lexer::RustTokenType::LeftBrace) && !state.at(crate::lexer::RustTokenType::Semicolon) {
308            state.advance();
309        }
310        if state.at(crate::lexer::RustTokenType::LeftBrace) {
311            self.parse_block(state)?;
312        }
313        else {
314            state.eat(crate::lexer::RustTokenType::Semicolon);
315        }
316        state.finish_at(cp, RustElementType::StructItem.into());
317        Ok(())
318    }
319
320    fn parse_enum_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
321        let cp = state.checkpoint();
322        state.bump(); // enum
323        state.expect(crate::lexer::RustTokenType::Identifier).ok();
324        self.parse_block(state)?;
325        state.finish_at(cp, RustElementType::EnumItem.into());
326        Ok(())
327    }
328
329    fn parse_let_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
330        let cp = state.checkpoint();
331        state.bump(); // let
332        state.expect(crate::lexer::RustTokenType::Identifier).ok();
333        if state.eat(crate::lexer::RustTokenType::Eq) {
334            PrattParser::parse(state, 0, self);
335        }
336        state.eat(crate::lexer::RustTokenType::Semicolon);
337        state.finish_at(cp, RustElementType::LetStatement.into());
338        Ok(())
339    }
340
341    fn parse_if_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
342        let cp = state.checkpoint();
343        state.bump(); // if
344        PrattParser::parse(state, 0, self);
345        self.parse_block(state)?;
346        if state.eat(crate::lexer::RustTokenType::Else) {
347            if state.at(crate::lexer::RustTokenType::If) {
348                self.parse_if_expression(state)?;
349            }
350            else {
351                self.parse_block(state)?;
352            }
353        }
354        state.finish_at(cp, RustElementType::IfExpression.into());
355        Ok(())
356    }
357
358    fn parse_while_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
359        let cp = state.checkpoint();
360        state.bump(); // while
361        PrattParser::parse(state, 0, self);
362        self.parse_block(state)?;
363        state.finish_at(cp, RustElementType::WhileExpression.into());
364        Ok(())
365    }
366
367    fn parse_loop_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
368        let cp = state.checkpoint();
369        state.bump(); // loop
370        self.parse_block(state)?;
371        state.finish_at(cp, RustElementType::LoopExpression.into());
372        Ok(())
373    }
374
375    fn parse_for_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
376        let cp = state.checkpoint();
377        state.bump(); // for
378        state.expect(crate::lexer::RustTokenType::Identifier).ok();
379        state.expect(crate::lexer::RustTokenType::In).ok();
380        PrattParser::parse(state, 0, self);
381        self.parse_block(state)?;
382        state.finish_at(cp, RustElementType::ForExpression.into());
383        Ok(())
384    }
385
386    fn parse_return_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
387        let cp = state.checkpoint();
388        state.bump(); // return
389        if !state.at(crate::lexer::RustTokenType::Semicolon) {
390            PrattParser::parse(state, 0, self);
391        }
392        state.eat(crate::lexer::RustTokenType::Semicolon);
393        state.finish_at(cp, RustElementType::ReturnStatement.into());
394        Ok(())
395    }
396}