oak_rust/parser/
mod.rs

1use crate::{RustLanguage, lexer::RustLexer};
2use oak_core::{
3    GreenNode, OakError,
4    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer, unary},
5    source::{Source, TextEdit},
6};
7
8mod element_type;
9
10pub use self::element_type::{RustElement, RustElementType};
11
12/// A parser for the Rust programming language.
13#[derive(Clone)]
14pub struct RustParser<'config> {
15    /// Reference to the Rust language configuration
16    #[allow(dead_code)]
17    config: &'config RustLanguage,
18}
19
20impl<'config> Pratt<RustLanguage> for RustParser<'config> {
21    fn primary<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> &'a GreenNode<'a, RustLanguage> {
22        let cp = state.checkpoint();
23        match state.peek_kind() {
24            Some(crate::lexer::RustTokenType::Identifier) => {
25                state.bump();
26                state.finish_at(cp, RustElementType::IdentifierExpression.into())
27            }
28            Some(k) if k.is_literal() => {
29                state.bump();
30                state.finish_at(cp, RustElementType::LiteralExpression.into())
31            }
32            Some(crate::lexer::RustTokenType::LeftParen) => {
33                state.bump();
34                PrattParser::parse(state, 0, self);
35                state.expect(crate::lexer::RustTokenType::RightParen).ok();
36                state.finish_at(cp, RustElementType::ParenthesizedExpression.into())
37            }
38            _ => {
39                state.bump();
40                state.finish_at(cp, RustElementType::Error.into())
41            }
42        }
43    }
44
45    fn prefix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> &'a GreenNode<'a, RustLanguage> {
46        use crate::{lexer::RustTokenType::*, parser::RustElementType::*};
47        let kind = match state.peek_kind() {
48            Some(k) => k,
49            None => return self.primary(state),
50        };
51
52        match kind {
53            Minus | Bang | Ampersand | Star => unary(state, kind, 13, UnaryExpression.into(), |s, p| PrattParser::parse(s, p, self)),
54            _ => self.primary(state),
55        }
56    }
57
58    fn infix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>, left: &'a GreenNode<'a, RustLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, RustLanguage>> {
59        use crate::{lexer::RustTokenType::*, parser::RustElementType::*};
60        let kind = state.peek_kind()?;
61
62        let (prec, assoc) = match kind {
63            Eq | PlusEq | MinusEq | StarEq | SlashEq | PercentEq | AndEq | OrEq | CaretEq | ShlEq | ShrEq => (1, Associativity::Right),
64            DotDot | DotDotEq => (2, Associativity::Left),
65            OrOr => (3, Associativity::Left),
66            AndAnd => (4, Associativity::Left),
67            EqEq | Ne => (5, Associativity::Left),
68            Lt | Le | Gt | Ge => (6, Associativity::Left),
69            Pipe => (7, Associativity::Left),
70            Caret => (8, Associativity::Left),
71            Ampersand => (9, Associativity::Left),
72            Shl | Shr => (10, Associativity::Left),
73            Plus | Minus => (11, Associativity::Left),
74            Star | Slash | Percent => (12, Associativity::Left),
75            LeftParen | LeftBracket | Dot => (14, Associativity::Left),
76            _ => return None,
77        };
78
79        if prec < min_precedence {
80            return None;
81        }
82
83        match kind {
84            LeftParen => {
85                let cp = state.checkpoint();
86                state.push_child(left);
87                state.expect(LeftParen).ok();
88                if !state.at(RightParen) {
89                    loop {
90                        PrattParser::parse(state, 0, self);
91                        if !state.eat(Comma) {
92                            break;
93                        }
94                    }
95                }
96                state.expect(RightParen).ok();
97                Some(state.finish_at(cp, CallExpression.into()))
98            }
99            LeftBracket => {
100                let cp = state.checkpoint();
101                state.push_child(left);
102                state.expect(LeftBracket).ok();
103                PrattParser::parse(state, 0, self);
104                state.expect(RightBracket).ok();
105                Some(state.finish_at(cp, IndexExpression.into()))
106            }
107            Dot => {
108                let cp = state.checkpoint();
109                state.push_child(left);
110                state.expect(Dot).ok();
111                state.expect(crate::lexer::RustTokenType::Identifier).ok();
112                Some(state.finish_at(cp, FieldExpression.into()))
113            }
114            _ => Some(binary(state, left, kind, prec, assoc, BinaryExpression.into(), |s, p| PrattParser::parse(s, p, self))),
115        }
116    }
117}
118
119impl<'config> Parser<RustLanguage> for RustParser<'config> {
120    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<RustLanguage>) -> ParseOutput<'a, RustLanguage> {
121        let lexer = RustLexer::new(self.config);
122        parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_source_file(state))
123    }
124}
125
126impl<'config> RustParser<'config> {
127    /// Creates a new Rust parser with the given language configuration.
128    pub fn new(config: &'config RustLanguage) -> Self {
129        Self { config }
130    }
131
132    pub(crate) fn parse_source_file<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<&'a GreenNode<'a, RustLanguage>, OakError> {
133        let cp = state.checkpoint();
134        while state.not_at_end() {
135            if state.current().map(|t| t.kind.is_ignored()).unwrap_or(false) {
136                state.advance();
137                continue;
138            }
139            self.parse_statement(state)?;
140        }
141        let root = state.finish_at(cp, RustElementType::SourceFile.into());
142        Ok(root)
143    }
144
145    fn parse_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
146        use crate::{lexer::RustTokenType, parser::RustElementType::*};
147
148        let kind = match state.peek_kind() {
149            Some(RustTokenType::Fn) => Some(Function),
150            Some(RustTokenType::Use) => Some(UseItem),
151            Some(RustTokenType::Mod) => Some(ModuleItem),
152            Some(RustTokenType::Struct) => Some(StructItem),
153            Some(RustTokenType::Enum) => Some(EnumItem),
154            Some(RustTokenType::Let) => Some(LetStatement),
155            Some(RustTokenType::If) => Some(IfExpression),
156            Some(RustTokenType::While) => Some(WhileExpression),
157            Some(RustTokenType::Loop) => Some(LoopExpression),
158            Some(RustTokenType::For) => Some(ForExpression),
159            Some(RustTokenType::Return) => Some(ReturnStatement),
160            Some(RustTokenType::LeftBrace) => Some(Block),
161            _ => None,
162        };
163
164        if let Some(k) = kind {
165            state.incremental_node(k.into(), |state| match k {
166                Function => self.parse_function_body(state),
167                UseItem => self.parse_use_item_body(state),
168                ModuleItem => self.parse_mod_item_body(state),
169                StructItem => self.parse_struct_item_body(state),
170                EnumItem => self.parse_enum_item_body(state),
171                LetStatement => self.parse_let_statement_body(state),
172                IfExpression => self.parse_if_expression_body(state),
173                WhileExpression => self.parse_while_expression_body(state),
174                LoopExpression => self.parse_loop_expression_body(state),
175                ForExpression => self.parse_for_expression_body(state),
176                ReturnStatement => self.parse_return_statement_body(state),
177                Block => self.parse_block_body(state),
178                _ => unreachable!(),
179            })
180        }
181        else {
182            PrattParser::parse(state, 0, self);
183            state.eat(RustTokenType::Semicolon);
184            Ok(())
185        }
186    }
187
188    fn parse_function_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
189        self.parse_function(state)
190    }
191
192    fn parse_use_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
193        self.parse_use_item(state)
194    }
195
196    fn parse_mod_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
197        self.parse_mod_item(state)
198    }
199
200    fn parse_struct_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
201        self.parse_struct_item(state)
202    }
203
204    fn parse_enum_item_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
205        self.parse_enum_item(state)
206    }
207
208    fn parse_let_statement_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
209        self.parse_let_statement(state)
210    }
211
212    fn parse_if_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
213        self.parse_if_expression(state)
214    }
215
216    fn parse_while_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
217        self.parse_while_expression(state)
218    }
219
220    fn parse_loop_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
221        self.parse_loop_expression(state)
222    }
223
224    fn parse_for_expression_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
225        self.parse_for_expression(state)
226    }
227
228    fn parse_return_statement_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
229        self.parse_return_statement(state)
230    }
231
232    fn parse_block_body<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
233        self.parse_block(state)
234    }
235
236    fn parse_function<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
237        use crate::lexer::RustTokenType;
238        let cp = state.checkpoint();
239        state.expect(RustTokenType::Fn).ok();
240        state.expect(RustTokenType::Identifier).ok();
241        self.parse_param_list(state)?;
242        if state.eat(RustTokenType::Arrow) {
243            while state.not_at_end() && !state.at(RustTokenType::LeftBrace) {
244                state.advance();
245            }
246        }
247        self.parse_block(state)?;
248        state.finish_at(cp, RustElementType::Function.into());
249        Ok(())
250    }
251
252    fn parse_param_list<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
253        use crate::lexer::RustTokenType::*;
254        let cp = state.checkpoint();
255        state.expect(LeftParen).ok();
256        while state.not_at_end() && !state.at(RightParen) {
257            state.advance();
258        }
259        state.expect(RightParen).ok();
260        state.finish_at(cp, RustElementType::ParameterList.into());
261        Ok(())
262    }
263
264    fn parse_block<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
265        use crate::lexer::RustTokenType::*;
266        let cp = state.checkpoint();
267        state.expect(LeftBrace).ok();
268        while state.not_at_end() && !state.at(RightBrace) {
269            self.parse_statement(state)?;
270        }
271        state.expect(RightBrace).ok();
272        state.finish_at(cp, RustElementType::BlockExpression.into());
273        Ok(())
274    }
275
276    fn parse_use_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
277        let cp = state.checkpoint();
278        state.expect(crate::lexer::RustTokenType::Use).ok();
279        // 简化处理路径
280        while !state.at(crate::lexer::RustTokenType::Semicolon) && state.not_at_end() {
281            state.bump();
282        }
283        state.eat(crate::lexer::RustTokenType::Semicolon);
284        state.finish_at(cp, RustElementType::UseItem.into());
285        Ok(())
286    }
287
288    fn parse_mod_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
289        let cp = state.checkpoint();
290        state.bump(); // mod
291        state.expect(crate::lexer::RustTokenType::Identifier).ok();
292        if state.at(crate::lexer::RustTokenType::LeftBrace) {
293            self.parse_block(state)?;
294        }
295        else {
296            state.eat(crate::lexer::RustTokenType::Semicolon);
297        }
298        state.finish_at(cp, RustElementType::ModuleItem.into());
299        Ok(())
300    }
301
302    fn parse_struct_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
303        let cp = state.checkpoint();
304        state.bump(); // struct
305        state.expect(crate::lexer::RustTokenType::Identifier).ok();
306        while state.not_at_end() && !state.at(crate::lexer::RustTokenType::LeftBrace) && !state.at(crate::lexer::RustTokenType::Semicolon) {
307            state.advance();
308        }
309        if state.at(crate::lexer::RustTokenType::LeftBrace) {
310            self.parse_block(state)?;
311        }
312        else {
313            state.eat(crate::lexer::RustTokenType::Semicolon);
314        }
315        state.finish_at(cp, RustElementType::StructItem.into());
316        Ok(())
317    }
318
319    fn parse_enum_item<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
320        let cp = state.checkpoint();
321        state.bump(); // enum
322        state.expect(crate::lexer::RustTokenType::Identifier).ok();
323        self.parse_block(state)?;
324        state.finish_at(cp, RustElementType::EnumItem.into());
325        Ok(())
326    }
327
328    fn parse_let_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
329        let cp = state.checkpoint();
330        state.bump(); // let
331        state.expect(crate::lexer::RustTokenType::Identifier).ok();
332        if state.eat(crate::lexer::RustTokenType::Eq) {
333            PrattParser::parse(state, 0, self);
334        }
335        state.eat(crate::lexer::RustTokenType::Semicolon);
336        state.finish_at(cp, RustElementType::LetStatement.into());
337        Ok(())
338    }
339
340    fn parse_if_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
341        let cp = state.checkpoint();
342        state.bump(); // if
343        PrattParser::parse(state, 0, self);
344        self.parse_block(state)?;
345        if state.eat(crate::lexer::RustTokenType::Else) {
346            if state.at(crate::lexer::RustTokenType::If) {
347                self.parse_if_expression(state)?;
348            }
349            else {
350                self.parse_block(state)?;
351            }
352        }
353        state.finish_at(cp, RustElementType::IfExpression.into());
354        Ok(())
355    }
356
357    fn parse_while_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
358        let cp = state.checkpoint();
359        state.bump(); // while
360        PrattParser::parse(state, 0, self);
361        self.parse_block(state)?;
362        state.finish_at(cp, RustElementType::WhileExpression.into());
363        Ok(())
364    }
365
366    fn parse_loop_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
367        let cp = state.checkpoint();
368        state.bump(); // loop
369        self.parse_block(state)?;
370        state.finish_at(cp, RustElementType::LoopExpression.into());
371        Ok(())
372    }
373
374    fn parse_for_expression<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
375        let cp = state.checkpoint();
376        state.bump(); // for
377        state.expect(crate::lexer::RustTokenType::Identifier).ok();
378        state.expect(crate::lexer::RustTokenType::In).ok();
379        PrattParser::parse(state, 0, self);
380        self.parse_block(state)?;
381        state.finish_at(cp, RustElementType::ForExpression.into());
382        Ok(())
383    }
384
385    fn parse_return_statement<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, RustLanguage, S>) -> Result<(), OakError> {
386        let cp = state.checkpoint();
387        state.bump(); // return
388        if !state.at(crate::lexer::RustTokenType::Semicolon) {
389            PrattParser::parse(state, 0, self);
390        }
391        state.eat(crate::lexer::RustTokenType::Semicolon);
392        state.finish_at(cp, RustElementType::ReturnStatement.into());
393        Ok(())
394    }
395}