Skip to main content

oak_csharp/parser/
mod.rs

1use crate::language::CSharpLanguage;
2pub mod element_type;
3pub use element_type::CSharpElementType;
4use oak_core::{
5    GreenNode, OakError,
6    parser::{
7        ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer,
8        pratt::{Associativity, Pratt, PrattParser, binary},
9    },
10    source::{Source, TextEdit},
11};
12
13pub(crate) type State<'a, S> = ParserState<'a, CSharpLanguage, S>;
14
15pub struct CSharpParser<'config> {
16    pub(crate) _language: &'config CSharpLanguage,
17}
18
19impl<'config> Pratt<CSharpLanguage> for CSharpParser<'config> {
20    fn primary<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>) -> &'a GreenNode<'a, CSharpLanguage> {
21        use crate::lexer::token_type::CSharpTokenType;
22        let cp = state.checkpoint();
23        match state.peek_kind() {
24            Some(CSharpTokenType::Identifier) => {
25                state.bump();
26                state.finish_at(cp, crate::parser::element_type::CSharpElementType::IdentifierName)
27            }
28            Some(CSharpTokenType::Number)
29            | Some(CSharpTokenType::NumberLiteral)
30            | Some(CSharpTokenType::String)
31            | Some(CSharpTokenType::StringLiteral)
32            | Some(CSharpTokenType::TrueKeyword)
33            | Some(CSharpTokenType::FalseKeyword)
34            | Some(CSharpTokenType::NullKeyword) => {
35                state.bump();
36                state.finish_at(cp, crate::parser::element_type::CSharpElementType::LiteralExpression)
37            }
38            Some(CSharpTokenType::LeftParen) => {
39                state.bump();
40                PrattParser::parse(state, 0, self);
41                state.expect(CSharpTokenType::RightParen).ok();
42                state.finish_at(cp, crate::parser::element_type::CSharpElementType::BinaryExpression) // 简化处理
43            }
44            _ => {
45                state.bump();
46                state.finish_at(cp, crate::parser::element_type::CSharpElementType::Root)
47            }
48        }
49    }
50
51    fn prefix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>) -> &'a GreenNode<'a, CSharpLanguage> {
52        self.primary(state)
53    }
54
55    fn infix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>, left: &'a GreenNode<'a, CSharpLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CSharpLanguage>> {
56        use crate::{lexer::token_type::CSharpTokenType, parser::CSharpElementType::*};
57        let kind = state.peek_kind()?;
58
59        let (prec, assoc) = match kind {
60            CSharpTokenType::Assign
61            | CSharpTokenType::PlusAssign
62            | CSharpTokenType::MinusAssign
63            | CSharpTokenType::StarAssign
64            | CSharpTokenType::SlashAssign
65            | CSharpTokenType::PercentAssign
66            | CSharpTokenType::AndAssign
67            | CSharpTokenType::OrAssign
68            | CSharpTokenType::XorAssign
69            | CSharpTokenType::LeftShiftAssign
70            | CSharpTokenType::RightShiftAssign
71            | CSharpTokenType::QuestionQuestionAssign => (1, Associativity::Right),
72            CSharpTokenType::LogicalOr => (2, Associativity::Left),
73            CSharpTokenType::LogicalAnd => (3, Associativity::Left),
74            CSharpTokenType::Equal | CSharpTokenType::NotEqual | CSharpTokenType::Less | CSharpTokenType::Greater | CSharpTokenType::LessEqual | CSharpTokenType::GreaterEqual | CSharpTokenType::IsKeyword | CSharpTokenType::AsKeyword => {
75                (4, Associativity::Left)
76            }
77            CSharpTokenType::Plus | CSharpTokenType::Minus => (10, Associativity::Left),
78            CSharpTokenType::Star | CSharpTokenType::Slash | CSharpTokenType::Percent => (11, Associativity::Left),
79            CSharpTokenType::LeftParen | CSharpTokenType::LeftBracket | CSharpTokenType::Dot => (15, Associativity::Left),
80            _ => return None,
81        };
82
83        if prec < min_precedence {
84            return None;
85        }
86
87        match kind {
88            CSharpTokenType::LeftParen => {
89                let cp = state.checkpoint();
90                state.push_child(left);
91                state.expect(CSharpTokenType::LeftParen).ok();
92                while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
93                    state.bump();
94                }
95                state.expect(CSharpTokenType::RightParen).ok();
96                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::InvocationExpression))
97            }
98            CSharpTokenType::LeftBracket => {
99                let cp = state.checkpoint();
100                state.push_child(left);
101                state.expect(CSharpTokenType::LeftBracket).ok();
102                while state.not_at_end() && !state.at(CSharpTokenType::RightBracket) {
103                    state.bump();
104                }
105                state.expect(CSharpTokenType::RightBracket).ok();
106                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::ElementAccessExpression))
107            }
108            CSharpTokenType::Dot => {
109                let cp = state.checkpoint();
110                state.push_child(left);
111                state.expect(CSharpTokenType::Dot).ok();
112                state.expect(CSharpTokenType::Identifier).ok();
113                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::MemberAccessExpression))
114            }
115            CSharpTokenType::Assign
116            | CSharpTokenType::PlusAssign
117            | CSharpTokenType::MinusAssign
118            | CSharpTokenType::StarAssign
119            | CSharpTokenType::SlashAssign
120            | CSharpTokenType::PercentAssign
121            | CSharpTokenType::AndAssign
122            | CSharpTokenType::OrAssign
123            | CSharpTokenType::XorAssign
124            | CSharpTokenType::LeftShiftAssign
125            | CSharpTokenType::RightShiftAssign
126            | CSharpTokenType::QuestionQuestionAssign => Some(binary(state, left, kind, prec, assoc, AssignmentExpression, |s, p| PrattParser::parse(s, p, self))),
127            _ => Some(binary(state, left, kind, prec, assoc, BinaryExpression, |s, p| PrattParser::parse(s, p, self))),
128        }
129    }
130}
131
132impl<'config> CSharpParser<'config> {
133    pub fn new(language: &'config CSharpLanguage) -> Self {
134        Self { _language: language }
135    }
136
137    fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
138        use crate::{lexer::token_type::CSharpTokenType, parser::CSharpElementType::*};
139        match state.peek_kind() {
140            Some(CSharpTokenType::Namespace) => self.parse_namespace_declaration(state)?,
141            Some(CSharpTokenType::Using) => self.parse_using_directive(state)?,
142            Some(CSharpTokenType::Class)
143            | Some(CSharpTokenType::Struct)
144            | Some(CSharpTokenType::Interface)
145            | Some(CSharpTokenType::Enum)
146            | Some(CSharpTokenType::Record)
147            | Some(CSharpTokenType::Delegate)
148            | Some(CSharpTokenType::Public)
149            | Some(CSharpTokenType::Private)
150            | Some(CSharpTokenType::Protected)
151            | Some(CSharpTokenType::Internal)
152            | Some(CSharpTokenType::Static)
153            | Some(CSharpTokenType::AsyncKeyword)
154            | Some(CSharpTokenType::Abstract)
155            | Some(CSharpTokenType::Virtual)
156            | Some(CSharpTokenType::Override) => self.parse_declaration(state)?,
157            Some(CSharpTokenType::If) => self.parse_if_statement(state)?,
158            Some(CSharpTokenType::While) => self.parse_while_statement(state)?,
159            Some(CSharpTokenType::For) => self.parse_for_statement(state)?,
160            Some(CSharpTokenType::Foreach) => self.parse_foreach_statement(state)?,
161            Some(CSharpTokenType::Return) => self.parse_return_statement(state)?,
162            Some(CSharpTokenType::Break) => {
163                let cp = state.checkpoint();
164                state.bump();
165                state.eat(CSharpTokenType::Semicolon);
166                state.finish_at(cp, crate::parser::CSharpElementType::BreakStatement);
167            }
168            Some(CSharpTokenType::Continue) => {
169                let cp = state.checkpoint();
170                state.bump();
171                state.eat(CSharpTokenType::Semicolon);
172                state.finish_at(cp, crate::parser::CSharpElementType::ContinueStatement);
173            }
174            Some(CSharpTokenType::LeftBrace) => self.parse_block(state)?,
175            _ => {
176                let cp = state.checkpoint();
177                PrattParser::parse(state, 0, self);
178                state.eat(CSharpTokenType::Semicolon);
179                state.finish_at(cp, crate::parser::CSharpElementType::ExpressionStatement);
180            }
181        }
182        Ok(())
183    }
184
185    fn parse_foreach_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
186        use crate::lexer::token_type::CSharpTokenType;
187        let cp = state.checkpoint();
188        state.bump(); // foreach
189        state.expect(CSharpTokenType::LeftParen).ok();
190        // type name in iterable
191        while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
192            state.bump();
193        }
194        state.expect(CSharpTokenType::RightParen).ok();
195        self.parse_statement(state)?;
196        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ForeachStatement);
197        Ok(())
198    }
199
200    fn parse_namespace_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
201        use crate::lexer::token_type::CSharpTokenType;
202        let cp = state.checkpoint();
203        state.expect(CSharpTokenType::Namespace).ok();
204        while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
205            state.bump();
206        }
207        self.parse_block(state)?;
208        state.finish_at(cp, crate::parser::element_type::CSharpElementType::NamespaceDeclaration);
209        Ok(())
210    }
211
212    fn parse_using_directive<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
213        use crate::lexer::token_type::CSharpTokenType;
214        let cp = state.checkpoint();
215        state.expect(CSharpTokenType::Using).ok();
216        while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) {
217            state.bump();
218        }
219        state.eat(CSharpTokenType::Semicolon);
220        state.finish_at(cp, crate::parser::element_type::CSharpElementType::UsingDirective);
221        Ok(())
222    }
223
224    fn parse_accessor_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
225        use crate::lexer::token_type::CSharpTokenType;
226        state.expect(CSharpTokenType::LeftBrace).ok();
227        while state.not_at_end() && !state.at(CSharpTokenType::RightBrace) {
228            match state.peek_kind() {
229                Some(CSharpTokenType::GetKeyword) | Some(CSharpTokenType::SetKeyword) | Some(CSharpTokenType::AddKeyword) | Some(CSharpTokenType::RemoveKeyword) => {
230                    state.bump();
231                    if state.at(CSharpTokenType::LeftBrace) {
232                        self.parse_block(state)?;
233                    }
234                    else {
235                        state.eat(CSharpTokenType::Semicolon);
236                    }
237                }
238                _ => {
239                    state.bump();
240                }
241            }
242        }
243        state.expect(CSharpTokenType::RightBrace).ok();
244        Ok(())
245    }
246
247    fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
248        use crate::{lexer::token_type::CSharpTokenType, parser::CSharpElementType::*};
249        let cp = state.checkpoint();
250
251        // Handle modifiers
252        while state.not_at_end()
253            && matches!(
254                state.peek_kind(),
255                Some(CSharpTokenType::Public)
256                    | Some(CSharpTokenType::Private)
257                    | Some(CSharpTokenType::Protected)
258                    | Some(CSharpTokenType::Internal)
259                    | Some(CSharpTokenType::Static)
260                    | Some(CSharpTokenType::Readonly)
261                    | Some(CSharpTokenType::Abstract)
262                    | Some(CSharpTokenType::Virtual)
263                    | Some(CSharpTokenType::Override)
264                    | Some(CSharpTokenType::AsyncKeyword)
265            )
266        {
267            state.bump();
268        }
269
270        match state.peek_kind() {
271            Some(CSharpTokenType::Class) => {
272                state.bump();
273                state.expect(CSharpTokenType::Identifier).ok();
274                // Base types and generics
275                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
276                    state.bump();
277                }
278                self.parse_block(state)?;
279                state.finish_at(cp, crate::parser::element_type::CSharpElementType::ClassDeclaration);
280            }
281            Some(CSharpTokenType::Interface) => {
282                state.bump();
283                state.expect(CSharpTokenType::Identifier).ok();
284                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
285                    state.bump();
286                }
287                self.parse_block(state)?;
288                state.finish_at(cp, crate::parser::element_type::CSharpElementType::InterfaceDeclaration);
289            }
290            Some(CSharpTokenType::Struct) => {
291                state.bump();
292                state.expect(CSharpTokenType::Identifier).ok();
293                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
294                    state.bump();
295                }
296                self.parse_block(state)?;
297                state.finish_at(cp, crate::parser::element_type::CSharpElementType::StructDeclaration);
298            }
299            Some(CSharpTokenType::Enum) => {
300                state.bump();
301                state.expect(CSharpTokenType::Identifier).ok();
302                self.parse_block(state)?;
303                state.finish_at(cp, crate::parser::element_type::CSharpElementType::EnumDeclaration);
304            }
305            Some(CSharpTokenType::Record) => {
306                state.bump();
307                state.expect(CSharpTokenType::Identifier).ok();
308                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) && !state.at(CSharpTokenType::Semicolon) {
309                    state.bump();
310                }
311                if state.at(CSharpTokenType::LeftBrace) {
312                    self.parse_block(state)?;
313                }
314                else {
315                    state.eat(CSharpTokenType::Semicolon);
316                }
317                state.finish_at(cp, crate::parser::element_type::CSharpElementType::RecordDeclaration);
318            }
319            Some(CSharpTokenType::Delegate) => {
320                state.bump();
321                // Type name (parameters);
322                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) {
323                    state.bump();
324                }
325                state.eat(CSharpTokenType::Semicolon);
326                state.finish_at(cp, crate::parser::element_type::CSharpElementType::DelegateDeclaration);
327            }
328            Some(CSharpTokenType::Event) => {
329                state.bump();
330                // Type name;
331                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) && !state.at(CSharpTokenType::LeftBrace) {
332                    state.bump();
333                }
334                if state.at(CSharpTokenType::LeftBrace) {
335                    self.parse_accessor_block(state)?;
336                }
337                else {
338                    state.eat(CSharpTokenType::Semicolon);
339                }
340                state.finish_at(cp, crate::parser::element_type::CSharpElementType::EventDeclaration);
341            }
342            _ => {
343                // Property, Method, or Field
344                // 简化处理
345                state.bump(); // Type
346                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) && !state.at(CSharpTokenType::LeftBrace) && !state.at(CSharpTokenType::LeftParen) {
347                    state.bump();
348                }
349
350                if state.eat(CSharpTokenType::This) && state.at(CSharpTokenType::LeftBracket) {
351                    // Indexer
352                    state.bump(); // [
353                    while state.not_at_end() && !state.at(CSharpTokenType::RightBracket) {
354                        state.bump();
355                    }
356                    state.expect(CSharpTokenType::RightBracket).ok();
357                    self.parse_accessor_block(state)?;
358                    state.finish_at(cp, crate::parser::element_type::CSharpElementType::IndexerDeclaration);
359                }
360                else {
361                    state.expect(CSharpTokenType::Identifier).ok();
362                    if state.at(CSharpTokenType::LeftParen) {
363                        // Method
364                        state.bump(); // (
365                        while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
366                            state.bump();
367                        }
368                        state.expect(CSharpTokenType::RightParen).ok();
369                        if state.at(CSharpTokenType::LeftBrace) {
370                            self.parse_block(state)?;
371                        }
372                        else {
373                            state.eat(CSharpTokenType::Semicolon);
374                        }
375                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::MethodDeclaration);
376                    }
377                    else if state.at(CSharpTokenType::LeftBrace) {
378                        // Property
379                        self.parse_accessor_block(state)?;
380                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::PropertyDeclaration);
381                    }
382                    else {
383                        // Field
384                        state.eat(CSharpTokenType::Semicolon);
385                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::FieldDeclaration);
386                    }
387                }
388            }
389        }
390        Ok(())
391    }
392
393    fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
394        use crate::lexer::token_type::CSharpTokenType;
395        let cp = state.checkpoint();
396        state.bump(); // if
397        state.expect(CSharpTokenType::LeftParen).ok();
398        PrattParser::parse(state, 0, self);
399        state.expect(CSharpTokenType::RightParen).ok();
400        self.parse_statement(state)?;
401        if state.eat(CSharpTokenType::Else) {
402            self.parse_statement(state)?;
403        }
404        state.finish_at(cp, crate::parser::element_type::CSharpElementType::IfStatement);
405        Ok(())
406    }
407
408    fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
409        use crate::lexer::token_type::CSharpTokenType;
410        let cp = state.checkpoint();
411        state.bump(); // while
412        state.expect(CSharpTokenType::LeftParen).ok();
413        PrattParser::parse(state, 0, self);
414        state.expect(CSharpTokenType::RightParen).ok();
415        self.parse_statement(state)?;
416        state.finish_at(cp, crate::parser::element_type::CSharpElementType::WhileStatement);
417        Ok(())
418    }
419
420    fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
421        use crate::lexer::token_type::CSharpTokenType;
422        let cp = state.checkpoint();
423        state.bump(); // for
424        state.expect(CSharpTokenType::LeftParen).ok();
425        PrattParser::parse(state, 0, self);
426        state.expect(CSharpTokenType::RightParen).ok();
427        self.parse_statement(state)?;
428        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ForStatement);
429        Ok(())
430    }
431
432    fn parse_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
433        use crate::lexer::token_type::CSharpTokenType;
434        let cp = state.checkpoint();
435        state.expect(CSharpTokenType::LeftBrace).ok();
436        while state.not_at_end() && !state.at(CSharpTokenType::RightBrace) {
437            self.parse_statement(state)?;
438        }
439        state.expect(CSharpTokenType::RightBrace).ok();
440        state.finish_at(cp, crate::parser::element_type::CSharpElementType::Block);
441        Ok(())
442    }
443
444    fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
445        use crate::lexer::token_type::CSharpTokenType;
446        let cp = state.checkpoint();
447        state.bump(); // return
448        if !state.at(CSharpTokenType::Semicolon) && !state.at(CSharpTokenType::RightBrace) {
449            PrattParser::parse(state, 0, self);
450        }
451        state.eat(CSharpTokenType::Semicolon);
452        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ReturnStatement);
453        Ok(())
454    }
455}
456
457impl<'config> Parser<CSharpLanguage> for CSharpParser<'config> {
458    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CSharpLanguage>) -> ParseOutput<'a, CSharpLanguage> {
459        let lexer = crate::lexer::CSharpLexer::new(self._language);
460        parse_with_lexer(&lexer, text, edits, cache, |state| {
461            let cp = state.checkpoint();
462            while state.not_at_end() {
463                self.parse_statement(state)?;
464            }
465            Ok(state.finish_at(cp, crate::parser::element_type::CSharpElementType::Root))
466        })
467    }
468}