Skip to main content

oak_csharp/parser/
mod.rs

1//! Parser implementation for the C# language.
2
3use crate::language::CSharpLanguage;
4/// Element types for the C# parser.
5pub mod element_type;
6pub use element_type::CSharpElementType;
7use oak_core::{
8    GreenNode, OakError,
9    parser::{
10        ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer,
11        pratt::{Associativity, Pratt, PrattParser, binary},
12    },
13    source::{Source, TextEdit},
14};
15
/// Shorthand for the core `ParserState` specialised to `CSharpLanguage`, generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, CSharpLanguage, S>;
17
/// A parser for the C# language.
///
/// Implements the `Pratt` and `Parser` traits to provide a full C# parser
/// capable of handling expressions, statements, and declarations.
pub struct CSharpParser<'config> {
    /// Borrowed language configuration; handed to `CSharpLexer::new` when a parse starts.
    pub(crate) _language: &'config CSharpLanguage,
}
25
26impl<'config> Pratt<CSharpLanguage> for CSharpParser<'config> {
27    fn primary<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>) -> &'a GreenNode<'a, CSharpLanguage> {
28        use crate::lexer::token_type::CSharpTokenType;
29        let cp = state.checkpoint();
30        match state.peek_kind() {
31            Some(CSharpTokenType::Identifier) => {
32                state.bump();
33                state.finish_at(cp, crate::parser::element_type::CSharpElementType::IdentifierName)
34            }
35            Some(CSharpTokenType::Number)
36            | Some(CSharpTokenType::NumberLiteral)
37            | Some(CSharpTokenType::String)
38            | Some(CSharpTokenType::StringLiteral)
39            | Some(CSharpTokenType::TrueKeyword)
40            | Some(CSharpTokenType::FalseKeyword)
41            | Some(CSharpTokenType::NullKeyword) => {
42                state.bump();
43                state.finish_at(cp, crate::parser::element_type::CSharpElementType::LiteralExpression)
44            }
45            Some(CSharpTokenType::LeftParen) => {
46                state.bump();
47                PrattParser::parse(state, 0, self);
48                state.expect(CSharpTokenType::RightParen).ok();
49                state.finish_at(cp, crate::parser::element_type::CSharpElementType::BinaryExpression) // simplified processing
50            }
51            _ => {
52                state.bump();
53                state.finish_at(cp, crate::parser::element_type::CSharpElementType::Root)
54            }
55        }
56    }
57
    /// Parses a prefix expression.
    ///
    /// Delegates straight to `primary`; no prefix operators are handled here.
    fn prefix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>) -> &'a GreenNode<'a, CSharpLanguage> {
        self.primary(state)
    }
61
62    fn infix<'a, S: oak_core::source::Source + ?Sized>(&self, state: &mut ParserState<'a, CSharpLanguage, S>, left: &'a GreenNode<'a, CSharpLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CSharpLanguage>> {
63        use crate::{lexer::token_type::CSharpTokenType, parser::CSharpElementType::*};
64        let kind = state.peek_kind()?;
65
66        let (prec, assoc) = match kind {
67            CSharpTokenType::Assign
68            | CSharpTokenType::PlusAssign
69            | CSharpTokenType::MinusAssign
70            | CSharpTokenType::StarAssign
71            | CSharpTokenType::SlashAssign
72            | CSharpTokenType::PercentAssign
73            | CSharpTokenType::AndAssign
74            | CSharpTokenType::OrAssign
75            | CSharpTokenType::XorAssign
76            | CSharpTokenType::LeftShiftAssign
77            | CSharpTokenType::RightShiftAssign
78            | CSharpTokenType::QuestionQuestionAssign => (1, Associativity::Right),
79            CSharpTokenType::LogicalOr => (2, Associativity::Left),
80            CSharpTokenType::LogicalAnd => (3, Associativity::Left),
81            CSharpTokenType::Equal | CSharpTokenType::NotEqual | CSharpTokenType::Less | CSharpTokenType::Greater | CSharpTokenType::LessEqual | CSharpTokenType::GreaterEqual | CSharpTokenType::IsKeyword | CSharpTokenType::AsKeyword => {
82                (4, Associativity::Left)
83            }
84            CSharpTokenType::Plus | CSharpTokenType::Minus => (10, Associativity::Left),
85            CSharpTokenType::Star | CSharpTokenType::Slash | CSharpTokenType::Percent => (11, Associativity::Left),
86            CSharpTokenType::LeftParen | CSharpTokenType::LeftBracket | CSharpTokenType::Dot => (15, Associativity::Left),
87            _ => return None,
88        };
89
90        if prec < min_precedence {
91            return None;
92        }
93
94        match kind {
95            CSharpTokenType::LeftParen => {
96                let cp = state.checkpoint();
97                state.push_child(left);
98                state.expect(CSharpTokenType::LeftParen).ok();
99                while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
100                    state.bump();
101                }
102                state.expect(CSharpTokenType::RightParen).ok();
103                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::InvocationExpression))
104            }
105            CSharpTokenType::LeftBracket => {
106                let cp = state.checkpoint();
107                state.push_child(left);
108                state.expect(CSharpTokenType::LeftBracket).ok();
109                while state.not_at_end() && !state.at(CSharpTokenType::RightBracket) {
110                    state.bump();
111                }
112                state.expect(CSharpTokenType::RightBracket).ok();
113                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::ElementAccessExpression))
114            }
115            CSharpTokenType::Dot => {
116                let cp = state.checkpoint();
117                state.push_child(left);
118                state.expect(CSharpTokenType::Dot).ok();
119                state.expect(CSharpTokenType::Identifier).ok();
120                Some(state.finish_at(cp, crate::parser::element_type::CSharpElementType::MemberAccessExpression))
121            }
122            CSharpTokenType::Assign
123            | CSharpTokenType::PlusAssign
124            | CSharpTokenType::MinusAssign
125            | CSharpTokenType::StarAssign
126            | CSharpTokenType::SlashAssign
127            | CSharpTokenType::PercentAssign
128            | CSharpTokenType::AndAssign
129            | CSharpTokenType::OrAssign
130            | CSharpTokenType::XorAssign
131            | CSharpTokenType::LeftShiftAssign
132            | CSharpTokenType::RightShiftAssign
133            | CSharpTokenType::QuestionQuestionAssign => Some(binary(state, left, kind, prec, assoc, AssignmentExpression, |s, p| PrattParser::parse(s, p, self))),
134            _ => Some(binary(state, left, kind, prec, assoc, BinaryExpression, |s, p| PrattParser::parse(s, p, self))),
135        }
136    }
137}
138
139impl<'config> CSharpParser<'config> {
140    /// Creates a new C# parser.
141    pub fn new(language: &'config CSharpLanguage) -> Self {
142        Self { _language: language }
143    }
144
145    /// Parses a C# statement or declaration.
146    ///
147    /// This is the main dispatch method for the parser, routing to specific
148    /// methods based on the next token in the stream.
149    fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
150        use crate::lexer::token_type::CSharpTokenType;
151        match state.peek_kind() {
152            Some(CSharpTokenType::Namespace) => self.parse_namespace_declaration(state)?,
153            Some(CSharpTokenType::Using) => self.parse_using_directive(state)?,
154            Some(CSharpTokenType::Class)
155            | Some(CSharpTokenType::Struct)
156            | Some(CSharpTokenType::Interface)
157            | Some(CSharpTokenType::Enum)
158            | Some(CSharpTokenType::Record)
159            | Some(CSharpTokenType::Delegate)
160            | Some(CSharpTokenType::Public)
161            | Some(CSharpTokenType::Private)
162            | Some(CSharpTokenType::Protected)
163            | Some(CSharpTokenType::Internal)
164            | Some(CSharpTokenType::Static)
165            | Some(CSharpTokenType::AsyncKeyword)
166            | Some(CSharpTokenType::Abstract)
167            | Some(CSharpTokenType::Virtual)
168            | Some(CSharpTokenType::Override) => self.parse_declaration(state)?,
169            Some(CSharpTokenType::If) => self.parse_if_statement(state)?,
170            Some(CSharpTokenType::While) => self.parse_while_statement(state)?,
171            Some(CSharpTokenType::For) => self.parse_for_statement(state)?,
172            Some(CSharpTokenType::Foreach) => self.parse_foreach_statement(state)?,
173            Some(CSharpTokenType::Return) => self.parse_return_statement(state)?,
174            Some(CSharpTokenType::Break) => {
175                let cp = state.checkpoint();
176                state.bump();
177                state.eat(CSharpTokenType::Semicolon);
178                state.finish_at(cp, crate::parser::CSharpElementType::BreakStatement);
179            }
180            Some(CSharpTokenType::Continue) => {
181                let cp = state.checkpoint();
182                state.bump();
183                state.eat(CSharpTokenType::Semicolon);
184                state.finish_at(cp, crate::parser::CSharpElementType::ContinueStatement);
185            }
186            Some(CSharpTokenType::LeftBrace) => self.parse_block(state)?,
187            _ => {
188                let cp = state.checkpoint();
189                PrattParser::parse(state, 0, self);
190                state.eat(CSharpTokenType::Semicolon);
191                state.finish_at(cp, crate::parser::CSharpElementType::ExpressionStatement);
192            }
193        }
194        Ok(())
195    }
196
197    /// Parses a `foreach` statement.
198    ///
199    /// Format: `foreach (Type var in collection) statement`
200    fn parse_foreach_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
201        use crate::lexer::token_type::CSharpTokenType;
202        let cp = state.checkpoint();
203        state.bump(); // foreach
204        state.expect(CSharpTokenType::LeftParen).ok();
205        // type name in iterable
206        while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
207            state.bump();
208        }
209        state.expect(CSharpTokenType::RightParen).ok();
210        self.parse_statement(state)?;
211        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ForeachStatement);
212        Ok(())
213    }
214
215    /// Parses a `namespace` declaration.
216    fn parse_namespace_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
217        use crate::lexer::token_type::CSharpTokenType;
218        let cp = state.checkpoint();
219        state.expect(CSharpTokenType::Namespace).ok();
220        while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
221            state.bump();
222        }
223        self.parse_block(state)?;
224        state.finish_at(cp, crate::parser::element_type::CSharpElementType::NamespaceDeclaration);
225        Ok(())
226    }
227
228    /// Parses a `using` directive.
229    fn parse_using_directive<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
230        use crate::lexer::token_type::CSharpTokenType;
231        let cp = state.checkpoint();
232        state.expect(CSharpTokenType::Using).ok();
233        while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) {
234            state.bump();
235        }
236        state.eat(CSharpTokenType::Semicolon);
237        state.finish_at(cp, crate::parser::element_type::CSharpElementType::UsingDirective);
238        Ok(())
239    }
240
241    /// Parses an accessor block (e.g., `{ get; set; }` for properties).
242    fn parse_accessor_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
243        use crate::lexer::token_type::CSharpTokenType;
244        state.expect(CSharpTokenType::LeftBrace).ok();
245        while state.not_at_end() && !state.at(CSharpTokenType::RightBrace) {
246            match state.peek_kind() {
247                Some(CSharpTokenType::GetKeyword) | Some(CSharpTokenType::SetKeyword) | Some(CSharpTokenType::AddKeyword) | Some(CSharpTokenType::RemoveKeyword) => {
248                    state.bump();
249                    if state.at(CSharpTokenType::LeftBrace) {
250                        self.parse_block(state)?;
251                    }
252                    else {
253                        state.eat(CSharpTokenType::Semicolon);
254                    }
255                }
256                _ => {
257                    state.bump();
258                }
259            }
260        }
261        state.expect(CSharpTokenType::RightBrace).ok();
262        Ok(())
263    }
264
265    /// Parses a declaration (class, interface, struct, enum, record, delegate, event, field, property, or method).
266    fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
267        use crate::lexer::token_type::CSharpTokenType;
268        let cp = state.checkpoint();
269
270        // Handle modifiers
271        while state.not_at_end()
272            && matches!(
273                state.peek_kind(),
274                Some(CSharpTokenType::Public)
275                    | Some(CSharpTokenType::Private)
276                    | Some(CSharpTokenType::Protected)
277                    | Some(CSharpTokenType::Internal)
278                    | Some(CSharpTokenType::Static)
279                    | Some(CSharpTokenType::Readonly)
280                    | Some(CSharpTokenType::Abstract)
281                    | Some(CSharpTokenType::Virtual)
282                    | Some(CSharpTokenType::Override)
283                    | Some(CSharpTokenType::AsyncKeyword)
284            )
285        {
286            state.bump();
287        }
288
289        match state.peek_kind() {
290            Some(CSharpTokenType::Class) => {
291                state.bump();
292                state.expect(CSharpTokenType::Identifier).ok();
293                // Base types and generics
294                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
295                    state.bump();
296                }
297                self.parse_block(state)?;
298                state.finish_at(cp, crate::parser::element_type::CSharpElementType::ClassDeclaration);
299            }
300            Some(CSharpTokenType::Interface) => {
301                state.bump();
302                state.expect(CSharpTokenType::Identifier).ok();
303                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
304                    state.bump();
305                }
306                self.parse_block(state)?;
307                state.finish_at(cp, crate::parser::element_type::CSharpElementType::InterfaceDeclaration);
308            }
309            Some(CSharpTokenType::Struct) => {
310                state.bump();
311                state.expect(CSharpTokenType::Identifier).ok();
312                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) {
313                    state.bump();
314                }
315                self.parse_block(state)?;
316                state.finish_at(cp, crate::parser::element_type::CSharpElementType::StructDeclaration);
317            }
318            Some(CSharpTokenType::Enum) => {
319                state.bump();
320                state.expect(CSharpTokenType::Identifier).ok();
321                self.parse_block(state)?;
322                state.finish_at(cp, crate::parser::element_type::CSharpElementType::EnumDeclaration);
323            }
324            Some(CSharpTokenType::Record) => {
325                state.bump();
326                state.expect(CSharpTokenType::Identifier).ok();
327                while state.not_at_end() && !state.at(CSharpTokenType::LeftBrace) && !state.at(CSharpTokenType::Semicolon) {
328                    state.bump();
329                }
330                if state.at(CSharpTokenType::LeftBrace) {
331                    self.parse_block(state)?;
332                }
333                else {
334                    state.eat(CSharpTokenType::Semicolon);
335                }
336                state.finish_at(cp, crate::parser::element_type::CSharpElementType::RecordDeclaration);
337            }
338            Some(CSharpTokenType::Delegate) => {
339                state.bump();
340                // Type name (parameters);
341                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) {
342                    state.bump();
343                }
344                state.eat(CSharpTokenType::Semicolon);
345                state.finish_at(cp, crate::parser::element_type::CSharpElementType::DelegateDeclaration);
346            }
347            Some(CSharpTokenType::Event) => {
348                state.bump();
349                // Type name;
350                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) && !state.at(CSharpTokenType::LeftBrace) {
351                    state.bump();
352                }
353                if state.at(CSharpTokenType::LeftBrace) {
354                    self.parse_accessor_block(state)?;
355                }
356                else {
357                    state.eat(CSharpTokenType::Semicolon);
358                }
359                state.finish_at(cp, crate::parser::element_type::CSharpElementType::EventDeclaration);
360            }
361            _ => {
362                // Property, Method, or Field
363                // Simplified processing
364                state.bump(); // Type
365                while state.not_at_end() && !state.at(CSharpTokenType::Semicolon) && !state.at(CSharpTokenType::LeftBrace) && !state.at(CSharpTokenType::LeftParen) {
366                    state.bump();
367                }
368
369                if state.eat(CSharpTokenType::This) && state.at(CSharpTokenType::LeftBracket) {
370                    // Indexer
371                    state.bump(); // [
372                    while state.not_at_end() && !state.at(CSharpTokenType::RightBracket) {
373                        state.bump();
374                    }
375                    state.expect(CSharpTokenType::RightBracket).ok();
376                    self.parse_accessor_block(state)?;
377                    state.finish_at(cp, crate::parser::element_type::CSharpElementType::IndexerDeclaration);
378                }
379                else {
380                    state.expect(CSharpTokenType::Identifier).ok();
381                    if state.at(CSharpTokenType::LeftParen) {
382                        // Method
383                        state.bump(); // (
384                        while state.not_at_end() && !state.at(CSharpTokenType::RightParen) {
385                            state.bump();
386                        }
387                        state.expect(CSharpTokenType::RightParen).ok();
388                        if state.at(CSharpTokenType::LeftBrace) {
389                            self.parse_block(state)?;
390                        }
391                        else {
392                            state.eat(CSharpTokenType::Semicolon);
393                        }
394                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::MethodDeclaration);
395                    }
396                    else if state.at(CSharpTokenType::LeftBrace) {
397                        // Property
398                        self.parse_accessor_block(state)?;
399                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::PropertyDeclaration);
400                    }
401                    else {
402                        // Field
403                        state.eat(CSharpTokenType::Semicolon);
404                        state.finish_at(cp, crate::parser::element_type::CSharpElementType::FieldDeclaration);
405                    }
406                }
407            }
408        }
409        Ok(())
410    }
411
412    /// Parses an `if` statement.
413    fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
414        use crate::lexer::token_type::CSharpTokenType;
415        let cp = state.checkpoint();
416        state.bump(); // if
417        state.expect(CSharpTokenType::LeftParen).ok();
418        PrattParser::parse(state, 0, self);
419        state.expect(CSharpTokenType::RightParen).ok();
420        self.parse_statement(state)?;
421        if state.eat(CSharpTokenType::Else) {
422            self.parse_statement(state)?;
423        }
424        state.finish_at(cp, crate::parser::element_type::CSharpElementType::IfStatement);
425        Ok(())
426    }
427
428    /// Parses a `while` statement.
429    fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
430        use crate::lexer::token_type::CSharpTokenType;
431        let cp = state.checkpoint();
432        state.bump(); // while
433        state.expect(CSharpTokenType::LeftParen).ok();
434        PrattParser::parse(state, 0, self);
435        state.expect(CSharpTokenType::RightParen).ok();
436        self.parse_statement(state)?;
437        state.finish_at(cp, crate::parser::element_type::CSharpElementType::WhileStatement);
438        Ok(())
439    }
440
441    /// Parses a `for` statement.
442    fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
443        use crate::lexer::token_type::CSharpTokenType;
444        let cp = state.checkpoint();
445        state.bump(); // for
446        state.expect(CSharpTokenType::LeftParen).ok();
447        PrattParser::parse(state, 0, self);
448        state.expect(CSharpTokenType::RightParen).ok();
449        self.parse_statement(state)?;
450        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ForStatement);
451        Ok(())
452    }
453
454    /// Parses a block statement enclosed in braces `{ ... }`.
455    fn parse_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
456        use crate::lexer::token_type::CSharpTokenType;
457        let cp = state.checkpoint();
458        state.expect(CSharpTokenType::LeftBrace).ok();
459        while state.not_at_end() && !state.at(CSharpTokenType::RightBrace) {
460            self.parse_statement(state)?;
461        }
462        state.expect(CSharpTokenType::RightBrace).ok();
463        state.finish_at(cp, crate::parser::element_type::CSharpElementType::Block);
464        Ok(())
465    }
466
467    /// Parses a `return` statement.
468    fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
469        use crate::lexer::token_type::CSharpTokenType;
470        let cp = state.checkpoint();
471        state.bump(); // return
472        if !state.at(CSharpTokenType::Semicolon) {
473            PrattParser::parse(state, 0, self);
474        }
475        state.eat(CSharpTokenType::Semicolon);
476        state.finish_at(cp, crate::parser::element_type::CSharpElementType::ReturnStatement);
477        Ok(())
478    }
479}
480
481impl<'config> Parser<CSharpLanguage> for CSharpParser<'config> {
482    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CSharpLanguage>) -> ParseOutput<'a, CSharpLanguage> {
483        let lexer = crate::lexer::CSharpLexer::new(self._language);
484        parse_with_lexer(&lexer, text, edits, cache, |state| {
485            let cp = state.checkpoint();
486            while state.not_at_end() {
487                self.parse_statement(state)?;
488            }
489            Ok(state.finish_at(cp, crate::parser::element_type::CSharpElementType::Root))
490        })
491    }
492}