Skip to main content

oak_c/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Element types for the C AST.
3pub mod element_type;
4pub use element_type::CElementType;
5
6use crate::{language::CLanguage, lexer::CTokenType};
7use oak_core::{
8    GreenNode, OakError, Source,
9    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer},
10    source::TextEdit,
11};
12
/// Shorthand for the `oak_core` parser state specialised to [`CLanguage`],
/// generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, CLanguage, S>;
14
/// Parser for the C language.
///
/// The parser itself is stateless apart from the borrowed language
/// configuration; all parsing progress lives in the [`State`] passed to each
/// method.
pub struct CParser<'config> {
    /// Language configuration; handed to the lexer when a parse is started.
    pub(crate) config: &'config CLanguage,
}
20
21impl<'config> CParser<'config> {
22    /// Creates a new `CParser` with the given language configuration.
23    pub fn new(config: &'config CLanguage) -> Self {
24        Self { config }
25    }
26
27    /// Parses a C statement.
28    pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
29        use crate::lexer::CTokenType::*;
30        self.skip_trivia(state);
31        match state.peek_kind() {
32            Some(If) => self.parse_if_statement(state)?,
33            Some(While) => self.parse_while_statement(state)?,
34            Some(For) => self.parse_for_statement(state)?,
35            Some(Return) => self.parse_return_statement(state)?,
36            Some(LeftBrace) => self.parse_compound_statement(state)?,
37            Some(Struct) | Some(Union) | Some(Enum) | Some(Typedef) | Some(Extern) | Some(Static) | Some(Int) | Some(Char) | Some(Void) | Some(Float) | Some(Double) | Some(Long) | Some(Short) | Some(Signed) | Some(Unsigned) | Some(Bool) | Some(Const)
38            | Some(Restrict) | Some(Volatile) => self.parse_declaration(state)?,
39            Some(Preprocessor) => {
40                // Skip preprocessor directives
41                while state.not_at_end() && !state.at(CTokenType::Semicolon) {
42                    state.bump();
43                }
44                if state.at(CTokenType::Semicolon) {
45                    state.bump();
46                }
47            }
48            Some(Identifier) => {
49                // Check if it's a label
50                let cp = state.checkpoint();
51                state.bump();
52                if state.at(Colon) {
53                    state.bump();
54                    state.finish_at(cp, CElementType::ExpressionStatement);
55                }
56                else {
57                    // Handle identifier as expression
58                    let expr = PrattParser::parse(state, 0, self);
59                    state.push_child(expr);
60                    self.skip_trivia(state);
61                    if !state.eat(Semicolon) {
62                        // Skip until semicolon or end of statement
63                        while state.not_at_end() && !state.at(Semicolon) && !state.at(LeftBrace) && !state.at(RightBrace) {
64                            state.bump();
65                        }
66                        state.eat(Semicolon);
67                    }
68                }
69            }
70            Some(Semicolon) => {
71                // Empty statement
72                state.bump();
73            }
74            None => {
75                // End of file
76                return Ok(());
77            }
78            _ => {
79                // Unexpected token, recover by skipping until semicolon or brace
80                while state.not_at_end() && !state.at(Semicolon) && !state.at(LeftBrace) && !state.at(RightBrace) {
81                    state.bump();
82                }
83                if state.at(Semicolon) {
84                    state.bump();
85                }
86                else if state.at(LeftBrace) {
87                    // Handle left brace by parsing compound statement
88                    self.parse_compound_statement(state)?;
89                }
90                else if state.at(RightBrace) {
91                    // Handle right brace by consuming it
92                    state.bump();
93                }
94            }
95        }
96        Ok(())
97    }
98
99    /// Skips trivia tokens (whitespace and comments).
100    fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
101        while let Some(kind) = state.peek_kind() {
102            if matches!(kind, CTokenType::Whitespace | CTokenType::LineComment | CTokenType::BlockComment) {
103                state.bump();
104            }
105            else {
106                break;
107            }
108        }
109    }
110
111    /// Parses a C declaration or function definition.
112    fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
113        use crate::lexer::CTokenType::*;
114        let cp = state.checkpoint();
115
116        // Parse declaration specifiers
117        self.parse_declaration_specifiers(state)?;
118
119        // Parse declarator
120        self.skip_trivia(state);
121        if state.at(Identifier) || state.at(Star) {
122            self.parse_declarator(state)?;
123        }
124
125        self.skip_trivia(state);
126
127        if state.at(LeftBrace) {
128            // Function definition
129            self.parse_compound_statement(state)?;
130            state.finish_at(cp, CElementType::FunctionDefinition);
131        }
132        else {
133            // Variable declaration
134            // Parse initializer if present
135            if state.at(Assign) {
136                state.bump(); // =
137                self.skip_trivia(state);
138                let expr = PrattParser::parse(state, 0, self);
139                state.push_child(expr);
140            }
141
142            // Parse comma separator and additional declarators
143            while state.at(Comma) {
144                state.bump();
145                self.skip_trivia(state);
146                if state.at(Identifier) || state.at(Star) {
147                    self.parse_declarator(state)?;
148
149                    // Parse initializer if present
150                    if state.at(Assign) {
151                        state.bump(); // =
152                        self.skip_trivia(state);
153                        let expr = PrattParser::parse(state, 0, self);
154                        state.push_child(expr);
155                    }
156                }
157            }
158
159            state.eat(Semicolon);
160            state.finish_at(cp, CElementType::DeclarationStatement);
161        }
162
163        Ok(())
164    }
165
166    /// Parses declaration specifiers (storage class, type specifiers, type qualifiers).
167    fn parse_declaration_specifiers<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
168        use crate::lexer::CTokenType::*;
169
170        loop {
171            self.skip_trivia(state);
172            match state.peek_kind() {
173                // Storage class specifiers
174                Some(Extern) | Some(Static) | Some(Auto) | Some(Register) | Some(Typedef) => {
175                    state.bump();
176                }
177                // Type specifiers
178                Some(Void) | Some(Char) | Some(Short) | Some(Int) | Some(Long) | Some(Float) | Some(Double) | Some(Signed) | Some(Unsigned) | Some(Bool) => {
179                    state.bump();
180                }
181                Some(Struct) => {
182                    let cp = state.checkpoint();
183                    state.bump();
184                    // Parse struct name
185                    if state.peek_kind() == Some(Identifier) {
186                        state.bump();
187                    }
188                    if state.peek_kind() == Some(LeftBrace) {
189                        self.parse_struct_union_body(state)?;
190                    }
191                    state.finish_at(cp, CElementType::StructDefinition);
192                }
193                Some(Union) => {
194                    let cp = state.checkpoint();
195                    state.bump();
196                    // Parse union name
197                    if state.peek_kind() == Some(Identifier) {
198                        state.bump();
199                    }
200                    if state.peek_kind() == Some(LeftBrace) {
201                        self.parse_struct_union_body(state)?;
202                    }
203                    state.finish_at(cp, CElementType::UnionDefinition);
204                }
205                Some(Enum) => {
206                    let cp = state.checkpoint();
207                    state.bump();
208                    // Parse enum name
209                    if state.peek_kind() == Some(Identifier) {
210                        state.bump();
211                    }
212                    if state.peek_kind() == Some(LeftBrace) {
213                        self.parse_enum_body(state)?;
214                    }
215                    state.finish_at(cp, CElementType::EnumDefinition);
216                }
217                // Type qualifiers
218                Some(Const) | Some(Restrict) | Some(Volatile) => {
219                    state.bump();
220                }
221                _ => break,
222            }
223        }
224        Ok(())
225    }
226
227    /// Parses a declarator (variable name, pointer, array, function).
228    fn parse_declarator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
229        use crate::lexer::CTokenType::*;
230
231        // Parse pointers
232        while state.at(Star) {
233            state.bump();
234            // Parse type qualifiers after pointer
235            while state.at(Const) || state.at(Restrict) || state.at(Volatile) {
236                state.bump();
237            }
238        }
239
240        // Parse direct declarator
241        self.parse_direct_declarator(state)?;
242
243        Ok(())
244    }
245
246    /// Parses a direct declarator (identifier, array, function).
247    fn parse_direct_declarator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
248        use crate::lexer::CTokenType::*;
249
250        if state.at(Identifier) {
251            state.bump();
252        }
253        else if state.at(LeftParen) {
254            state.bump();
255            // Parse parenthesized declarator or expression
256            self.skip_trivia(state);
257
258            // Parse the content inside the parentheses
259            let mut paren_count = 1;
260            while state.not_at_end() && paren_count > 0 {
261                if state.at(LeftParen) {
262                    paren_count += 1;
263                }
264                else if state.at(RightParen) {
265                    paren_count -= 1;
266                }
267                if paren_count > 0 {
268                    state.bump();
269                }
270            }
271
272            state.expect(RightParen).ok();
273        }
274        else {
275            return Ok(());
276        }
277
278        // Parse array declarator
279        while state.at(LeftBracket) {
280            state.bump();
281            if !state.at(RightBracket) {
282                let expr = PrattParser::parse(state, 0, self);
283                state.push_child(expr);
284            }
285            state.expect(RightBracket).ok();
286        }
287
288        // Parse function declarator
289        if state.at(LeftParen) {
290            let pcp = state.checkpoint();
291            state.bump();
292
293            // Parse parameter list
294            while state.not_at_end() && !state.at(RightParen) {
295                self.skip_trivia(state);
296                if state.at(Void) {
297                    state.bump();
298                }
299                else if state.at(Identifier) || state.at(Char) || state.at(Int) || state.at(Float) || state.at(Double) {
300                    // Parse parameter declaration
301                    self.parse_declaration_specifiers(state)?;
302                    if state.at(Identifier) || state.at(Star) {
303                        // Parse parameter name without recursion
304                        self.skip_trivia(state);
305                        // Parse pointers
306                        while state.at(Star) {
307                            state.bump();
308                            // Parse type qualifiers after pointer
309                            while state.at(Const) || state.at(Restrict) || state.at(Volatile) {
310                                state.bump();
311                            }
312                        }
313                        // Parse identifier
314                        if state.at(Identifier) {
315                            state.bump();
316                        }
317                    }
318                }
319                if state.at(Comma) {
320                    state.bump();
321                }
322                else if !state.at(RightParen) {
323                    // Skip any unexpected tokens until comma or right paren
324                    while state.not_at_end() && !state.at(Comma) && !state.at(RightParen) {
325                        state.bump();
326                    }
327                    if state.at(Comma) {
328                        state.bump();
329                    }
330                }
331            }
332
333            state.expect(RightParen).ok();
334            state.finish_at(pcp, CElementType::ParameterList);
335        }
336
337        Ok(())
338    }
339
340    /// Parses struct/union body.
341    fn parse_struct_union_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
342        use crate::lexer::CTokenType::*;
343
344        if state.at(LeftBrace) {
345            state.bump();
346
347            while state.not_at_end() && !state.at(RightBrace) {
348                self.skip_trivia(state);
349                // Parse struct member declaration
350                let member_cp = state.checkpoint();
351                self.parse_declaration_specifiers(state)?;
352                while state.not_at_end() && !state.at(Semicolon) && !state.at(RightBrace) {
353                    if state.at(Identifier) || state.at(Star) {
354                        self.parse_declarator(state)?;
355                        if state.at(Comma) {
356                            state.bump();
357                        }
358                    }
359                    else {
360                        break;
361                    }
362                }
363                state.eat(Semicolon);
364                state.finish_at(member_cp, CElementType::StructMember);
365            }
366
367            state.expect(RightBrace).ok();
368        }
369
370        Ok(())
371    }
372
373    /// Parses enum body.
374    fn parse_enum_body<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
375        use crate::lexer::CTokenType::*;
376
377        if state.at(LeftBrace) {
378            state.bump();
379
380            while state.not_at_end() && !state.at(RightBrace) {
381                self.skip_trivia(state);
382                // Parse enum constant
383                if state.at(Identifier) {
384                    let const_cp = state.checkpoint();
385                    state.bump();
386                    // Parse optional initializer
387                    if state.at(Assign) {
388                        state.bump();
389                        let expr = PrattParser::parse(state, 0, self);
390                        state.push_child(expr);
391                    }
392                    state.finish_at(const_cp, CElementType::EnumConstant);
393                }
394
395                if state.at(Comma) {
396                    state.bump();
397                }
398                else if !state.at(RightBrace) {
399                    // Skip any unexpected tokens until comma or right brace
400                    while state.not_at_end() && !state.at(Comma) && !state.at(RightBrace) {
401                        state.bump();
402                    }
403                    if state.at(Comma) {
404                        state.bump();
405                    }
406                }
407            }
408
409            state.expect(RightBrace).ok();
410        }
411
412        Ok(())
413    }
414
415    fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
416        let cp = state.checkpoint();
417        state.bump(); // if
418        state.expect(CTokenType::LeftParen).ok();
419        let expr = PrattParser::parse(state, 0, self);
420        state.push_child(expr);
421        state.expect(CTokenType::RightParen).ok();
422        self.parse_statement(state)?;
423        if state.eat(CTokenType::Else) {
424            self.parse_statement(state)?;
425        }
426        state.finish_at(cp, CElementType::IfStatement);
427        Ok(())
428    }
429
430    fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
431        let cp = state.checkpoint();
432        state.bump(); // while
433        state.expect(CTokenType::LeftParen).ok();
434        let expr = PrattParser::parse(state, 0, self);
435        state.push_child(expr);
436        state.expect(CTokenType::RightParen).ok();
437        self.parse_statement(state)?;
438        state.finish_at(cp, CElementType::WhileStatement);
439        Ok(())
440    }
441
442    fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
443        let cp = state.checkpoint();
444        state.bump(); // for
445        state.expect(CTokenType::LeftParen).ok();
446
447        // Init
448        if !state.at(CTokenType::Semicolon) {
449            let expr = PrattParser::parse(state, 0, self);
450            state.push_child(expr);
451        }
452        state.expect(CTokenType::Semicolon).ok();
453
454        // Condition
455        if !state.at(CTokenType::Semicolon) {
456            let expr = PrattParser::parse(state, 0, self);
457            state.push_child(expr);
458        }
459        state.expect(CTokenType::Semicolon).ok();
460
461        // Increment
462        if !state.at(CTokenType::RightParen) {
463            let expr = PrattParser::parse(state, 0, self);
464            state.push_child(expr);
465        }
466        state.expect(CTokenType::RightParen).ok();
467
468        self.parse_statement(state)?;
469        state.finish_at(cp, CElementType::ForStatement);
470        Ok(())
471    }
472
473    fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
474        let cp = state.checkpoint();
475        state.bump(); // return
476        if !state.at(CTokenType::Semicolon) {
477            let expr = PrattParser::parse(state, 0, self);
478            state.push_child(expr);
479        }
480        state.eat(CTokenType::Semicolon);
481        state.finish_at(cp, CElementType::ReturnStatement);
482        Ok(())
483    }
484
485    fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
486        let cp = state.checkpoint();
487        if !state.eat(CTokenType::LeftBrace) {
488            // Skip until right brace or end of file
489            while state.not_at_end() && !state.at(CTokenType::RightBrace) {
490                state.bump();
491            }
492            if state.at(CTokenType::RightBrace) {
493                state.bump();
494            }
495            state.finish_at(cp, CElementType::CompoundStatement);
496            return Ok(());
497        }
498
499        while state.not_at_end() && !state.at(CTokenType::RightBrace) {
500            self.parse_statement(state)?;
501        }
502
503        if !state.eat(CTokenType::RightBrace) {
504            // Skip until end of file or next statement
505            while state.not_at_end() && !state.at(CTokenType::Semicolon) && !state.at(CTokenType::LeftBrace) {
506                state.bump();
507            }
508        }
509
510        state.finish_at(cp, CElementType::CompoundStatement);
511        Ok(())
512    }
513}
514
515impl<'config> Pratt<CLanguage> for CParser<'config> {
516    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CLanguage> {
517        use crate::lexer::CTokenType::*;
518        self.skip_trivia(state);
519        let cp = state.checkpoint();
520        match state.peek_kind() {
521            Some(Identifier) => {
522                state.bump();
523                state.finish_at(cp, CElementType::Token(Identifier))
524            }
525            Some(IntConstant) | Some(FloatConstant) | Some(CharConstant) | Some(StringLiteral) => {
526                state.bump();
527                state.finish_at(cp, CElementType::ExpressionStatement) // Simplified processing
528            }
529            Some(LeftParen) => {
530                state.bump();
531                let expr = PrattParser::parse(state, 0, self);
532                state.push_child(expr);
533                self.skip_trivia(state);
534                state.expect(RightParen).ok();
535                state.finish_at(cp, CElementType::ExpressionStatement)
536            }
537            _ => {
538                state.bump();
539                state.finish_at(cp, CElementType::Error)
540            }
541        }
542    }
543
544    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CLanguage>> {
545        use crate::lexer::CTokenType::*;
546        self.skip_trivia(state);
547        let kind = state.peek_kind()?;
548
549        let (prec, assoc) = match kind {
550            Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
551            LogicalOr => (2, Associativity::Left),
552            LogicalAnd => (3, Associativity::Left),
553            Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
554            Plus | Minus => (10, Associativity::Left),
555            Star | Slash | Percent => (11, Associativity::Left),
556            LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
557            _ => return None,
558        };
559
560        if prec < min_precedence {
561            return None;
562        }
563
564        match kind {
565            LeftParen => {
566                let cp = state.checkpoint();
567                state.push_child(left);
568                state.expect(LeftParen).ok();
569                while state.not_at_end() && !state.at(RightParen) {
570                    let expr = PrattParser::parse(state, 0, self);
571                    state.push_child(expr);
572                    self.skip_trivia(state);
573                    if !state.eat(Comma) {
574                        break;
575                    }
576                }
577                state.expect(RightParen).ok();
578                Some(state.finish_at(cp, CElementType::FunctionCall))
579            }
580            LeftBracket => {
581                let cp = state.checkpoint();
582                state.push_child(left);
583                state.expect(LeftBracket).ok();
584                let expr = PrattParser::parse(state, 0, self);
585                state.push_child(expr);
586                state.expect(RightBracket).ok();
587                Some(state.finish_at(cp, CElementType::ExpressionStatement))
588            }
589            Dot | Arrow => {
590                let cp = state.checkpoint();
591                state.push_child(left);
592                state.expect(kind).ok();
593                state.expect(Identifier).ok();
594                Some(state.finish_at(cp, CElementType::ExpressionStatement))
595            }
596            _ => Some(binary(state, left, kind, prec, assoc, CElementType::ExpressionStatement, |s, p| PrattParser::parse(s, p, self))),
597        }
598    }
599}
600
601impl<'config> Parser<CLanguage> for CParser<'config> {
602    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CLanguage>) -> ParseOutput<'a, CLanguage> {
603        let lexer = crate::lexer::CLexer::new(self.config);
604        parse_with_lexer(&lexer, text, edits, cache, |state| {
605            let cp = state.checkpoint();
606            while state.not_at_end() {
607                self.parse_statement(state).ok();
608            }
609            Ok(state.finish_at(cp, CElementType::Root))
610        })
611    }
612}