Skip to main content

oak_cpp/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Element type definition.
3pub mod element_type;
4pub use element_type::CppElementType;
5
6use crate::{
7    language::CppLanguage,
8    lexer::{CppLexer, CppTokenType},
9};
10use oak_core::{
11    GreenNode, OakError,
12    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, parse_with_lexer},
13    source::{Source, TextEdit},
14};
15
16pub(crate) type State<'a, S> = ParserState<'a, CppLanguage, S>;
17
18/// Parser for the C++ language.
19///
20/// This parser transforms a stream of tokens into a green tree of C++ syntax nodes,
21/// using a combination of top-down recursive descent and Pratt parsing for expressions.
22pub struct CppParser<'config> {
23    pub(crate) config: &'config CppLanguage,
24}
25
26impl<'config> CppParser<'config> {
27    /// Creates a new `CppParser` with the given configuration.
28    pub fn new(config: &'config CppLanguage) -> Self {
29        Self { config }
30    }
31
32    /// Parses a single C++ statement.
33    ///
34    /// This includes keywords, compound statements, preprocessor directives,
35    /// and expressions followed by a semicolon.
36    fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37        use crate::lexer::CppTokenType::*;
38        match state.peek_kind() {
39            Some(Keyword) => {
40                state.bump();
41                while state.not_at_end() && !state.at(Semicolon) {
42                    state.advance()
43                }
44                state.eat(Semicolon);
45            }
46            Some(LeftBrace) => self.parse_compound_statement(state)?,
47            Some(Preprocessor) => state.bump(),
48            _ => {
49                PrattParser::parse(state, 0, self);
50                state.eat(Semicolon);
51            }
52        }
53        Ok(())
54    }
55
56    /// Parses a compound statement (a block of statements enclosed in braces).
57    fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
58        state.expect(CppTokenType::LeftBrace).ok();
59        while state.not_at_end() && !state.at(CppTokenType::RightBrace) {
60            self.parse_statement(state)?
61        }
62        state.expect(CppTokenType::RightBrace).ok();
63        Ok(())
64    }
65}
66
67impl<'config> Parser<CppLanguage> for CppParser<'config> {
68    /// Parses the entire C++ source file.
69    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CppLanguage>) -> ParseOutput<'a, CppLanguage> {
70        let lexer = CppLexer::new(self.config);
71        parse_with_lexer(&lexer, text, edits, cache, |state| {
72            let cp = state.checkpoint();
73            while state.not_at_end() {
74                self.parse_statement(state)?
75            }
76            Ok(state.finish_at(cp, CppElementType::SourceFile))
77        })
78    }
79}
80
81impl<'config> Pratt<CppLanguage> for CppParser<'config> {
82    /// Parses a primary expression (e.g., identifiers, literals, parenthesized expressions).
83    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
84        use crate::lexer::CppTokenType::*;
85        let cp = state.checkpoint();
86        match state.peek_kind() {
87            Some(Identifier) => {
88                state.bump();
89                state.finish_at(cp, CppElementType::SourceFile) // Simplified handling
90            }
91            Some(IntegerLiteral) | Some(FloatLiteral) | Some(CharacterLiteral) | Some(StringLiteral) | Some(BooleanLiteral) => {
92                state.bump();
93                state.finish_at(cp, CppElementType::SourceFile) // Simplified handling
94            }
95            Some(LeftParen) => {
96                state.bump();
97                PrattParser::parse(state, 0, self);
98                state.expect(RightParen).ok();
99                state.finish_at(cp, CppElementType::SourceFile)
100            }
101            _ => {
102                state.bump();
103                state.finish_at(cp, CppElementType::Error)
104            }
105        }
106    }
107
108    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
109        self.primary(state)
110    }
111
112    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CppLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CppLanguage>> {
113        use crate::lexer::CppTokenType::*;
114        let kind = state.peek_kind()?;
115
116        let (prec, assoc) = match kind {
117            Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
118            LogicalOr => (2, Associativity::Left),
119            LogicalAnd => (3, Associativity::Left),
120            Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
121            Plus | Minus => (10, Associativity::Left),
122            Star | Slash | Percent => (11, Associativity::Left),
123            LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
124            Scope => (16, Associativity::Left),
125            _ => return None,
126        };
127
128        if prec < min_precedence {
129            return None;
130        }
131
132        match kind {
133            LeftParen => {
134                let cp = state.checkpoint();
135                state.push_child(left);
136                state.expect(LeftParen).ok();
137                while state.not_at_end() && !state.at(RightParen) {
138                    state.bump()
139                }
140                state.expect(RightParen).ok();
141                Some(state.finish_at(cp, CppElementType::SourceFile))
142            }
143            _ => {
144                let cp = state.checkpoint();
145                state.push_child(left);
146                state.bump();
147                let right = PrattParser::parse(state, prec + (assoc as u8), self);
148                state.push_child(right);
149                Some(state.finish_at(cp, CppElementType::SourceFile))
150            }
151        }
152    }
153}