Skip to main content

oak_cpp/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod element_type;
3pub use element_type::CppElementType;
4
5use crate::{
6    language::CppLanguage,
7    lexer::{CppLexer, CppTokenType},
8};
9use oak_core::{
10    GreenNode, OakError,
11    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, parse_with_lexer},
12    source::{Source, TextEdit},
13};
14
/// Shorthand for the core `ParserState` specialised to [`CppLanguage`], generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, CppLanguage, S>;
16
/// Parser for the C++ language.
///
/// This parser transforms a stream of tokens into a green tree of C++ syntax nodes,
/// using a combination of top-down recursive descent (for statements) and Pratt
/// parsing (for expressions).
pub struct CppParser<'config> {
    /// Borrowed language configuration; it is handed to the lexer each time a parse starts.
    pub(crate) config: &'config CppLanguage,
}
24
25impl<'config> CppParser<'config> {
26    /// Creates a new `CppParser` with the given configuration.
27    pub fn new(config: &'config CppLanguage) -> Self {
28        Self { config }
29    }
30
31    /// Parses a single C++ statement.
32    ///
33    /// This includes keywords, compound statements, preprocessor directives,
34    /// and expressions followed by a semicolon.
35    fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36        use crate::lexer::CppTokenType::*;
37        match state.peek_kind() {
38            Some(Keyword) => {
39                state.bump();
40                while state.not_at_end() && !state.at(Semicolon) {
41                    state.advance()
42                }
43                state.eat(Semicolon);
44            }
45            Some(LeftBrace) => self.parse_compound_statement(state)?,
46            Some(Preprocessor) => state.bump(),
47            _ => {
48                PrattParser::parse(state, 0, self);
49                state.eat(Semicolon);
50            }
51        }
52        Ok(())
53    }
54
55    /// Parses a compound statement (a block of statements enclosed in braces).
56    fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
57        state.expect(CppTokenType::LeftBrace).ok();
58        while state.not_at_end() && !state.at(CppTokenType::RightBrace) {
59            self.parse_statement(state)?
60        }
61        state.expect(CppTokenType::RightBrace).ok();
62        Ok(())
63    }
64}
65
66impl<'config> Parser<CppLanguage> for CppParser<'config> {
67    /// Parses the entire C++ source file.
68    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CppLanguage>) -> ParseOutput<'a, CppLanguage> {
69        let lexer = CppLexer::new(self.config);
70        parse_with_lexer(&lexer, text, edits, cache, |state| {
71            let cp = state.checkpoint();
72            while state.not_at_end() {
73                self.parse_statement(state)?
74            }
75            Ok(state.finish_at(cp, CppElementType::SourceFile))
76        })
77    }
78}
79
80impl<'config> Pratt<CppLanguage> for CppParser<'config> {
81    /// Parses a primary expression (e.g., identifiers, literals, parenthesized expressions).
82    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
83        use crate::lexer::CppTokenType::*;
84        let cp = state.checkpoint();
85        match state.peek_kind() {
86            Some(Identifier) => {
87                state.bump();
88                state.finish_at(cp, CppElementType::SourceFile) // Simplified handling
89            }
90            Some(IntegerLiteral) | Some(FloatLiteral) | Some(CharacterLiteral) | Some(StringLiteral) | Some(BooleanLiteral) => {
91                state.bump();
92                state.finish_at(cp, CppElementType::SourceFile) // Simplified handling
93            }
94            Some(LeftParen) => {
95                state.bump();
96                PrattParser::parse(state, 0, self);
97                state.expect(RightParen).ok();
98                state.finish_at(cp, CppElementType::SourceFile)
99            }
100            _ => {
101                state.bump();
102                state.finish_at(cp, CppElementType::Error)
103            }
104        }
105    }
106
107    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
108        self.primary(state)
109    }
110
111    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CppLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CppLanguage>> {
112        use crate::lexer::CppTokenType::*;
113        let kind = state.peek_kind()?;
114
115        let (prec, assoc) = match kind {
116            Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
117            LogicalOr => (2, Associativity::Left),
118            LogicalAnd => (3, Associativity::Left),
119            Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
120            Plus | Minus => (10, Associativity::Left),
121            Star | Slash | Percent => (11, Associativity::Left),
122            LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
123            Scope => (16, Associativity::Left),
124            _ => return None,
125        };
126
127        if prec < min_precedence {
128            return None;
129        }
130
131        match kind {
132            LeftParen => {
133                let cp = state.checkpoint();
134                state.push_child(left);
135                state.expect(LeftParen).ok();
136                while state.not_at_end() && !state.at(RightParen) {
137                    state.bump()
138                }
139                state.expect(RightParen).ok();
140                Some(state.finish_at(cp, CppElementType::SourceFile))
141            }
142            _ => {
143                let cp = state.checkpoint();
144                state.push_child(left);
145                state.bump();
146                let right = PrattParser::parse(state, prec + (assoc as u8), self);
147                state.push_child(right);
148                Some(state.finish_at(cp, CppElementType::SourceFile))
149            }
150        }
151    }
152}