Skip to main content

oak_c/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Element types for the C AST.
3pub mod element_type;
4pub use element_type::CElementType;
5
6use crate::{language::CLanguage, lexer::CTokenType};
7use oak_core::{
8    GreenNode, OakError, Source,
9    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer},
10    source::TextEdit,
11};
12
/// Shorthand for the `oak_core` [`ParserState`] specialised to [`CLanguage`], generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, CLanguage, S>;
14
/// Parser for the C language.
///
/// The parser only borrows its configuration; the same configuration is handed to the
/// lexer each time `Parser::parse` is invoked.
pub struct CParser<'config> {
    /// Language configuration.
    pub(crate) config: &'config CLanguage,
}
20
21impl<'config> CParser<'config> {
22    /// Creates a new `CParser` with the given language configuration.
23    pub fn new(config: &'config CLanguage) -> Self {
24        Self { config }
25    }
26
27    /// Parses a C statement.
28    pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
29        use crate::lexer::CTokenType::*;
30        self.skip_trivia(state);
31        match state.peek_kind() {
32            Some(If) => self.parse_if_statement(state)?,
33            Some(While) => self.parse_while_statement(state)?,
34            Some(For) => self.parse_for_statement(state)?,
35            Some(Return) => self.parse_return_statement(state)?,
36            Some(LeftBrace) => self.parse_compound_statement(state)?,
37            Some(Struct) | Some(Union) | Some(Enum) | Some(Typedef) | Some(Extern) | Some(Static) | Some(Int) | Some(Char) | Some(Void) | Some(Float) | Some(Double) => self.parse_declaration(state)?,
38            _ => {
39                let expr = PrattParser::parse(state, 0, self);
40                state.push_child(expr);
41                self.skip_trivia(state);
42                state.eat(Semicolon);
43            }
44        }
45        Ok(())
46    }
47
48    /// Skips trivia tokens (whitespace and comments).
49    fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
50        while let Some(kind) = state.peek_kind() {
51            if matches!(kind, CTokenType::Whitespace | CTokenType::LineComment | CTokenType::BlockComment) {
52                state.bump();
53            }
54            else {
55                break;
56            }
57        }
58    }
59
60    /// Parses a C declaration or function definition.
61    fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
62        use crate::lexer::CTokenType::*;
63        let cp = state.checkpoint();
64
65        self.skip_trivia(state);
66        while state.not_at_end() && !state.at(Semicolon) && !state.at(LeftBrace) && !state.at(Assign) && !state.at(LeftParen) {
67            state.bump();
68            self.skip_trivia(state);
69        }
70
71        if state.at(LeftParen) {
72            let pcp = state.checkpoint();
73            state.bump(); // (
74            self.skip_trivia(state);
75            while state.not_at_end() && !state.at(RightParen) {
76                if state.at(Identifier) || state.at(Int) || state.at(Char) || state.at(Float) || state.at(Double) || state.at(Void) {
77                    state.bump();
78                }
79                else if state.at(Comma) {
80                    state.bump();
81                }
82                else {
83                    state.bump();
84                }
85                self.skip_trivia(state);
86            }
87            state.expect(RightParen).ok();
88            state.finish_at(pcp, CElementType::ParameterList);
89        }
90
91        self.skip_trivia(state);
92
93        if state.at(Assign) {
94            state.bump(); // =
95            self.skip_trivia(state);
96            let expr = PrattParser::parse(state, 0, self);
97            state.push_child(expr);
98        }
99
100        self.skip_trivia(state);
101
102        if state.at(LeftBrace) {
103            self.parse_compound_statement(state)?;
104            state.finish_at(cp, CElementType::FunctionDefinition);
105        }
106        else {
107            state.eat(Semicolon);
108            state.finish_at(cp, CElementType::DeclarationStatement);
109        }
110
111        Ok(())
112    }
113
114    fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
115        let cp = state.checkpoint();
116        state.bump(); // if
117        state.expect(CTokenType::LeftParen).ok();
118        let expr = PrattParser::parse(state, 0, self);
119        state.push_child(expr);
120        state.expect(CTokenType::RightParen).ok();
121        self.parse_statement(state)?;
122        if state.eat(CTokenType::Else) {
123            self.parse_statement(state)?;
124        }
125        state.finish_at(cp, CElementType::IfStatement);
126        Ok(())
127    }
128
129    fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
130        let cp = state.checkpoint();
131        state.bump(); // while
132        state.expect(CTokenType::LeftParen).ok();
133        let expr = PrattParser::parse(state, 0, self);
134        state.push_child(expr);
135        state.expect(CTokenType::RightParen).ok();
136        self.parse_statement(state)?;
137        state.finish_at(cp, CElementType::WhileStatement);
138        Ok(())
139    }
140
141    fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
142        let cp = state.checkpoint();
143        state.bump(); // for
144        state.expect(CTokenType::LeftParen).ok();
145
146        // Init
147        if !state.at(CTokenType::Semicolon) {
148            let expr = PrattParser::parse(state, 0, self);
149            state.push_child(expr);
150        }
151        state.expect(CTokenType::Semicolon).ok();
152
153        // Condition
154        if !state.at(CTokenType::Semicolon) {
155            let expr = PrattParser::parse(state, 0, self);
156            state.push_child(expr);
157        }
158        state.expect(CTokenType::Semicolon).ok();
159
160        // Increment
161        if !state.at(CTokenType::RightParen) {
162            let expr = PrattParser::parse(state, 0, self);
163            state.push_child(expr);
164        }
165        state.expect(CTokenType::RightParen).ok();
166
167        self.parse_statement(state)?;
168        state.finish_at(cp, CElementType::ForStatement);
169        Ok(())
170    }
171
172    fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
173        let cp = state.checkpoint();
174        state.bump(); // return
175        if !state.at(CTokenType::Semicolon) {
176            let expr = PrattParser::parse(state, 0, self);
177            state.push_child(expr);
178        }
179        state.eat(CTokenType::Semicolon);
180        state.finish_at(cp, CElementType::ReturnStatement);
181        Ok(())
182    }
183
184    fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
185        let cp = state.checkpoint();
186        state.expect(CTokenType::LeftBrace).ok();
187        while state.not_at_end() && !state.at(CTokenType::RightBrace) {
188            self.parse_statement(state)?;
189        }
190        state.expect(CTokenType::RightBrace).ok();
191        state.finish_at(cp, CElementType::CompoundStatement);
192        Ok(())
193    }
194}
195
196impl<'config> Pratt<CLanguage> for CParser<'config> {
197    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CLanguage> {
198        use crate::lexer::CTokenType::*;
199        self.skip_trivia(state);
200        let cp = state.checkpoint();
201        match state.peek_kind() {
202            Some(Identifier) => {
203                state.bump();
204                state.finish_at(cp, CElementType::Token(Identifier))
205            }
206            Some(IntConstant) | Some(FloatConstant) | Some(CharConstant) | Some(StringLiteral) => {
207                state.bump();
208                state.finish_at(cp, CElementType::ExpressionStatement) // Simplified processing
209            }
210            Some(LeftParen) => {
211                state.bump();
212                let expr = PrattParser::parse(state, 0, self);
213                state.push_child(expr);
214                self.skip_trivia(state);
215                state.expect(RightParen).ok();
216                state.finish_at(cp, CElementType::ExpressionStatement)
217            }
218            _ => {
219                state.bump();
220                state.finish_at(cp, CElementType::Error)
221            }
222        }
223    }
224
225    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CLanguage>> {
226        use crate::lexer::CTokenType::*;
227        self.skip_trivia(state);
228        let kind = state.peek_kind()?;
229
230        let (prec, assoc) = match kind {
231            Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
232            LogicalOr => (2, Associativity::Left),
233            LogicalAnd => (3, Associativity::Left),
234            Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
235            Plus | Minus => (10, Associativity::Left),
236            Star | Slash | Percent => (11, Associativity::Left),
237            LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
238            _ => return None,
239        };
240
241        if prec < min_precedence {
242            return None;
243        }
244
245        match kind {
246            LeftParen => {
247                let cp = state.checkpoint();
248                state.push_child(left);
249                state.expect(LeftParen).ok();
250                while state.not_at_end() && !state.at(RightParen) {
251                    let expr = PrattParser::parse(state, 0, self);
252                    state.push_child(expr);
253                    self.skip_trivia(state);
254                    if !state.eat(Comma) {
255                        break;
256                    }
257                }
258                state.expect(RightParen).ok();
259                Some(state.finish_at(cp, CElementType::ExpressionStatement))
260            }
261            LeftBracket => {
262                let cp = state.checkpoint();
263                state.push_child(left);
264                state.expect(LeftBracket).ok();
265                let expr = PrattParser::parse(state, 0, self);
266                state.push_child(expr);
267                state.expect(RightBracket).ok();
268                Some(state.finish_at(cp, CElementType::ExpressionStatement))
269            }
270            Dot | Arrow => {
271                let cp = state.checkpoint();
272                state.push_child(left);
273                state.expect(kind).ok();
274                state.expect(Identifier).ok();
275                Some(state.finish_at(cp, CElementType::ExpressionStatement))
276            }
277            _ => Some(binary(state, left, kind, prec, assoc, CElementType::ExpressionStatement, |s, p| PrattParser::parse(s, p, self))),
278        }
279    }
280}
281
282impl<'config> Parser<CLanguage> for CParser<'config> {
283    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CLanguage>) -> ParseOutput<'a, CLanguage> {
284        let lexer = crate::lexer::CLexer::new(self.config);
285        parse_with_lexer(&lexer, text, edits, cache, |state| {
286            let cp = state.checkpoint();
287            while state.not_at_end() {
288                self.parse_statement(state).ok();
289            }
290            Ok(state.finish_at(cp, CElementType::Root))
291        })
292    }
293}