Skip to main content

oak_c/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod element_type;
3pub use element_type::CElementType;
4
5use crate::{language::CLanguage, lexer::CTokenType};
6use oak_core::{
7    GreenNode, OakError, Source,
8    parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer},
9    source::TextEdit,
10};
11
/// Parser state specialised to [`CLanguage`]; `S` is the source type being parsed.
pub(crate) type State<'a, S> = ParserState<'a, CLanguage, S>;
13
/// Parser for the C language.
///
/// The parser only borrows its configuration, so it is cheap to construct for
/// every parse.
pub struct CParser<'config> {
    /// Language configuration; also passed to the lexer created in `parse`.
    pub(crate) config: &'config CLanguage,
}
19
20impl<'config> CParser<'config> {
21    /// Creates a new `CParser` with the given language configuration.
22    pub fn new(config: &'config CLanguage) -> Self {
23        Self { config }
24    }
25
26    /// Parses a C statement.
27    pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28        use crate::lexer::CTokenType::*;
29        self.skip_trivia(state);
30        match state.peek_kind() {
31            Some(If) => self.parse_if_statement(state)?,
32            Some(While) => self.parse_while_statement(state)?,
33            Some(For) => self.parse_for_statement(state)?,
34            Some(Return) => self.parse_return_statement(state)?,
35            Some(LeftBrace) => self.parse_compound_statement(state)?,
36            Some(Struct) | Some(Union) | Some(Enum) | Some(Typedef) | Some(Extern) | Some(Static) | Some(Int) | Some(Char) | Some(Void) | Some(Float) | Some(Double) => self.parse_declaration(state)?,
37            _ => {
38                let expr = PrattParser::parse(state, 0, self);
39                state.push_child(expr);
40                self.skip_trivia(state);
41                state.eat(Semicolon);
42            }
43        }
44        Ok(())
45    }
46
47    /// Skips trivia tokens (whitespace and comments).
48    fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
49        while let Some(kind) = state.peek_kind() {
50            if matches!(kind, CTokenType::Whitespace | CTokenType::Comment) {
51                state.bump();
52            }
53            else {
54                break;
55            }
56        }
57    }
58
59    /// Parses a C declaration or function definition.
60    fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
61        use crate::lexer::CTokenType::*;
62        let cp = state.checkpoint();
63
64        self.skip_trivia(state);
65        while state.not_at_end() && !state.at(Semicolon) && !state.at(LeftBrace) && !state.at(Assign) && !state.at(LeftParen) {
66            state.bump();
67            self.skip_trivia(state);
68        }
69
70        if state.at(LeftParen) {
71            let pcp = state.checkpoint();
72            state.bump(); // (
73            self.skip_trivia(state);
74            while state.not_at_end() && !state.at(RightParen) {
75                if state.at(Identifier) || state.at(Int) || state.at(Char) || state.at(Float) || state.at(Double) || state.at(Void) {
76                    state.bump();
77                }
78                else if state.at(Comma) {
79                    state.bump();
80                }
81                else {
82                    state.bump();
83                }
84                self.skip_trivia(state);
85            }
86            state.expect(RightParen).ok();
87            state.finish_at(pcp, CElementType::ParameterList);
88        }
89
90        self.skip_trivia(state);
91
92        if state.at(Assign) {
93            state.bump(); // =
94            self.skip_trivia(state);
95            let expr = PrattParser::parse(state, 0, self);
96            state.push_child(expr);
97        }
98
99        self.skip_trivia(state);
100
101        if state.at(LeftBrace) {
102            self.parse_compound_statement(state)?;
103            state.finish_at(cp, CElementType::FunctionDefinition);
104        }
105        else {
106            state.eat(Semicolon);
107            state.finish_at(cp, CElementType::DeclarationStatement);
108        }
109
110        Ok(())
111    }
112
113    fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
114        let cp = state.checkpoint();
115        state.bump(); // if
116        state.expect(CTokenType::LeftParen).ok();
117        let expr = PrattParser::parse(state, 0, self);
118        state.push_child(expr);
119        state.expect(CTokenType::RightParen).ok();
120        self.parse_statement(state)?;
121        if state.eat(CTokenType::Else) {
122            self.parse_statement(state)?;
123        }
124        state.finish_at(cp, CElementType::IfStatement);
125        Ok(())
126    }
127
128    fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
129        let cp = state.checkpoint();
130        state.bump(); // while
131        state.expect(CTokenType::LeftParen).ok();
132        let expr = PrattParser::parse(state, 0, self);
133        state.push_child(expr);
134        state.expect(CTokenType::RightParen).ok();
135        self.parse_statement(state)?;
136        state.finish_at(cp, CElementType::WhileStatement);
137        Ok(())
138    }
139
140    fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
141        let cp = state.checkpoint();
142        state.bump(); // for
143        state.expect(CTokenType::LeftParen).ok();
144
145        // Init
146        if !state.at(CTokenType::Semicolon) {
147            let expr = PrattParser::parse(state, 0, self);
148            state.push_child(expr);
149        }
150        state.expect(CTokenType::Semicolon).ok();
151
152        // Condition
153        if !state.at(CTokenType::Semicolon) {
154            let expr = PrattParser::parse(state, 0, self);
155            state.push_child(expr);
156        }
157        state.expect(CTokenType::Semicolon).ok();
158
159        // Increment
160        if !state.at(CTokenType::RightParen) {
161            let expr = PrattParser::parse(state, 0, self);
162            state.push_child(expr);
163        }
164        state.expect(CTokenType::RightParen).ok();
165
166        self.parse_statement(state)?;
167        state.finish_at(cp, CElementType::ForStatement);
168        Ok(())
169    }
170
171    fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
172        let cp = state.checkpoint();
173        state.bump(); // return
174        if !state.at(CTokenType::Semicolon) {
175            let expr = PrattParser::parse(state, 0, self);
176            state.push_child(expr);
177        }
178        state.eat(CTokenType::Semicolon);
179        state.finish_at(cp, CElementType::ReturnStatement);
180        Ok(())
181    }
182
183    fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
184        let cp = state.checkpoint();
185        state.expect(CTokenType::LeftBrace).ok();
186        while state.not_at_end() && !state.at(CTokenType::RightBrace) {
187            self.parse_statement(state)?;
188        }
189        state.expect(CTokenType::RightBrace).ok();
190        state.finish_at(cp, CElementType::CompoundStatement);
191        Ok(())
192    }
193}
194
195impl<'config> Pratt<CLanguage> for CParser<'config> {
196    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CLanguage> {
197        use crate::lexer::CTokenType::*;
198        self.skip_trivia(state);
199        let cp = state.checkpoint();
200        match state.peek_kind() {
201            Some(Identifier) => {
202                state.bump();
203                state.finish_at(cp, CElementType::Token(Identifier))
204            }
205            Some(IntegerLiteral) | Some(FloatLiteral) | Some(CharLiteral) | Some(StringLiteral) => {
206                let _kind = state.peek_kind().unwrap();
207                state.bump();
208                state.finish_at(cp, CElementType::ExpressionStatement) // įŽ€åŒ–å¤„į†
209            }
210            Some(LeftParen) => {
211                state.bump();
212                let expr = PrattParser::parse(state, 0, self);
213                state.push_child(expr);
214                self.skip_trivia(state);
215                state.expect(RightParen).ok();
216                state.finish_at(cp, CElementType::ExpressionStatement)
217            }
218            _ => {
219                state.bump();
220                state.finish_at(cp, CElementType::Error)
221            }
222        }
223    }
224
225    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CLanguage>> {
226        use crate::lexer::CTokenType::*;
227        self.skip_trivia(state);
228        let kind = state.peek_kind()?;
229
230        let (prec, assoc) = match kind {
231            Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
232            LogicalOr => (2, Associativity::Left),
233            LogicalAnd => (3, Associativity::Left),
234            Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
235            Plus | Minus => (10, Associativity::Left),
236            Star | Slash | Percent => (11, Associativity::Left),
237            LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
238            _ => return None,
239        };
240
241        if prec < min_precedence {
242            return None;
243        }
244
245        match kind {
246            LeftParen => {
247                let cp = state.checkpoint();
248                state.push_child(left);
249                state.expect(LeftParen).ok();
250                while state.not_at_end() && !state.at(RightParen) {
251                    let expr = PrattParser::parse(state, 0, self);
252                    state.push_child(expr);
253                    self.skip_trivia(state);
254                    if !state.eat(Comma) {
255                        break;
256                    }
257                }
258                state.expect(RightParen).ok();
259                Some(state.finish_at(cp, CElementType::ExpressionStatement))
260            }
261            LeftBracket => {
262                let cp = state.checkpoint();
263                state.push_child(left);
264                state.expect(LeftBracket).ok();
265                let expr = PrattParser::parse(state, 0, self);
266                state.push_child(expr);
267                state.expect(RightBracket).ok();
268                Some(state.finish_at(cp, CElementType::ExpressionStatement))
269            }
270            Dot | Arrow => {
271                let cp = state.checkpoint();
272                state.push_child(left);
273                state.expect(kind).ok();
274                state.expect(Identifier).ok();
275                Some(state.finish_at(cp, CElementType::ExpressionStatement))
276            }
277            _ => Some(binary(state, left, kind, prec, assoc, CElementType::ExpressionStatement, |s, p| PrattParser::parse(s, p, self))),
278        }
279    }
280}
281
282impl<'config> Parser<CLanguage> for CParser<'config> {
283    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CLanguage>) -> ParseOutput<'a, CLanguage> {
284        let lexer = crate::lexer::CLexer::new(self.config);
285        parse_with_lexer(&lexer, text, edits, cache, |state| {
286            let cp = state.checkpoint();
287            while state.not_at_end() {
288                self.parse_statement(state).ok();
289            }
290            Ok(state.finish_at(cp, CElementType::Root))
291        })
292    }
293}