// rawk_core/parser.rs

1use crate::{
2    Lexer, Program,
3    ast::{Action, Expression, Rule, Statement},
4    token::{Token, TokenKind},
5};
6
7#[derive(Debug)]
8pub struct Parser<'a> {
9    lexer: Lexer<'a>,
10    current_token: Token<'a>,
11}
12
13impl<'a> Parser<'a> {
14    pub fn new(mut lexer: Lexer<'a>) -> Self {
15        // Enable regex parsing for the first token since it could be a pattern
16        lexer.set_allow_regex(true);
17        let current_token = lexer.next_token();
18        lexer.set_allow_regex(false);
19
20        Parser {
21            lexer,
22            current_token,
23        }
24    }
25
26    fn next_token(&mut self) {
27        self.next_token_with_regex(false);
28    }
29
30    fn next_token_with_regex(&mut self, allow_regex: bool) {
31        self.lexer.set_allow_regex(allow_regex);
32        self.current_token = self.lexer.next_token();
33        self.lexer.set_allow_regex(false);
34    }
35
36    fn is_eof(&self) -> bool {
37        self.current_token.kind == TokenKind::Eof
38    }
39
40    fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
41        match &self.current_token.kind {
42            TokenKind::Begin => {
43                self.next_token();
44                match self.parse_action() {
45                    Rule::Action(action) => Some(Rule::Begin(action)),
46                    _ => panic!("Expected action after BEGIN"),
47                }
48            }
49            TokenKind::NewLine => {
50                self.next_token_with_regex(true);
51                self.parse_next_rule()
52            }
53            TokenKind::Eof => None,
54            TokenKind::LeftCurlyBrace => Some(self.parse_action()),
55            TokenKind::End => {
56                self.next_token();
57                match self.parse_action() {
58                    Rule::Action(action) => Some(Rule::End(action)),
59                    _ => panic!("Expected action after END"),
60                }
61            }
62            TokenKind::Regex => {
63                let pattern = Some(Expression::Regex(self.current_token.literal));
64                self.next_token();
65                if self.current_token.kind == TokenKind::LeftCurlyBrace {
66                    match self.parse_action() {
67                        Rule::Action(action) => Some(Rule::PatternAction {
68                            pattern,
69                            action: Some(action),
70                        }),
71                        _ => panic!("Expected action after regex pattern"),
72                    }
73                } else {
74                    Some(Rule::PatternAction {
75                        pattern,
76                        action: None,
77                    })
78                }
79            }
80            _ => panic!(
81                "parse_next_rule not yet implemented, found token: {:?}",
82                self.current_token
83            ),
84        }
85    }
86
87    fn parse_action(&mut self) -> Rule<'a> {
88        self.next_token(); // consume '{'
89
90        let pattern = None;
91
92        let mut statements = Vec::new();
93        while self.current_token.kind == TokenKind::NewLine {
94            self.next_token();
95        }
96
97        if self.current_token.kind == TokenKind::Print {
98            let print_statement = self.parse_print_function();
99            statements.push(print_statement);
100        }
101
102        while self.current_token.kind != TokenKind::RightCurlyBrace
103            && self.current_token.kind != TokenKind::Eof
104        {
105            self.next_token();
106        }
107
108        if pattern.is_some() {
109            Rule::PatternAction {
110                pattern,
111                action: Some(Action { statements }),
112            }
113        } else {
114            Rule::Action(Action { statements })
115        }
116    }
117
118    fn parse_print_function(&mut self) -> Statement<'a> {
119        let mut expressions = Vec::new();
120        self.next_token();
121
122        while self.current_token.kind != TokenKind::RightCurlyBrace
123            && self.current_token.kind != TokenKind::Eof
124        {
125            let expression = self.parse_expression();
126            expressions.push(expression);
127        }
128
129        Statement::Print(expressions)
130    }
131
132    fn parse_expression(&mut self) -> Expression<'a> {
133        let mut left = self.parse_primary_expression();
134
135        while matches!(
136            self.current_token.kind,
137            TokenKind::Plus
138                | TokenKind::Minus
139                | TokenKind::Asterisk
140                | TokenKind::Division
141                | TokenKind::Percent
142                | TokenKind::Caret
143        ) {
144            let operator = self.current_token.clone();
145            self.next_token();
146            let right = self.parse_primary_expression();
147
148            left = Expression::Infix {
149                left: Box::new(left),
150                operator,
151                right: Box::new(right),
152            };
153        }
154
155        left
156    }
157
158    fn parse_primary_expression(&mut self) -> Expression<'a> {
159        match self.current_token.kind {
160            TokenKind::String => {
161                let expression = Expression::String(self.current_token.literal);
162                self.next_token();
163                expression
164            }
165            TokenKind::Number => {
166                let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
167                    Expression::Number(value)
168                } else {
169                    todo!()
170                };
171                self.next_token();
172                expression
173            }
174            TokenKind::LeftParen => {
175                self.next_token();
176                let expression = self.parse_expression();
177                if self.current_token.kind == TokenKind::RightParen {
178                    self.next_token();
179                }
180                expression
181            }
182            _ => {
183                todo!()
184            }
185        }
186    }
187
188    pub fn parse_program(&mut self) -> Program<'_> {
189        let mut program = Program::new();
190
191        while !self.is_eof() {
192            match self.parse_next_rule() {
193                Some(Rule::Begin(action)) => program.add_begin_block(Rule::Begin(action)),
194                Some(Rule::End(action)) => program.add_end_block(Rule::End(action)),
195                Some(rule) => program.add_rule(rule),
196                None => {}
197            }
198            self.next_token_with_regex(true);
199        }
200
201        program
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and asserts that it yields exactly one rule whose
    /// rendered form equals `expected`.
    fn assert_single_rule(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(expected, program.to_string());
    }

    /// Parses `source`, takes the first statement of its first `BEGIN` block
    /// (a `print` statement) and hands the printed expressions to `check`.
    fn with_begin_print_expressions(source: &str, check: impl FnOnce(&[Expression<'_>])) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();

        let mut begin_blocks = program.begin_blocks_iter();
        let rule = begin_blocks.next().expect("expected begin block");
        let statements = match rule {
            Rule::Begin(Action { statements }) => statements,
            _ => panic!("expected begin rule"),
        };

        let Statement::Print(expressions) = &statements[0];
        check(&expressions[..]);
    }

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));
        assert_eq!(parser.current_token.literal, "42");

        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        assert_eq!(parser.parse_program().len(), 0);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_rule("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_rule("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_rule("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_rule("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_rule("/foo/ { print }", "/foo/ { print }");
    }

    #[test]
    fn parse_print_infix_expression() {
        with_begin_print_expressions("BEGIN { print 1 + 2 }", |exprs| match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert!(matches!(**left, Expression::Number(1.0)));
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**right, Expression::Number(2.0)));
            }
            _ => panic!("expected infix expression"),
        });
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        with_begin_print_expressions("BEGIN { print (1 + 2) * 3 }", |exprs| match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Asterisk);
                assert!(matches!(**right, Expression::Number(3.0)));
                assert!(matches!(**left, Expression::Infix { .. }));
            }
            _ => panic!("expected infix expression"),
        });
    }

    #[test]
    fn parse_print_concatenation() {
        with_begin_print_expressions(r#"BEGIN { print "Value:" 42 }"#, |exprs| {
            assert_eq!(exprs.len(), 2);
            assert!(matches!(exprs[0], Expression::String("Value:")));
            assert!(matches!(exprs[1], Expression::Number(42.0)));
        });
    }
}
360}