1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
7#[derive(Debug)]
8pub struct Parser<'a> {
9 lexer: Lexer<'a>,
10 current_token: Token<'a>,
11}
12
13impl<'a> Parser<'a> {
14 pub fn new(mut lexer: Lexer<'a>) -> Self {
15 lexer.set_allow_regex(true);
17 let current_token = lexer.next_token();
18 lexer.set_allow_regex(false);
19
20 Parser {
21 lexer,
22 current_token,
23 }
24 }
25
26 fn next_token(&mut self) {
27 self.next_token_with_regex(false);
28 }
29
30 fn next_token_with_regex(&mut self, allow_regex: bool) {
31 self.lexer.set_allow_regex(allow_regex);
32 self.current_token = self.lexer.next_token();
33 self.lexer.set_allow_regex(false);
34 }
35
36 fn is_eof(&self) -> bool {
37 self.current_token.kind == TokenKind::Eof
38 }
39
40 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
41 match &self.current_token.kind {
42 TokenKind::Begin => {
43 self.next_token();
44 match self.parse_action() {
45 Rule::Action(action) => Some(Rule::Begin(action)),
46 _ => panic!("Expected action after BEGIN"),
47 }
48 }
49 TokenKind::NewLine => {
50 self.next_token_with_regex(true);
51 self.parse_next_rule()
52 }
53 TokenKind::Eof => None,
54 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
55 TokenKind::End => {
56 self.next_token();
57 match self.parse_action() {
58 Rule::Action(action) => Some(Rule::End(action)),
59 _ => panic!("Expected action after END"),
60 }
61 }
62 TokenKind::Regex => {
63 let pattern = Some(Expression::Regex(self.current_token.literal));
64 self.next_token();
65 if self.current_token.kind == TokenKind::LeftCurlyBrace {
66 match self.parse_action() {
67 Rule::Action(action) => Some(Rule::PatternAction {
68 pattern,
69 action: Some(action),
70 }),
71 _ => panic!("Expected action after regex pattern"),
72 }
73 } else {
74 Some(Rule::PatternAction {
75 pattern,
76 action: None,
77 })
78 }
79 }
80 _ => panic!(
81 "parse_next_rule not yet implemented, found token: {:?}",
82 self.current_token
83 ),
84 }
85 }
86
87 fn parse_action(&mut self) -> Rule<'a> {
88 self.next_token(); let pattern = None;
91
92 let mut statements = Vec::new();
93 while self.current_token.kind == TokenKind::NewLine {
94 self.next_token();
95 }
96
97 if self.current_token.kind == TokenKind::Print {
98 let print_statement = self.parse_print_function();
99 statements.push(print_statement);
100 }
101
102 while self.current_token.kind != TokenKind::RightCurlyBrace
103 && self.current_token.kind != TokenKind::Eof
104 {
105 self.next_token();
106 }
107
108 if pattern.is_some() {
109 Rule::PatternAction {
110 pattern,
111 action: Some(Action { statements }),
112 }
113 } else {
114 Rule::Action(Action { statements })
115 }
116 }
117
118 fn parse_print_function(&mut self) -> Statement<'a> {
119 let mut expressions = Vec::new();
120 self.next_token();
121
122 while self.current_token.kind != TokenKind::RightCurlyBrace
123 && self.current_token.kind != TokenKind::Eof
124 {
125 let expression = self.parse_expression();
126 expressions.push(expression);
127 }
128
129 Statement::Print(expressions)
130 }
131
132 fn parse_expression(&mut self) -> Expression<'a> {
133 self.parse_expression_with_min_precedence(0)
134 }
135
136 fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
137 let mut left = self.parse_primary_expression();
138
139 loop {
140 let (left_precedence, right_precedence) =
141 match infix_operator_precedence(&self.current_token.kind) {
142 Some(value) => value,
143 None => break,
144 };
145
146 if left_precedence < min_precedence {
147 break;
148 }
149
150 let operator = self.current_token.clone();
151 self.next_token();
152 let right = self.parse_expression_with_min_precedence(right_precedence);
153
154 left = Expression::Infix {
155 left: Box::new(left),
156 operator,
157 right: Box::new(right),
158 };
159 }
160
161 left
162 }
163
164 fn parse_primary_expression(&mut self) -> Expression<'a> {
165 match self.current_token.kind {
166 TokenKind::String => {
167 let expression = Expression::String(self.current_token.literal);
168 self.next_token();
169 expression
170 }
171 TokenKind::Number => {
172 let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
173 Expression::Number(value)
174 } else {
175 todo!()
176 };
177 self.next_token();
178 expression
179 }
180 TokenKind::DollarSign => {
181 self.next_token();
182 let expression = self.parse_primary_expression();
183 Expression::Field(Box::new(expression))
184 }
185 TokenKind::LeftParen => {
186 self.next_token();
187 let expression = self.parse_expression();
188 if self.current_token.kind == TokenKind::RightParen {
189 self.next_token();
190 }
191 expression
192 }
193 _ => {
194 todo!()
195 }
196 }
197 }
198
199 pub fn parse_program(&mut self) -> Program<'_> {
200 let mut program = Program::new();
201
202 while !self.is_eof() {
203 match self.parse_next_rule() {
204 Some(Rule::Begin(action)) => program.add_begin_block(Rule::Begin(action)),
205 Some(Rule::End(action)) => program.add_end_block(Rule::End(action)),
206 Some(rule) => program.add_rule(rule),
207 None => {}
208 }
209 self.next_token_with_regex(true);
210 }
211
212 program
213 }
214}
215
216fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
217 match kind {
218 TokenKind::Plus | TokenKind::Minus => Some((1, 2)),
219 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((3, 4)),
220 TokenKind::Caret => Some((7, 6)),
221 _ => None,
222 }
223}
224
225#[cfg(test)]
226mod tests {
227 use super::*;
228
229 #[test]
230 fn create_parser() {
231 let mut parser = Parser::new(Lexer::new("42 == 42"));
232
233 assert_eq!(parser.current_token.literal, "42");
234 parser.next_token();
235 assert_eq!(parser.current_token.literal, "==");
236 }
237
238 #[test]
239 fn parse_empty_program() {
240 let mut parser = Parser::new(Lexer::new(""));
241
242 let program = parser.parse_program();
243
244 assert_eq!(program.len(), 0);
245 }
246
247 #[test]
248 fn parse_action_without_pattern() {
249 let mut parser = Parser::new(Lexer::new("{ print }"));
250
251 let program = parser.parse_program();
252
253 assert_eq!(program.len(), 1);
254 assert_eq!("{ print }", program.to_string());
255 }
256
257 #[test]
258 fn parse_action_with_leading_newlines() {
259 let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
260
261 let program = parser.parse_program();
262
263 assert_eq!(program.len(), 1);
264 assert_eq!("{ print }", program.to_string());
265 }
266
267 #[test]
268 fn parse_begin_block() {
269 let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
270
271 let program = parser.parse_program();
272
273 assert_eq!(program.len(), 1);
274 assert_eq!("BEGIN { print }", program.to_string());
275 }
276
277 #[test]
278 fn parse_end_block() {
279 let mut parser = Parser::new(Lexer::new("END { print 42 }"));
280
281 let program = parser.parse_program();
282
283 assert_eq!(program.len(), 1);
284 assert_eq!("END { print 42 }", program.to_string());
285 }
286
287 #[test]
288 fn parse_regex_pattern_action() {
289 let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
290
291 let program = parser.parse_program();
292
293 assert_eq!(program.len(), 1);
294 assert_eq!("/foo/ { print }", program.to_string());
295 }
296
297 #[test]
298 fn parse_print_infix_expression() {
299 let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
300
301 let program = parser.parse_program();
302 let mut begin_blocks = program.begin_blocks_iter();
303 let rule = begin_blocks.next().expect("expected begin block");
304
305 let statements = match rule {
306 Rule::Begin(Action { statements }) => statements,
307 _ => panic!("expected begin rule"),
308 };
309
310 let exprs = match &statements[0] {
311 Statement::Print(expressions) => expressions,
312 };
313
314 match &exprs[0] {
315 Expression::Infix {
316 left,
317 operator,
318 right,
319 } => {
320 assert!(matches!(**left, Expression::Number(1.0)));
321 assert_eq!(operator.kind, TokenKind::Plus);
322 assert!(matches!(**right, Expression::Number(2.0)));
323 }
324 _ => panic!("expected infix expression"),
325 }
326 }
327
328 #[test]
329 fn parse_print_parenthesized_expression() {
330 let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
331
332 let program = parser.parse_program();
333 let mut begin_blocks = program.begin_blocks_iter();
334 let rule = begin_blocks.next().expect("expected begin block");
335
336 let statements = match rule {
337 Rule::Begin(Action { statements }) => statements,
338 _ => panic!("expected begin rule"),
339 };
340
341 let exprs = match &statements[0] {
342 Statement::Print(expressions) => expressions,
343 };
344
345 match &exprs[0] {
346 Expression::Infix {
347 left,
348 operator,
349 right,
350 } => {
351 assert_eq!(operator.kind, TokenKind::Asterisk);
352 assert!(matches!(**right, Expression::Number(3.0)));
353 assert!(matches!(**left, Expression::Infix { .. }));
354 }
355 _ => panic!("expected infix expression"),
356 }
357 }
358
359 #[test]
360 fn parse_print_multiplication_has_higher_precedence_than_addition() {
361 let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
362
363 let program = parser.parse_program();
364 let mut begin_blocks = program.begin_blocks_iter();
365 let rule = begin_blocks.next().expect("expected begin block");
366
367 let statements = match rule {
368 Rule::Begin(Action { statements }) => statements,
369 _ => panic!("expected begin rule"),
370 };
371
372 let exprs = match &statements[0] {
373 Statement::Print(expressions) => expressions,
374 };
375
376 match &exprs[0] {
377 Expression::Infix {
378 left,
379 operator,
380 right,
381 } => {
382 assert_eq!(operator.kind, TokenKind::Plus);
383 assert!(matches!(**left, Expression::Number(1.0)));
384 match &**right {
385 Expression::Infix {
386 operator: right_op, ..
387 } => assert_eq!(right_op.kind, TokenKind::Asterisk),
388 _ => panic!("expected nested infix expression"),
389 }
390 }
391 _ => panic!("expected infix expression"),
392 }
393 }
394
395 #[test]
396 fn parse_print_power_is_right_associative() {
397 let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
398
399 let program = parser.parse_program();
400 let mut begin_blocks = program.begin_blocks_iter();
401 let rule = begin_blocks.next().expect("expected begin block");
402
403 let statements = match rule {
404 Rule::Begin(Action { statements }) => statements,
405 _ => panic!("expected begin rule"),
406 };
407
408 let exprs = match &statements[0] {
409 Statement::Print(expressions) => expressions,
410 };
411
412 match &exprs[0] {
413 Expression::Infix {
414 left,
415 operator,
416 right,
417 } => {
418 assert_eq!(operator.kind, TokenKind::Caret);
419 assert!(matches!(**left, Expression::Number(2.0)));
420 match &**right {
421 Expression::Infix {
422 operator: right_op, ..
423 } => assert_eq!(right_op.kind, TokenKind::Caret),
424 _ => panic!("expected nested infix expression"),
425 }
426 }
427 _ => panic!("expected infix expression"),
428 }
429 }
430
431 #[test]
432 fn parse_print_minus_is_left_associative() {
433 let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
434
435 let program = parser.parse_program();
436 let mut begin_blocks = program.begin_blocks_iter();
437 let rule = begin_blocks.next().expect("expected begin block");
438
439 let statements = match rule {
440 Rule::Begin(Action { statements }) => statements,
441 _ => panic!("expected begin rule"),
442 };
443
444 let exprs = match &statements[0] {
445 Statement::Print(expressions) => expressions,
446 };
447
448 match &exprs[0] {
449 Expression::Infix {
450 left,
451 operator,
452 right,
453 } => {
454 assert_eq!(operator.kind, TokenKind::Minus);
455 match &**left {
456 Expression::Infix {
457 operator: left_op, ..
458 } => assert_eq!(left_op.kind, TokenKind::Minus),
459 _ => panic!("expected nested infix expression"),
460 }
461 assert!(matches!(**right, Expression::Number(1.0)));
462 }
463 _ => panic!("expected infix expression"),
464 }
465 }
466
467 #[test]
468 fn parse_print_concatenation() {
469 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
470
471 let program = parser.parse_program();
472 let mut begin_blocks = program.begin_blocks_iter();
473 let rule = begin_blocks.next().expect("expected begin block");
474
475 let statements = match rule {
476 Rule::Begin(Action { statements }) => statements,
477 _ => panic!("expected begin rule"),
478 };
479
480 let exprs = match &statements[0] {
481 Statement::Print(expressions) => expressions,
482 };
483
484 assert_eq!(exprs.len(), 2);
485 assert!(matches!(exprs[0], Expression::String("Value:")));
486 assert!(matches!(exprs[1], Expression::Number(42.0)));
487 }
488
489 #[test]
490 fn parse_print_field_expression() {
491 let mut parser = Parser::new(Lexer::new("{ print $1 }"));
492
493 let program = parser.parse_program();
494 let mut rules = program.rules_iter();
495 let rule = rules.next().expect("expected rule");
496
497 let statements = match rule {
498 Rule::Action(Action { statements }) => statements,
499 _ => panic!("expected action rule"),
500 };
501
502 let exprs = match &statements[0] {
503 Statement::Print(expressions) => expressions,
504 };
505
506 match &exprs[0] {
507 Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
508 _ => panic!("expected field expression"),
509 }
510 }
511}