1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
/// Parser that turns the token stream produced by a [`Lexer`] into AST nodes.
///
/// It keeps exactly one token of lookahead in `current_token` and pulls the
/// next token from the lexer on demand.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source; its regex-lexing flag is toggled around every token fetch.
    lexer: Lexer<'a>,
    /// The token the parser is currently looking at.
    current_token: Token<'a>,
}
12
13impl<'a> Parser<'a> {
14 pub fn new(mut lexer: Lexer<'a>) -> Self {
15 lexer.set_allow_regex(true);
17 let current_token = lexer.next_token();
18 lexer.set_allow_regex(false);
19
20 Parser {
21 lexer,
22 current_token,
23 }
24 }
25
/// Advances to the next token with regex lexing disabled.
fn next_token(&mut self) {
    self.next_token_with_regex(false);
}
29
/// Advances to the next token, lexing it with the regex flag set to
/// `allow_regex`.
///
/// The flag only applies to this single fetch: it is reset to `false` as soon
/// as the token has been read.
fn next_token_with_regex(&mut self, allow_regex: bool) {
    self.lexer.set_allow_regex(allow_regex);
    self.current_token = self.lexer.next_token();
    // Always leave the lexer with regex lexing turned off.
    self.lexer.set_allow_regex(false);
}
35
36 fn is_eof(&self) -> bool {
37 self.current_token.kind == TokenKind::Eof
38 }
39
40 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
41 match &self.current_token.kind {
42 TokenKind::Begin => {
43 self.next_token();
44 match self.parse_action() {
45 Rule::Action(action) => Some(Rule::Begin(action)),
46 _ => panic!("Expected action after BEGIN"),
47 }
48 }
49 TokenKind::NewLine => {
50 self.next_token_with_regex(true);
51 self.parse_next_rule()
52 }
53 TokenKind::Eof => None,
54 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
55 TokenKind::Function => {
56 self.parse_function_definition();
57 None
58 }
59 TokenKind::End => {
60 self.next_token();
61 match self.parse_action() {
62 Rule::Action(action) => Some(Rule::End(action)),
63 _ => panic!("Expected action after END"),
64 }
65 }
66 TokenKind::Regex
67 | TokenKind::String
68 | TokenKind::Number
69 | TokenKind::DollarSign
70 | TokenKind::LeftParen
71 | TokenKind::Identifier
72 | TokenKind::Length
73 | TokenKind::Rand => self.parse_pattern_rule(),
74 _ => panic!(
75 "parse_next_rule not yet implemented, found token: {:?}",
76 self.current_token
77 ),
78 }
79 }
80
81 fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
82 let mut pattern = self.parse_expression();
83 if self.current_token.kind == TokenKind::Comma {
84 let operator = self.current_token.clone();
85 self.next_token_with_regex(true);
86 let right = self.parse_expression();
87 pattern = Expression::Infix {
88 left: Box::new(pattern),
89 operator,
90 right: Box::new(right),
91 };
92 }
93 let pattern = Some(pattern);
94
95 if self.current_token.kind == TokenKind::LeftCurlyBrace {
96 match self.parse_action() {
97 Rule::Action(action) => Some(Rule::PatternAction {
98 pattern,
99 action: Some(action),
100 }),
101 _ => panic!("Expected action after pattern"),
102 }
103 } else {
104 Some(Rule::PatternAction {
105 pattern,
106 action: None,
107 })
108 }
109 }
110
111 fn parse_action(&mut self) -> Rule<'a> {
112 self.next_token(); let pattern = None;
115
116 let mut statements = Vec::new();
117 while self.current_token.kind != TokenKind::RightCurlyBrace
118 && self.current_token.kind != TokenKind::Eof
119 {
120 while self.current_token.kind == TokenKind::NewLine
121 || self.current_token.kind == TokenKind::Semicolon
122 {
123 self.next_token();
124 }
125
126 if self.current_token.kind == TokenKind::RightCurlyBrace
127 || self.current_token.kind == TokenKind::Eof
128 {
129 break;
130 }
131
132 statements.push(self.parse_statement());
133 }
134
135 if pattern.is_some() {
136 Rule::PatternAction {
137 pattern,
138 action: Some(Action { statements }),
139 }
140 } else {
141 Rule::Action(Action { statements })
142 }
143 }
144
145 fn parse_statement(&mut self) -> Statement<'a> {
146 match self.current_token.kind {
147 TokenKind::Print => self.parse_print_function(),
148 TokenKind::Printf => self.parse_printf_function(),
149 TokenKind::System => self.parse_system_function(),
150 TokenKind::Gsub => self.parse_gsub_function(),
151 TokenKind::If => self.parse_if_statement(),
152 TokenKind::While => self.parse_while_statement(),
153 TokenKind::For => self.parse_for_statement(),
154 TokenKind::Exit => self.parse_exit_statement(),
155 TokenKind::Identifier => self.parse_assignment_statement(),
156 TokenKind::DollarSign => self.parse_field_assignment_statement(),
157 TokenKind::Increment => self.parse_pre_increment_statement(),
158 TokenKind::Decrement => self.parse_pre_decrement_statement(),
159 _ => todo!(),
160 }
161 }
162
163 fn parse_function_definition(&mut self) {
164 let mut brace_depth = 0usize;
165 loop {
166 if self.current_token.kind == TokenKind::Eof {
167 break;
168 }
169 if self.current_token.kind == TokenKind::LeftCurlyBrace {
170 brace_depth += 1;
171 } else if self.current_token.kind == TokenKind::RightCurlyBrace {
172 if brace_depth == 0 {
173 break;
174 }
175 brace_depth -= 1;
176 if brace_depth == 0 {
177 break;
178 }
179 }
180 self.next_token_with_regex(true);
181 }
182 }
183
184 fn parse_assignment_statement(&mut self) -> Statement<'a> {
185 let identifier = self.current_token.literal;
186 self.next_token();
187 self.parse_assignment_statement_with_identifier(identifier)
188 }
189
190 fn parse_assignment_statement_with_identifier(&mut self, identifier: &'a str) -> Statement<'a> {
191 if self.current_token.kind == TokenKind::LeftSquareBracket {
192 self.next_token_with_regex(true);
193 let index = self.parse_expression();
194 if self.current_token.kind != TokenKind::RightSquareBracket {
195 todo!()
196 }
197 self.next_token();
198 if self.current_token.kind == TokenKind::Assign {
199 self.next_token();
200 let value = self.parse_expression();
201 return Statement::ArrayAssignment {
202 identifier,
203 index,
204 value,
205 };
206 }
207 if self.current_token.kind == TokenKind::AddAssign {
208 self.next_token();
209 let value = self.parse_expression();
210 return Statement::ArrayAddAssignment {
211 identifier,
212 index,
213 value,
214 };
215 }
216 todo!()
217 }
218 if self.current_token.kind == TokenKind::Assign {
219 self.next_token();
220 if self.current_token.kind == TokenKind::Split {
221 return self.parse_split_assignment_statement(identifier);
222 }
223 let value = self.parse_expression();
224 Statement::Assignment { identifier, value }
225 } else if self.current_token.kind == TokenKind::Increment {
226 self.next_token();
227 Statement::PostIncrement { identifier }
228 } else if self.current_token.kind == TokenKind::Decrement {
229 self.next_token();
230 Statement::PostDecrement { identifier }
231 } else if self.current_token.kind == TokenKind::AddAssign {
232 self.next_token();
233 let value = self.parse_expression();
234 Statement::AddAssignment { identifier, value }
235 } else {
236 todo!()
237 }
238 }
239
240 fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
241 self.next_token();
242 if self.current_token.kind != TokenKind::Identifier {
243 todo!()
244 }
245 let identifier = self.current_token.literal;
246 self.next_token();
247 Statement::PreIncrement { identifier }
248 }
249
250 fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
251 self.next_token();
252 if self.current_token.kind != TokenKind::Identifier {
253 todo!()
254 }
255 let identifier = self.current_token.literal;
256 self.next_token();
257 Statement::PreDecrement { identifier }
258 }
259
260 fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
261 self.next_token();
262 if self.current_token.kind != TokenKind::LeftParen {
263 todo!()
264 }
265 self.next_token_with_regex(true);
266 let string = self.parse_expression();
267 if self.current_token.kind != TokenKind::Comma {
268 todo!()
269 }
270 self.next_token();
271 if self.current_token.kind != TokenKind::Identifier {
272 todo!()
273 }
274 let array = self.current_token.literal;
275 self.next_token();
276 if self.current_token.kind != TokenKind::RightParen {
277 todo!()
278 }
279 self.next_token();
280 Statement::SplitAssignment {
281 identifier,
282 string,
283 array,
284 }
285 }
286
287 fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
288 self.next_token();
289 let field = self.parse_primary_expression();
290 let assign_token = self.current_token.clone();
291 self.next_token();
292 let right_value = self.parse_expression();
293
294 let value = if assign_token.kind == TokenKind::Assign {
295 right_value
296 } else {
297 let operator = compound_assign_operator(&assign_token);
298 Expression::Infix {
299 left: Box::new(Expression::Field(Box::new(field.clone()))),
300 operator,
301 right: Box::new(right_value),
302 }
303 };
304 Statement::FieldAssignment { field, value }
305 }
306
307 fn parse_if_statement(&mut self) -> Statement<'a> {
308 self.next_token();
309 if self.current_token.kind != TokenKind::LeftParen {
310 todo!()
311 }
312 self.next_token_with_regex(true);
313 let condition = self.parse_expression();
314 if self.current_token.kind != TokenKind::RightParen {
315 todo!()
316 }
317 self.next_token();
318 while self.current_token.kind == TokenKind::NewLine
319 || self.current_token.kind == TokenKind::Semicolon
320 {
321 self.next_token();
322 }
323 let then_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
324 self.parse_statement_block()
325 } else {
326 vec![self.parse_statement()]
327 };
328
329 while self.current_token.kind == TokenKind::NewLine
330 || self.current_token.kind == TokenKind::Semicolon
331 {
332 self.next_token();
333 }
334
335 if self.current_token.kind == TokenKind::Else {
336 self.next_token();
337 while self.current_token.kind == TokenKind::NewLine
338 || self.current_token.kind == TokenKind::Semicolon
339 {
340 self.next_token();
341 }
342 let else_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
343 self.parse_statement_block()
344 } else {
345 vec![self.parse_statement()]
346 };
347 return Statement::IfElse {
348 condition,
349 then_statements,
350 else_statements,
351 };
352 }
353
354 Statement::If {
355 condition,
356 then_statements,
357 }
358 }
359
/// Parses an `exit` statement by stepping past the `exit` token.
///
/// NOTE(review): nothing after the keyword is consumed here, so an
/// expression following `exit` is left for the caller to deal with —
/// confirm whether an exit status is meant to be supported.
fn parse_exit_statement(&mut self) -> Statement<'a> {
    self.next_token();
    Statement::Exit
}
364
365 fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
366 self.next_token(); let mut statements = Vec::new();
368 while self.current_token.kind != TokenKind::RightCurlyBrace
369 && self.current_token.kind != TokenKind::Eof
370 {
371 while self.current_token.kind == TokenKind::NewLine
372 || self.current_token.kind == TokenKind::Semicolon
373 {
374 self.next_token();
375 }
376
377 if self.current_token.kind == TokenKind::RightCurlyBrace
378 || self.current_token.kind == TokenKind::Eof
379 {
380 break;
381 }
382 statements.push(self.parse_statement());
383 }
384 if self.current_token.kind == TokenKind::RightCurlyBrace {
385 self.next_token();
386 }
387 statements
388 }
389
390 fn parse_while_statement(&mut self) -> Statement<'a> {
391 self.next_token();
392 if self.current_token.kind != TokenKind::LeftParen {
393 todo!()
394 }
395 self.next_token_with_regex(true);
396 let condition = self.parse_expression();
397 if self.current_token.kind != TokenKind::RightParen {
398 todo!()
399 }
400 self.next_token();
401 while self.current_token.kind == TokenKind::NewLine
402 || self.current_token.kind == TokenKind::Semicolon
403 {
404 self.next_token();
405 }
406 if self.current_token.kind != TokenKind::LeftCurlyBrace {
407 todo!()
408 }
409
410 let statements = self.parse_statement_block();
411 Statement::While {
412 condition,
413 statements,
414 }
415 }
416
417 fn parse_for_statement(&mut self) -> Statement<'a> {
418 self.next_token();
419 if self.current_token.kind != TokenKind::LeftParen {
420 todo!()
421 }
422 self.next_token();
423
424 let init = if self.current_token.kind == TokenKind::Identifier {
425 let variable = self.current_token.literal;
426 self.next_token();
427 if self.current_token.kind == TokenKind::In {
428 self.next_token();
429 if self.current_token.kind != TokenKind::Identifier {
430 todo!()
431 }
432 let array = self.current_token.literal;
433 self.next_token();
434 if self.current_token.kind != TokenKind::RightParen {
435 todo!()
436 }
437 self.next_token();
438 while self.current_token.kind == TokenKind::NewLine
439 || self.current_token.kind == TokenKind::Semicolon
440 {
441 self.next_token();
442 }
443 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
444 self.parse_statement_block()
445 } else {
446 vec![self.parse_statement()]
447 };
448 return Statement::ForIn {
449 variable,
450 array,
451 statements,
452 };
453 }
454 self.parse_assignment_statement_with_identifier(variable)
455 } else {
456 self.parse_statement()
457 };
458 if self.current_token.kind != TokenKind::Semicolon {
459 todo!()
460 }
461 self.next_token_with_regex(true);
462
463 let condition = self.parse_expression();
464 if self.current_token.kind != TokenKind::Semicolon {
465 todo!()
466 }
467 self.next_token();
468
469 let update = self.parse_statement();
470 if self.current_token.kind != TokenKind::RightParen {
471 todo!()
472 }
473 self.next_token();
474
475 while self.current_token.kind == TokenKind::NewLine
476 || self.current_token.kind == TokenKind::Semicolon
477 {
478 self.next_token();
479 }
480
481 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
482 self.parse_statement_block()
483 } else {
484 vec![self.parse_statement()]
485 };
486
487 Statement::For {
488 init: Box::new(init),
489 condition,
490 update: Box::new(update),
491 statements,
492 }
493 }
494
495 fn parse_print_function(&mut self) -> Statement<'a> {
496 let mut expressions = Vec::new();
497 let mut expect_more = false;
498 self.next_token();
499
500 loop {
501 if self.current_token.kind == TokenKind::RightCurlyBrace
502 || self.current_token.kind == TokenKind::Eof
503 || self.current_token.kind == TokenKind::GreaterThan
504 || self.current_token.kind == TokenKind::Append
505 || self.current_token.kind == TokenKind::Pipe
506 {
507 break;
508 }
509
510 if self.current_token.kind == TokenKind::NewLine
511 || self.current_token.kind == TokenKind::Semicolon
512 {
513 if expect_more {
514 self.next_token();
515 continue;
516 }
517 break;
518 }
519
520 if self.current_token.kind == TokenKind::Comma {
521 self.next_token();
522 expect_more = true;
523 continue;
524 }
525
526 let expression = self.parse_expression();
527 expressions.push(expression);
528 expect_more = false;
529 }
530
531 if self.current_token.kind == TokenKind::GreaterThan
532 || self.current_token.kind == TokenKind::Append
533 {
534 let append = self.current_token.kind == TokenKind::Append;
535 self.next_token();
536 let target = self.parse_expression();
537 return Statement::PrintRedirect {
538 expressions,
539 target,
540 append,
541 };
542 }
543 if self.current_token.kind == TokenKind::Pipe {
544 self.next_token();
545 let target = self.parse_expression();
546 return Statement::PrintPipe { expressions, target };
547 }
548
549 Statement::Print(expressions)
550 }
551
552 fn parse_printf_function(&mut self) -> Statement<'a> {
553 self.next_token();
554 let expressions = if self.current_token.kind == TokenKind::LeftParen {
555 self.next_token_with_regex(true);
556 let mut expressions = Vec::new();
557 while self.current_token.kind != TokenKind::RightParen
558 && self.current_token.kind != TokenKind::Eof
559 {
560 if self.current_token.kind == TokenKind::Comma {
561 self.next_token();
562 continue;
563 }
564 expressions.push(self.parse_expression());
565 }
566 if self.current_token.kind == TokenKind::RightParen {
567 self.next_token();
568 }
569 expressions
570 } else {
571 self.parse_expression_list_until_action_end_from_current()
572 };
573
574 Statement::Printf(expressions)
575 }
576
577 fn parse_gsub_function(&mut self) -> Statement<'a> {
578 self.next_token();
579 if self.current_token.kind != TokenKind::LeftParen {
580 todo!()
581 }
582
583 self.next_token_with_regex(true);
584 let pattern = self.parse_expression();
585
586 if self.current_token.kind != TokenKind::Comma {
587 todo!()
588 }
589 self.next_token();
590 let replacement = self.parse_expression();
591
592 if self.current_token.kind == TokenKind::Comma {
593 todo!()
594 }
595
596 if self.current_token.kind != TokenKind::RightParen {
597 todo!()
598 }
599 self.next_token();
600
601 Statement::Gsub {
602 pattern,
603 replacement,
604 }
605 }
606
607 fn parse_system_function(&mut self) -> Statement<'a> {
608 self.next_token();
609 if self.current_token.kind != TokenKind::LeftParen {
610 todo!()
611 }
612 self.next_token();
613 let command = self.parse_expression();
614 if self.current_token.kind != TokenKind::RightParen {
615 todo!()
616 }
617 self.next_token();
618 Statement::System(command)
619 }
620
621 fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
622 let mut expressions = Vec::new();
623 let mut expect_more = false;
624
625 loop {
626 if self.current_token.kind == TokenKind::RightCurlyBrace
627 || self.current_token.kind == TokenKind::Eof
628 {
629 break;
630 }
631
632 if self.current_token.kind == TokenKind::NewLine
633 || self.current_token.kind == TokenKind::Semicolon
634 {
635 if expect_more {
636 self.next_token();
637 continue;
638 }
639 break;
640 }
641
642 if self.current_token.kind == TokenKind::Comma {
643 self.next_token();
644 expect_more = true;
645 continue;
646 }
647
648 let expression = self.parse_expression();
649 expressions.push(expression);
650 expect_more = false;
651 }
652
653 expressions
654 }
655
/// Parses a full expression, accepting operators of every precedence level
/// (minimum precedence 0).
fn parse_expression(&mut self) -> Expression<'a> {
    self.parse_expression_with_min_precedence(0)
}
659
/// Parses an expression whose operators all have a left precedence of at
/// least `min_precedence`.
///
/// A primary expression is parsed first; the loop then keeps extending it
/// with, in this order of checks:
/// 1. a ternary `? :` (only when `min_precedence` is 0),
/// 2. an implicit concatenation (the next token is not an infix operator
///    but can start an expression),
/// 3. an infix operator from `infix_operator_precedence`.
///
/// The loop stops at the first token that fits none of these or whose left
/// precedence is below `min_precedence`; that token is left unconsumed.
///
/// # Panics
///
/// Hits `todo!()` when the `:` of a ternary is missing.
fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
    // Concatenation has no operator token, so its precedence pair lives
    // here; it sits between the comparison operators (5, 6) and the
    // additive operators (7, 8) of `infix_operator_precedence`.
    const CONCAT_LEFT_PRECEDENCE: u8 = 6;
    const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
    let mut left = self.parse_primary_expression();

    loop {
        if self.current_token.kind == TokenKind::QuestionMark {
            // The ternary is only absorbed at the lowest level; inside an
            // operand it is left for the enclosing call.
            if min_precedence > 0 {
                break;
            }
            self.next_token_with_regex(true);
            let then_expr = self.parse_expression_with_min_precedence(0);
            if self.current_token.kind != TokenKind::Colon {
                todo!()
            }
            self.next_token_with_regex(true);
            let else_expr = self.parse_expression_with_min_precedence(0);
            left = Expression::Ternary {
                condition: Box::new(left),
                then_expr: Box::new(then_expr),
                else_expr: Box::new(else_expr),
            };
            continue;
        }

        // Two adjacent operands with no operator in between: concatenation.
        if infix_operator_precedence(&self.current_token.kind).is_none()
            && is_expression_start(&self.current_token.kind)
        {
            if CONCAT_LEFT_PRECEDENCE < min_precedence {
                break;
            }

            // No token is consumed here: the current token already is the
            // start of the right operand.
            let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
            left = Expression::Concatenation {
                left: Box::new(left),
                right: Box::new(right),
            };
            continue;
        }

        let (left_precedence, right_precedence) =
            match infix_operator_precedence(&self.current_token.kind) {
                Some(value) => value,
                None => break,
            };

        if left_precedence < min_precedence {
            break;
        }

        let operator = self.current_token.clone();
        // After these operators the right operand is lexed in regex mode;
        // after every other operator regex lexing stays off.
        if matches!(
            operator.kind,
            TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
        ) {
            self.next_token_with_regex(true);
        } else {
            self.next_token();
        }
        let right = self.parse_expression_with_min_precedence(right_precedence);

        left = Expression::Infix {
            left: Box::new(left),
            operator,
            right: Box::new(right),
        };
    }

    left
}
730
731 fn parse_primary_expression(&mut self) -> Expression<'a> {
732 match self.current_token.kind {
733 TokenKind::String => {
734 let expression = Expression::String(self.current_token.literal);
735 self.next_token();
736 expression
737 }
738 TokenKind::Regex => {
739 let expression = Expression::Regex(self.current_token.literal);
740 self.next_token();
741 expression
742 }
743 TokenKind::Number => {
744 let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
745 Expression::Number(value)
746 } else {
747 todo!()
748 };
749 self.next_token();
750 expression
751 }
752 TokenKind::DollarSign => {
753 self.next_token();
754 let expression = self.parse_primary_expression();
755 Expression::Field(Box::new(expression))
756 }
757 TokenKind::LeftParen => {
758 self.next_token();
759 let expression = self.parse_expression();
760 if self.current_token.kind == TokenKind::RightParen {
761 self.next_token();
762 }
763 expression
764 }
765 TokenKind::Identifier => {
766 let identifier = self.current_token.literal;
767 self.next_token();
768 if self.current_token.kind == TokenKind::LeftParen {
769 let args = self.parse_call_arguments();
770 return Expression::FunctionCall {
771 name: identifier,
772 args,
773 };
774 }
775 if self.current_token.kind == TokenKind::LeftSquareBracket {
776 self.next_token_with_regex(true);
777 let index = self.parse_expression();
778 if self.current_token.kind != TokenKind::RightSquareBracket {
779 todo!()
780 }
781 self.next_token();
782 Expression::ArrayAccess {
783 identifier,
784 index: Box::new(index),
785 }
786 } else {
787 Expression::Identifier(identifier)
788 }
789 }
790 TokenKind::Length => {
791 self.next_token();
792 if self.current_token.kind == TokenKind::LeftParen {
793 self.next_token();
794 if self.current_token.kind == TokenKind::RightParen {
795 self.next_token();
796 Expression::Length(None)
797 } else {
798 let expression = self.parse_expression();
799 if self.current_token.kind != TokenKind::RightParen {
800 todo!()
801 }
802 self.next_token();
803 Expression::Length(Some(Box::new(expression)))
804 }
805 } else {
806 Expression::Length(None)
807 }
808 }
809 TokenKind::Substr => {
810 self.next_token();
811 if self.current_token.kind != TokenKind::LeftParen {
812 todo!()
813 }
814 self.next_token();
815 let string = self.parse_expression();
816 if self.current_token.kind != TokenKind::Comma {
817 todo!()
818 }
819 self.next_token();
820 let start = self.parse_expression();
821 let mut length = None;
822 if self.current_token.kind == TokenKind::Comma {
823 self.next_token();
824 length = Some(Box::new(self.parse_expression()));
825 }
826 if self.current_token.kind != TokenKind::RightParen {
827 todo!()
828 }
829 self.next_token();
830 Expression::Substr {
831 string: Box::new(string),
832 start: Box::new(start),
833 length,
834 }
835 }
836 TokenKind::Rand => {
837 self.next_token();
838 if self.current_token.kind == TokenKind::LeftParen {
839 self.next_token();
840 if self.current_token.kind != TokenKind::RightParen {
841 todo!()
842 }
843 self.next_token();
844 }
845 Expression::Rand
846 }
847 TokenKind::Sprintf | TokenKind::Split => {
848 let name = self.current_token.literal;
849 self.next_token();
850 if self.current_token.kind == TokenKind::LeftParen {
851 let args = self.parse_call_arguments();
852 return Expression::FunctionCall { name, args };
853 }
854 Expression::Number(0.0)
855 }
856 _ => {
857 panic!(
858 "parse_primary_expression not yet implemented, found token: {:?}",
859 self.current_token
860 )
861 }
862 }
863 }
864
865 pub fn parse_program(&mut self) -> Program<'_> {
866 let mut program = Program::new();
867
868 while !self.is_eof() {
869 match self.parse_next_rule() {
870 Some(Rule::Begin(action)) => program.add_begin_block(Rule::Begin(action)),
871 Some(Rule::End(action)) => program.add_end_block(Rule::End(action)),
872 Some(rule) => program.add_rule(rule),
873 None => {}
874 }
875 self.next_token_with_regex(true);
876 }
877
878 program
879 }
880
881 fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
882 if self.current_token.kind != TokenKind::LeftParen {
883 return vec![];
884 }
885 self.next_token_with_regex(true);
886 let mut args = Vec::new();
887 while self.current_token.kind != TokenKind::RightParen
888 && self.current_token.kind != TokenKind::Eof
889 {
890 if self.current_token.kind == TokenKind::Comma {
891 self.next_token();
892 continue;
893 }
894 args.push(self.parse_expression());
895 }
896 if self.current_token.kind == TokenKind::RightParen {
897 self.next_token();
898 }
899 args
900 }
901}
902
/// Returns the `(left, right)` precedence pair of an infix operator token,
/// or `None` when `kind` is not an infix operator.
///
/// `parse_expression_with_min_precedence` compares the left value against
/// its minimum to decide whether to consume the operator, and passes the
/// right value as the minimum for the right operand. A pair with
/// `left < right` therefore groups to the left (`5 - 3 - 1`), while a pair
/// with `left >= right` groups to the right (`2 ^ 3 ^ 2`, chained
/// assignment).
fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
    match kind {
        // Lowest precedence, groups to the right.
        TokenKind::Assign => Some((0, 0)),
        TokenKind::Or => Some((1, 2)),
        TokenKind::And => Some((3, 4)),
        // Comparison and match operators.
        TokenKind::Equal
        | TokenKind::NotEqual
        | TokenKind::GreaterThan
        | TokenKind::GreaterThanOrEqual
        | TokenKind::LessThan
        | TokenKind::LessThanOrEqual
        | TokenKind::Tilde
        | TokenKind::NoMatch => Some((5, 6)),
        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
        // Highest precedence, groups to the right.
        TokenKind::Caret => Some((13, 12)),
        _ => None,
    }
}
922
923fn is_expression_start(kind: &TokenKind) -> bool {
924 matches!(
925 kind,
926 TokenKind::String
927 | TokenKind::Regex
928 | TokenKind::Number
929 | TokenKind::DollarSign
930 | TokenKind::LeftParen
931 | TokenKind::Identifier
932 | TokenKind::Length
933 | TokenKind::Rand
934 | TokenKind::Sprintf
935 | TokenKind::Split
936 | TokenKind::Substr
937 )
938}
939
940fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
941 let (kind, literal) = match token.kind {
942 TokenKind::AddAssign => (TokenKind::Plus, "+"),
943 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
944 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
945 TokenKind::DivideAssign => (TokenKind::Division, "/"),
946 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
947 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
948 _ => todo!(),
949 };
950
951 Token::new(kind, literal, token.span.start)
952}
953
954#[cfg(test)]
955mod tests {
956 use super::*;
957
// A fresh parser is primed with the first token and advances on request.
#[test]
fn create_parser() {
    let lexer = Lexer::new("42 == 42");
    let mut parser = Parser::new(lexer);
    assert_eq!(parser.current_token.literal, "42");

    parser.next_token();
    assert_eq!(parser.current_token.literal, "==");
}
966
// Empty input yields a program without any rules.
#[test]
fn parse_empty_program() {
    let lexer = Lexer::new("");
    let mut parser = Parser::new(lexer);
    let program = parser.parse_program();
    assert_eq!(program.len(), 0);
}
975
// A bare action is parsed as a single rule and printed back unchanged.
#[test]
fn parse_action_without_pattern() {
    let source = "{ print }";
    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse_program();

    assert_eq!(program.len(), 1);
    assert_eq!("{ print }", program.to_string());
}
985
// Newlines in front of the first rule are ignored.
#[test]
fn parse_action_with_leading_newlines() {
    let source = "\n\n{ print }";
    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse_program();

    assert_eq!(program.len(), 1);
    assert_eq!("{ print }", program.to_string());
}
995
// A BEGIN block is counted as one rule and printed back unchanged.
#[test]
fn parse_begin_block() {
    let source = "BEGIN { print }";
    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse_program();

    assert_eq!(program.len(), 1);
    assert_eq!("BEGIN { print }", program.to_string());
}
1005
// An END block is counted as one rule and printed back unchanged.
#[test]
fn parse_end_block() {
    let source = "END { print 42 }";
    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse_program();

    assert_eq!(program.len(), 1);
    assert_eq!("END { print 42 }", program.to_string());
}
1015
// A regex pattern followed by an action forms one rule.
#[test]
fn parse_regex_pattern_action() {
    let source = "/foo/ { print }";
    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse_program();

    assert_eq!(program.len(), 1);
    assert_eq!("/foo/ { print }", program.to_string());
}
1025
// `print 1 + 2` carries a single infix expression `1 + 2`.
#[test]
fn parse_print_infix_expression() {
    let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
    let program = parser.parse_program();

    let rule = program
        .begin_blocks_iter()
        .next()
        .expect("expected begin block");
    let statements = match rule {
        Rule::Begin(Action { statements }) => statements,
        _ => panic!("expected begin rule"),
    };
    let exprs = match &statements[0] {
        Statement::Print(expressions) => expressions,
        _ => panic!("expected print statement"),
    };

    let (left, operator, right) = match &exprs[0] {
        Expression::Infix {
            left,
            operator,
            right,
        } => (left, operator, right),
        _ => panic!("expected infix expression"),
    };
    assert!(matches!(**left, Expression::Number(1.0)));
    assert_eq!(operator.kind, TokenKind::Plus);
    assert!(matches!(**right, Expression::Number(2.0)));
}
1057
// Parentheses group `1 + 2` so it becomes the left operand of `*`.
#[test]
fn parse_print_parenthesized_expression() {
    let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
    let program = parser.parse_program();

    let rule = program
        .begin_blocks_iter()
        .next()
        .expect("expected begin block");
    let statements = match rule {
        Rule::Begin(Action { statements }) => statements,
        _ => panic!("expected begin rule"),
    };
    let exprs = match &statements[0] {
        Statement::Print(expressions) => expressions,
        _ => panic!("expected print statement"),
    };

    let (left, operator, right) = match &exprs[0] {
        Expression::Infix {
            left,
            operator,
            right,
        } => (left, operator, right),
        _ => panic!("expected infix expression"),
    };
    assert_eq!(operator.kind, TokenKind::Asterisk);
    assert!(matches!(**right, Expression::Number(3.0)));
    assert!(matches!(**left, Expression::Infix { .. }));
}
1089
// In `1 + 2 * 3` the multiplication ends up as the right operand of `+`.
#[test]
fn parse_print_multiplication_has_higher_precedence_than_addition() {
    let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
    let program = parser.parse_program();

    let rule = program
        .begin_blocks_iter()
        .next()
        .expect("expected begin block");
    let statements = match rule {
        Rule::Begin(Action { statements }) => statements,
        _ => panic!("expected begin rule"),
    };
    let exprs = match &statements[0] {
        Statement::Print(expressions) => expressions,
        _ => panic!("expected print statement"),
    };

    let (left, operator, right) = match &exprs[0] {
        Expression::Infix {
            left,
            operator,
            right,
        } => (left, operator, right),
        _ => panic!("expected infix expression"),
    };
    assert_eq!(operator.kind, TokenKind::Plus);
    assert!(matches!(**left, Expression::Number(1.0)));
    if let Expression::Infix {
        operator: right_op, ..
    } = &**right
    {
        assert_eq!(right_op.kind, TokenKind::Asterisk);
    } else {
        panic!("expected nested infix expression");
    }
}
1126
// `2 ^ 3 ^ 2` groups as `2 ^ (3 ^ 2)`.
#[test]
fn parse_print_power_is_right_associative() {
    let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
    let program = parser.parse_program();

    let rule = program
        .begin_blocks_iter()
        .next()
        .expect("expected begin block");
    let statements = match rule {
        Rule::Begin(Action { statements }) => statements,
        _ => panic!("expected begin rule"),
    };
    let exprs = match &statements[0] {
        Statement::Print(expressions) => expressions,
        _ => panic!("expected print statement"),
    };

    let (left, operator, right) = match &exprs[0] {
        Expression::Infix {
            left,
            operator,
            right,
        } => (left, operator, right),
        _ => panic!("expected infix expression"),
    };
    assert_eq!(operator.kind, TokenKind::Caret);
    assert!(matches!(**left, Expression::Number(2.0)));
    if let Expression::Infix {
        operator: right_op, ..
    } = &**right
    {
        assert_eq!(right_op.kind, TokenKind::Caret);
    } else {
        panic!("expected nested infix expression");
    }
}
1163
// `5 - 3 - 1` groups as `(5 - 3) - 1`.
#[test]
fn parse_print_minus_is_left_associative() {
    let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
    let program = parser.parse_program();

    let rule = program
        .begin_blocks_iter()
        .next()
        .expect("expected begin block");
    let statements = match rule {
        Rule::Begin(Action { statements }) => statements,
        _ => panic!("expected begin rule"),
    };
    let exprs = match &statements[0] {
        Statement::Print(expressions) => expressions,
        _ => panic!("expected print statement"),
    };

    let (left, operator, right) = match &exprs[0] {
        Expression::Infix {
            left,
            operator,
            right,
        } => (left, operator, right),
        _ => panic!("expected infix expression"),
    };
    assert_eq!(operator.kind, TokenKind::Minus);
    if let Expression::Infix {
        operator: left_op, ..
    } = &**left
    {
        assert_eq!(left_op.kind, TokenKind::Minus);
    } else {
        panic!("expected nested infix expression");
    }
    assert!(matches!(**right, Expression::Number(1.0)));
}
1200
1201 #[test]
1202 fn parse_print_concatenation() {
1203 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
1204
1205 let program = parser.parse_program();
1206 let mut begin_blocks = program.begin_blocks_iter();
1207 let rule = begin_blocks.next().expect("expected begin block");
1208
1209 let statements = match rule {
1210 Rule::Begin(Action { statements }) => statements,
1211 _ => panic!("expected begin rule"),
1212 };
1213
1214 let exprs = match &statements[0] {
1215 Statement::Print(expressions) => expressions,
1216 _ => panic!("expected print statement"),
1217 };
1218
1219 assert_eq!(exprs.len(), 1);
1220 match &exprs[0] {
1221 Expression::Concatenation { left, right } => {
1222 assert!(matches!(**left, Expression::String("Value:")));
1223 assert!(matches!(**right, Expression::Number(42.0)));
1224 }
1225 _ => panic!("expected concatenation expression"),
1226 }
1227 }
1228
1229 #[test]
1230 fn parse_print_field_expression() {
1231 let mut parser = Parser::new(Lexer::new("{ print $1 }"));
1232
1233 let program = parser.parse_program();
1234 let mut rules = program.rules_iter();
1235 let rule = rules.next().expect("expected rule");
1236
1237 let statements = match rule {
1238 Rule::Action(Action { statements }) => statements,
1239 _ => panic!("expected action rule"),
1240 };
1241
1242 let exprs = match &statements[0] {
1243 Statement::Print(expressions) => expressions,
1244 _ => panic!("expected print statement"),
1245 };
1246
1247 match &exprs[0] {
1248 Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
1249 _ => panic!("expected field expression"),
1250 }
1251 }
1252
1253 #[test]
1254 fn parse_print_with_commas() {
1255 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
1256
1257 let program = parser.parse_program();
1258
1259 assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
1260 }
1261
1262 #[test]
1263 fn parse_number_of_fields_identifier() {
1264 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
1265
1266 let program = parser.parse_program();
1267
1268 assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
1269 }
1270
1271 #[test]
1272 fn parse_printf_with_format_and_arguments() {
1273 let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
1274
1275 let program = parser.parse_program();
1276
1277 assert_eq!(
1278 r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
1279 program.to_string()
1280 );
1281 }
1282
1283 #[test]
1284 fn parse_add_assignment_and_pre_increment() {
1285 let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
1286
1287 let program = parser.parse_program();
1288
1289 assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
1290 }
1291
1292 #[test]
1293 fn parse_regex_match_pattern_action() {
1294 let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
1295
1296 let program = parser.parse_program();
1297
1298 assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
1299 }
1300
1301 #[test]
1302 fn parse_print_with_line_continuation_after_comma() {
1303 let mut parser = Parser::new(Lexer::new(
1304 "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
1305 ));
1306
1307 let program = parser.parse_program();
1308
1309 assert_eq!(
1310 "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
1311 program.to_string()
1312 );
1313 }
1314
1315 #[test]
1316 fn parse_gsub_statement() {
1317 let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
1318
1319 let program = parser.parse_program();
1320
1321 assert_eq!(
1322 r#"{ gsub(/USA/, "United States"); print }"#,
1323 program.to_string()
1324 );
1325 }
1326
1327 #[test]
1328 fn parse_system_statement() {
1329 let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
1330
1331 let program = parser.parse_program();
1332
1333 assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
1334 }
1335
1336 #[test]
1337 fn parse_print_length_builtin_expression() {
1338 let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
1339
1340 let program = parser.parse_program();
1341
1342 assert_eq!(r#"{ print length, $0 }"#, program.to_string());
1343 }
1344
1345 #[test]
1346 fn parse_length_expression_as_rule_pattern() {
1347 let mut parser = Parser::new(Lexer::new(
1348 r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1349 ));
1350
1351 let program = parser.parse_program();
1352
1353 assert_eq!(
1354 r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1355 program.to_string()
1356 );
1357 }
1358
1359 #[test]
1360 fn parse_field_assignment_with_substr() {
1361 let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
1362
1363 let program = parser.parse_program();
1364
1365 assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
1366 }
1367
1368 #[test]
1369 fn parse_assignment_with_concatenation_and_substr() {
1370 let mut parser = Parser::new(Lexer::new(
1371 r#"{ s = s " " substr($1, 1, 3) }"#,
1372 ));
1373
1374 let program = parser.parse_program();
1375
1376 assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
1377 }
1378
1379 #[test]
1380 fn parse_field_divide_assignment() {
1381 let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
1382
1383 let program = parser.parse_program();
1384
1385 assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
1386 }
1387
1388 #[test]
1389 fn parse_chained_assignment() {
1390 let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
1391
1392 let program = parser.parse_program();
1393
1394 assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
1395 }
1396
1397 #[test]
1398 fn parse_if_statement_with_block() {
1399 let mut parser = Parser::new(Lexer::new(
1400 r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1401 ));
1402
1403 let program = parser.parse_program();
1404
1405 assert_eq!(
1406 r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1407 program.to_string()
1408 );
1409 }
1410
1411 #[test]
1412 fn parse_while_with_post_increment() {
1413 let mut parser = Parser::new(Lexer::new(
1414 r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1415 ));
1416
1417 let program = parser.parse_program();
1418
1419 assert_eq!(
1420 r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1421 program.to_string()
1422 );
1423 }
1424
1425 #[test]
1426 fn parse_post_decrement_statement() {
1427 let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
1428
1429 let program = parser.parse_program();
1430
1431 assert_eq!(r#"{ k--; n-- }"#, program.to_string());
1432 }
1433
1434 #[test]
1435 fn parse_rand_expression() {
1436 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
1437
1438 let program = parser.parse_program();
1439
1440 assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
1441 }
1442
1443 #[test]
1444 fn parse_for_loop_with_single_body_statement() {
1445 let mut parser = Parser::new(Lexer::new(
1446 r#"{ for (i = 1; i <= NF; i++) print $i }"#,
1447 ));
1448
1449 let program = parser.parse_program();
1450
1451 assert_eq!(
1452 r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
1453 program.to_string()
1454 );
1455 }
1456
1457 #[test]
1458 fn parse_if_with_single_statement_body() {
1459 let mut parser = Parser::new(Lexer::new(
1460 r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
1461 ));
1462
1463 let program = parser.parse_program();
1464
1465 assert_eq!(
1466 r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
1467 program.to_string()
1468 );
1469 }
1470
1471 #[test]
1472 fn parse_exit_statement() {
1473 let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
1474
1475 let program = parser.parse_program();
1476
1477 assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
1478 }
1479
1480 #[test]
1481 fn parse_array_add_assignment_and_access() {
1482 let mut parser = Parser::new(Lexer::new(
1483 r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
1484 ));
1485
1486 let program = parser.parse_program();
1487
1488 assert_eq!(
1489 r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
1490 program.to_string()
1491 );
1492 }
1493
1494 #[test]
1495 fn parse_for_in_loop() {
1496 let mut parser = Parser::new(Lexer::new(
1497 r#"END { for (name in area) print name ":" area[name] }"#,
1498 ));
1499
1500 let program = parser.parse_program();
1501
1502 assert_eq!(
1503 r#"END { for (name in area) { print name ":" area[name] } }"#,
1504 program.to_string()
1505 );
1506 }
1507
1508 #[test]
1509 fn parse_print_redirection() {
1510 let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
1511
1512 let program = parser.parse_program();
1513
1514 assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
1515 }
1516
1517 #[test]
1518 fn parse_print_pipe() {
1519 let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
1520
1521 let program = parser.parse_program();
1522
1523 assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
1524 }
1525}