1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
/// Parser state: a token source, the token currently under inspection, and
/// the function definitions collected so far.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Lexer the parser pulls tokens from on demand.
    lexer: Lexer<'a>,
    /// The single token the parser is currently looking at.
    current_token: Token<'a>,
    /// Function definitions accumulated by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
13
14impl<'a> Parser<'a> {
15 pub fn new(mut lexer: Lexer<'a>) -> Self {
16 let current_token = lexer.next_token_regex_aware();
17 Parser {
18 lexer,
19 current_token,
20 function_definitions: Vec::new(),
21 }
22 }
23
24 fn next_token(&mut self) {
25 self.current_token = self.lexer.next_token();
26 }
27
28 fn next_token_in_regex_context(&mut self) {
29 self.current_token = self.lexer.next_token_regex_aware();
30 }
31
32 fn is_eof(&self) -> bool {
33 self.current_token.kind == TokenKind::Eof
34 }
35
36 fn is_statement_terminator(&self) -> bool {
37 matches!(
38 self.current_token.kind,
39 TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40 )
41 }
42
43 fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44 self.current_token.span.start == previous.span.start + previous.literal.len()
45 }
46
47 fn parse_array_index_expression(&mut self) -> Expression<'a> {
48 let mut index = self.parse_expression();
49 while self.current_token.kind == TokenKind::Comma {
50 let operator = self.current_token.clone();
51 self.next_token_in_regex_context();
52 let right = self.parse_expression();
53 index = Expression::Infix {
54 left: Box::new(index),
55 operator,
56 right: Box::new(right),
57 };
58 }
59 index
60 }
61
62 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
63 match &self.current_token.kind {
64 TokenKind::Begin => {
65 self.next_token();
66 match self.parse_action() {
67 Rule::Action(action) => Some(Rule::Begin(action)),
68 _ => panic!("Expected action after BEGIN"),
69 }
70 }
71 TokenKind::NewLine => {
72 self.next_token_in_regex_context();
73 self.parse_next_rule()
74 }
75 TokenKind::Eof => None,
76 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
77 TokenKind::Function => {
78 self.parse_function_definition();
79 None
80 }
81 TokenKind::End => {
82 self.next_token();
83 match self.parse_action() {
84 Rule::Action(action) => Some(Rule::End(action)),
85 _ => panic!("Expected action after END"),
86 }
87 }
88 TokenKind::Regex
89 | TokenKind::String
90 | TokenKind::Number
91 | TokenKind::DollarSign
92 | TokenKind::LeftParen
93 | TokenKind::Identifier
94 | TokenKind::Cos
95 | TokenKind::Exp
96 | TokenKind::Index
97 | TokenKind::Int
98 | TokenKind::Length
99 | TokenKind::Log
100 | TokenKind::Match
101 | TokenKind::Rand
102 | TokenKind::Sin
103 | TokenKind::Sprintf
104 | TokenKind::Split
105 | TokenKind::Sqrt
106 | TokenKind::Srand
107 | TokenKind::Substr
108 | TokenKind::ExclamationMark
109 | TokenKind::Increment
110 | TokenKind::Decrement => self.parse_pattern_rule(),
111 _ => panic!(
112 "parse_next_rule not yet implemented, found token: {:?}",
113 self.current_token
114 ),
115 }
116 }
117
118 fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
119 let mut pattern = self.parse_expression();
120 if self.current_token.kind == TokenKind::Comma {
121 let operator = self.current_token.clone();
122 self.next_token_in_regex_context();
123 let right = self.parse_expression();
124 pattern = Expression::Infix {
125 left: Box::new(pattern),
126 operator,
127 right: Box::new(right),
128 };
129 }
130 let pattern = Some(pattern);
131
132 if self.current_token.kind == TokenKind::LeftCurlyBrace {
133 match self.parse_action() {
134 Rule::Action(action) => Some(Rule::PatternAction {
135 pattern,
136 action: Some(action),
137 }),
138 _ => panic!("Expected action after pattern"),
139 }
140 } else {
141 Some(Rule::PatternAction {
142 pattern,
143 action: None,
144 })
145 }
146 }
147
148 fn parse_action(&mut self) -> Rule<'a> {
149 self.next_token(); let pattern = None;
152
153 let mut statements = Vec::new();
154 while self.current_token.kind != TokenKind::RightCurlyBrace
155 && self.current_token.kind != TokenKind::Eof
156 {
157 while self.current_token.kind == TokenKind::NewLine
158 || self.current_token.kind == TokenKind::Semicolon
159 {
160 self.next_token();
161 }
162
163 if self.current_token.kind == TokenKind::RightCurlyBrace
164 || self.current_token.kind == TokenKind::Eof
165 {
166 break;
167 }
168
169 statements.push(self.parse_statement());
170 }
171
172 if pattern.is_some() {
173 Rule::PatternAction {
174 pattern,
175 action: Some(Action { statements }),
176 }
177 } else {
178 Rule::Action(Action { statements })
179 }
180 }
181
182 fn parse_statement(&mut self) -> Statement<'a> {
183 match self.current_token.kind {
184 TokenKind::Print => self.parse_print_function(),
185 TokenKind::Printf => self.parse_printf_function(),
186 TokenKind::System => self.parse_system_function(),
187 TokenKind::Split => self.parse_split_statement(),
188 TokenKind::Sub => self.parse_sub_function(),
189 TokenKind::Gsub => self.parse_gsub_function(),
190 TokenKind::Break => self.parse_break_statement(),
191 TokenKind::Continue => self.parse_continue_statement(),
192 TokenKind::Delete => self.parse_delete_statement(),
193 TokenKind::If => self.parse_if_statement(),
194 TokenKind::Do => self.parse_do_statement(),
195 TokenKind::While => self.parse_while_statement(),
196 TokenKind::For => self.parse_for_statement(),
197 TokenKind::Return => self.parse_return_statement(),
198 TokenKind::Next => self.parse_next_statement(),
199 TokenKind::Exit => self.parse_exit_statement(),
200 TokenKind::Identifier => self.parse_assignment_statement(),
201 TokenKind::DollarSign => self.parse_field_assignment_statement(),
202 TokenKind::Increment => self.parse_pre_increment_statement(),
203 TokenKind::Decrement => self.parse_pre_decrement_statement(),
204 TokenKind::Number
205 | TokenKind::String
206 | TokenKind::Regex
207 | TokenKind::LeftParen
208 | TokenKind::Close
209 | TokenKind::Cos
210 | TokenKind::Exp
211 | TokenKind::Index
212 | TokenKind::Int
213 | TokenKind::Length
214 | TokenKind::Log
215 | TokenKind::Match
216 | TokenKind::Rand
217 | TokenKind::Sin
218 | TokenKind::Sprintf
219 | TokenKind::Sqrt
220 | TokenKind::Srand
221 | TokenKind::Substr
222 | TokenKind::ToLower
223 | TokenKind::ToUpper => Statement::Expression(self.parse_expression()),
224 _ => todo!(),
225 }
226 }
227
228 fn parse_function_definition(&mut self) {
229 self.next_token();
230 if self.current_token.kind != TokenKind::Identifier {
231 todo!()
232 }
233 let name = self.current_token.literal;
234 self.next_token();
235 if self.current_token.kind != TokenKind::LeftParen {
236 todo!()
237 }
238 self.next_token();
239
240 let mut parameters = Vec::new();
241 while self.current_token.kind != TokenKind::RightParen {
242 if self.current_token.kind != TokenKind::Identifier {
243 todo!()
244 }
245 parameters.push(self.current_token.literal);
246 self.next_token();
247 if self.current_token.kind == TokenKind::Comma {
248 self.next_token();
249 } else if self.current_token.kind != TokenKind::RightParen {
250 todo!()
251 }
252 }
253
254 self.next_token();
255 while self.current_token.kind == TokenKind::NewLine {
256 self.next_token();
257 }
258 if self.current_token.kind != TokenKind::LeftCurlyBrace {
259 todo!()
260 }
261
262 let mut statements = Vec::new();
263 self.next_token(); while self.current_token.kind != TokenKind::RightCurlyBrace
265 && self.current_token.kind != TokenKind::Eof
266 {
267 while self.current_token.kind == TokenKind::NewLine
268 || self.current_token.kind == TokenKind::Semicolon
269 {
270 self.next_token();
271 }
272
273 if self.current_token.kind == TokenKind::RightCurlyBrace
274 || self.current_token.kind == TokenKind::Eof
275 {
276 break;
277 }
278
279 statements.push(self.parse_statement());
280 }
281 self.function_definitions.push(FunctionDefinition {
282 name,
283 parameters,
284 statements,
285 });
286 }
287
288 fn parse_assignment_statement(&mut self) -> Statement<'a> {
289 let identifier = self.current_token.clone();
290 self.next_token();
291 self.parse_assignment_statement_with_identifier(identifier)
292 }
293
294 fn parse_assignment_statement_with_identifier(&mut self, identifier: Token<'a>) -> Statement<'a> {
295 if self.current_token.kind == TokenKind::LeftParen
296 && self.token_is_immediately_after(&identifier)
297 {
298 let args = self.parse_call_arguments();
299 return Statement::Expression(Expression::FunctionCall {
300 name: identifier.literal,
301 args,
302 });
303 }
304 if self.current_token.kind == TokenKind::LeftSquareBracket {
305 self.next_token_in_regex_context();
306 let index = self.parse_array_index_expression();
307 if self.current_token.kind != TokenKind::RightSquareBracket {
308 todo!()
309 }
310 self.next_token();
311 if self.current_token.kind == TokenKind::Assign {
312 self.next_token();
313 let value = self.parse_expression();
314 return Statement::ArrayAssignment {
315 identifier: identifier.literal,
316 index,
317 value,
318 };
319 }
320 if self.current_token.kind == TokenKind::AddAssign {
321 self.next_token();
322 let value = self.parse_expression();
323 return Statement::ArrayAddAssignment {
324 identifier: identifier.literal,
325 index,
326 value,
327 };
328 }
329 if self.current_token.kind == TokenKind::Increment {
330 self.next_token();
331 return Statement::ArrayPostIncrement {
332 identifier: identifier.literal,
333 index,
334 };
335 }
336 if self.current_token.kind == TokenKind::Decrement {
337 self.next_token();
338 return Statement::ArrayPostDecrement {
339 identifier: identifier.literal,
340 index,
341 };
342 }
343 todo!()
344 }
345 if self.current_token.kind == TokenKind::Assign {
346 self.next_token();
347 if self.current_token.kind == TokenKind::Split {
348 return self.parse_split_assignment_statement(identifier.literal);
349 }
350 let value = self.parse_expression();
351 Statement::Assignment {
352 identifier: identifier.literal,
353 value,
354 }
355 } else if self.current_token.kind == TokenKind::Increment {
356 self.next_token();
357 Statement::PostIncrement {
358 identifier: identifier.literal,
359 }
360 } else if self.current_token.kind == TokenKind::Decrement {
361 self.next_token();
362 Statement::PostDecrement {
363 identifier: identifier.literal,
364 }
365 } else if self.current_token.kind == TokenKind::AddAssign {
366 self.next_token();
367 let value = self.parse_expression();
368 Statement::AddAssignment {
369 identifier: identifier.literal,
370 value,
371 }
372 } else if matches!(
373 self.current_token.kind,
374 TokenKind::SubtractAssign
375 | TokenKind::MultiplyAssign
376 | TokenKind::DivideAssign
377 | TokenKind::ModuloAssign
378 | TokenKind::PowerAssign
379 ) {
380 let assign_token = self.current_token.clone();
381 self.next_token();
382 let right_value = self.parse_expression();
383 Statement::Assignment {
384 identifier: identifier.literal,
385 value: Expression::Infix {
386 left: Box::new(Expression::Identifier(identifier.literal)),
387 operator: compound_assign_operator(&assign_token),
388 right: Box::new(right_value),
389 },
390 }
391 } else {
392 todo!()
393 }
394 }
395
396 fn parse_delete_statement(&mut self) -> Statement<'a> {
397 self.next_token();
398 if self.current_token.kind != TokenKind::Identifier {
399 todo!()
400 }
401 let identifier = self.current_token.literal;
402 self.next_token();
403 if self.current_token.kind != TokenKind::LeftSquareBracket {
404 return Statement::Delete {
405 identifier,
406 index: None,
407 };
408 }
409
410 self.next_token_in_regex_context();
411 let index = self.parse_array_index_expression();
412 if self.current_token.kind != TokenKind::RightSquareBracket {
413 todo!()
414 }
415 self.next_token();
416 Statement::Delete {
417 identifier,
418 index: Some(index),
419 }
420 }
421
422 fn parse_break_statement(&mut self) -> Statement<'a> {
423 self.next_token();
424 Statement::Break
425 }
426
427 fn parse_continue_statement(&mut self) -> Statement<'a> {
428 self.next_token();
429 Statement::Continue
430 }
431
432 fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
433 self.next_token();
434 if self.current_token.kind != TokenKind::Identifier {
435 todo!()
436 }
437 let identifier = self.current_token.literal;
438 self.next_token();
439 Statement::PreIncrement { identifier }
440 }
441
442 fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
443 self.next_token();
444 if self.current_token.kind != TokenKind::Identifier {
445 todo!()
446 }
447 let identifier = self.current_token.literal;
448 self.next_token();
449 Statement::PreDecrement { identifier }
450 }
451
452 fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
453 self.next_token();
454 if self.current_token.kind != TokenKind::LeftParen {
455 todo!()
456 }
457 self.next_token_in_regex_context();
458 let string = self.parse_expression();
459 if self.current_token.kind != TokenKind::Comma {
460 todo!()
461 }
462 self.next_token();
463 if self.current_token.kind != TokenKind::Identifier {
464 todo!()
465 }
466 let array = self.current_token.literal;
467 self.next_token();
468 let separator = if self.current_token.kind == TokenKind::Comma {
469 self.next_token_in_regex_context();
470 Some(self.parse_expression())
471 } else {
472 None
473 };
474 if self.current_token.kind != TokenKind::RightParen {
475 todo!()
476 }
477 self.next_token();
478 Statement::SplitAssignment {
479 identifier,
480 string,
481 array,
482 separator,
483 }
484 }
485
486 fn parse_split_statement(&mut self) -> Statement<'a> {
487 self.next_token();
488 if self.current_token.kind != TokenKind::LeftParen {
489 todo!()
490 }
491 self.next_token_in_regex_context();
492 let string = self.parse_expression();
493 if self.current_token.kind != TokenKind::Comma {
494 todo!()
495 }
496 self.next_token();
497 if self.current_token.kind != TokenKind::Identifier {
498 todo!()
499 }
500 let array = self.current_token.literal;
501 self.next_token();
502 let separator = if self.current_token.kind == TokenKind::Comma {
503 self.next_token_in_regex_context();
504 Some(self.parse_expression())
505 } else {
506 None
507 };
508 if self.current_token.kind != TokenKind::RightParen {
509 todo!()
510 }
511 self.next_token();
512 Statement::Split {
513 string,
514 array,
515 separator,
516 }
517 }
518
519 fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
520 self.next_token();
521 let field = self.parse_primary_expression();
522 let assign_token = self.current_token.clone();
523 self.next_token();
524 let right_value = self.parse_expression();
525
526 let value = if assign_token.kind == TokenKind::Assign {
527 right_value
528 } else {
529 let operator = compound_assign_operator(&assign_token);
530 Expression::Infix {
531 left: Box::new(Expression::Field(Box::new(field.clone()))),
532 operator,
533 right: Box::new(right_value),
534 }
535 };
536 Statement::FieldAssignment { field, value }
537 }
538
539 fn parse_if_statement(&mut self) -> Statement<'a> {
540 self.next_token();
541 if self.current_token.kind != TokenKind::LeftParen {
542 todo!()
543 }
544 self.next_token_in_regex_context();
545 let condition = self.parse_condition_in_parens();
546 if self.current_token.kind != TokenKind::RightParen {
547 todo!()
548 }
549 self.next_token();
550 let then_statements = self.parse_control_statement_body();
551
552 while self.current_token.kind == TokenKind::NewLine
553 || self.current_token.kind == TokenKind::Semicolon
554 {
555 self.next_token();
556 }
557
558 if self.current_token.kind == TokenKind::Else {
559 self.next_token();
560 let else_statements = self.parse_control_statement_body();
561 return Statement::IfElse {
562 condition,
563 then_statements,
564 else_statements,
565 };
566 }
567
568 Statement::If {
569 condition,
570 then_statements,
571 }
572 }
573
574 fn parse_exit_statement(&mut self) -> Statement<'a> {
575 self.next_token();
576 let status = if self.is_statement_terminator() {
577 None
578 } else {
579 Some(self.parse_expression())
580 };
581 Statement::Exit(status)
582 }
583
584 fn parse_return_statement(&mut self) -> Statement<'a> {
585 self.next_token();
586 let value = if self.is_statement_terminator() {
587 None
588 } else {
589 Some(self.parse_expression())
590 };
591 Statement::Return(value)
592 }
593
594 fn parse_next_statement(&mut self) -> Statement<'a> {
595 self.next_token();
596 Statement::Next
597 }
598
599 fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
600 self.next_token(); let mut statements = Vec::new();
602 while self.current_token.kind != TokenKind::RightCurlyBrace
603 && self.current_token.kind != TokenKind::Eof
604 {
605 while self.current_token.kind == TokenKind::NewLine
606 || self.current_token.kind == TokenKind::Semicolon
607 {
608 self.next_token();
609 }
610
611 if self.current_token.kind == TokenKind::RightCurlyBrace
612 || self.current_token.kind == TokenKind::Eof
613 {
614 break;
615 }
616 statements.push(self.parse_statement());
617 }
618 if self.current_token.kind == TokenKind::RightCurlyBrace {
619 self.next_token();
620 }
621 statements
622 }
623
624 fn parse_control_statement_body(&mut self) -> Vec<Statement<'a>> {
625 while self.current_token.kind == TokenKind::NewLine {
626 self.next_token();
627 }
628
629 if self.current_token.kind == TokenKind::LeftCurlyBrace {
630 return self.parse_statement_block();
631 }
632
633 if self.current_token.kind == TokenKind::Semicolon {
634 self.next_token();
635 return vec![Statement::Empty];
636 }
637
638 vec![self.parse_statement()]
639 }
640
641 fn parse_while_statement(&mut self) -> Statement<'a> {
642 self.next_token();
643 if self.current_token.kind != TokenKind::LeftParen {
644 todo!()
645 }
646 self.next_token_in_regex_context();
647 let condition = self.parse_condition_in_parens();
648 if self.current_token.kind != TokenKind::RightParen {
649 todo!()
650 }
651 self.next_token();
652 let statements = self.parse_control_statement_body();
653 Statement::While {
654 condition,
655 statements,
656 }
657 }
658
659 fn parse_do_statement(&mut self) -> Statement<'a> {
660 self.next_token();
661 let statements = self.parse_control_statement_body();
662
663 while self.current_token.kind == TokenKind::NewLine
664 || self.current_token.kind == TokenKind::Semicolon
665 {
666 self.next_token();
667 }
668
669 if self.current_token.kind != TokenKind::While {
670 todo!()
671 }
672 self.next_token();
673 if self.current_token.kind != TokenKind::LeftParen {
674 todo!()
675 }
676 self.next_token_in_regex_context();
677 let condition = self.parse_condition_in_parens();
678 if self.current_token.kind != TokenKind::RightParen {
679 todo!()
680 }
681 self.next_token();
682 Statement::DoWhile {
683 condition,
684 statements,
685 }
686 }
687
688 fn parse_for_statement(&mut self) -> Statement<'a> {
689 self.next_token();
690 if self.current_token.kind != TokenKind::LeftParen {
691 todo!()
692 }
693 self.next_token();
694 while self.current_token.kind == TokenKind::NewLine {
695 self.next_token();
696 }
697
698 let init = if self.current_token.kind == TokenKind::Semicolon {
699 Statement::Empty
700 } else if self.current_token.kind == TokenKind::Identifier {
701 let variable = self.current_token.clone();
702 self.next_token();
703 if self.current_token.kind == TokenKind::In {
704 self.next_token();
705 if self.current_token.kind != TokenKind::Identifier {
706 todo!()
707 }
708 let array = self.current_token.literal;
709 self.next_token();
710 if self.current_token.kind != TokenKind::RightParen {
711 todo!()
712 }
713 self.next_token();
714 let statements = self.parse_control_statement_body();
715 return Statement::ForIn {
716 variable: variable.literal,
717 array,
718 statements,
719 };
720 }
721 self.parse_assignment_statement_with_identifier(variable)
722 } else {
723 self.parse_statement()
724 };
725 while self.current_token.kind == TokenKind::NewLine {
726 self.next_token();
727 }
728 if self.current_token.kind != TokenKind::Semicolon {
729 todo!()
730 }
731 self.next_token_in_regex_context();
732 while self.current_token.kind == TokenKind::NewLine {
733 self.next_token_in_regex_context();
734 }
735
736 let condition = if self.current_token.kind == TokenKind::Semicolon {
737 Expression::Number(1.0)
738 } else {
739 self.parse_expression()
740 };
741 while self.current_token.kind == TokenKind::NewLine {
742 self.next_token();
743 }
744 if self.current_token.kind != TokenKind::Semicolon {
745 todo!()
746 }
747 self.next_token_in_regex_context();
748 while self.current_token.kind == TokenKind::NewLine {
749 self.next_token_in_regex_context();
750 }
751
752 let update = if self.current_token.kind == TokenKind::RightParen {
753 Statement::Empty
754 } else {
755 self.parse_statement()
756 };
757 while self.current_token.kind == TokenKind::NewLine {
758 self.next_token();
759 }
760 if self.current_token.kind != TokenKind::RightParen {
761 todo!()
762 }
763 self.next_token();
764 let statements = self.parse_control_statement_body();
765
766 Statement::For {
767 init: Box::new(init),
768 condition,
769 update: Box::new(update),
770 statements,
771 }
772 }
773
774 fn parse_print_function(&mut self) -> Statement<'a> {
775 let mut expressions = Vec::new();
776 let mut expect_more = false;
777 self.next_token();
778
779 loop {
780 if self.current_token.kind == TokenKind::RightCurlyBrace
781 || self.current_token.kind == TokenKind::RightParen
782 || self.current_token.kind == TokenKind::Eof
783 || self.current_token.kind == TokenKind::GreaterThan
784 || self.current_token.kind == TokenKind::Append
785 || self.current_token.kind == TokenKind::Pipe
786 {
787 break;
788 }
789
790 if self.current_token.kind == TokenKind::NewLine
791 || self.current_token.kind == TokenKind::Semicolon
792 {
793 if expect_more {
794 self.next_token();
795 continue;
796 }
797 break;
798 }
799
800 if self.current_token.kind == TokenKind::Comma {
801 self.next_token();
802 expect_more = true;
803 continue;
804 }
805
806 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
807 let expression = self.parse_expression();
808 expressions.push(expression);
809 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
810 while self.current_token.kind == TokenKind::Comma {
811 self.next_token();
812 expressions.push(self.parse_expression());
813 }
814 if self.current_token.kind != TokenKind::RightParen {
815 todo!()
816 }
817 self.next_token();
818 }
819 expect_more = false;
820 }
821 if self.current_token.kind == TokenKind::RightParen {
822 self.next_token();
823 }
824
825 if self.current_token.kind == TokenKind::GreaterThan
826 || self.current_token.kind == TokenKind::Append
827 {
828 let append = self.current_token.kind == TokenKind::Append;
829 self.next_token();
830 let target = self.parse_expression();
831 return Statement::PrintRedirect {
832 expressions,
833 target,
834 append,
835 };
836 }
837 if self.current_token.kind == TokenKind::Pipe {
838 self.next_token();
839 let target = self.parse_expression();
840 return Statement::PrintPipe {
841 expressions,
842 target,
843 };
844 }
845
846 Statement::Print(expressions)
847 }
848
849 fn parse_printf_function(&mut self) -> Statement<'a> {
850 self.next_token();
851 let expressions = if self.current_token.kind == TokenKind::LeftParen {
852 self.next_token_in_regex_context();
853 let mut expressions = Vec::new();
854 while self.current_token.kind != TokenKind::RightParen
855 && self.current_token.kind != TokenKind::Eof
856 {
857 if self.current_token.kind == TokenKind::Comma {
858 self.next_token();
859 continue;
860 }
861 expressions.push(self.parse_expression());
862 }
863 if self.current_token.kind == TokenKind::RightParen {
864 self.next_token();
865 }
866 expressions
867 } else {
868 self.parse_expression_list_until_action_end_from_current()
869 };
870
871 Statement::Printf(expressions)
872 }
873
874 fn parse_gsub_function(&mut self) -> Statement<'a> {
875 self.next_token();
876 if self.current_token.kind != TokenKind::LeftParen {
877 todo!()
878 }
879
880 self.next_token_in_regex_context();
881 let pattern = self.parse_expression();
882
883 if self.current_token.kind != TokenKind::Comma {
884 todo!()
885 }
886 self.next_token();
887 let replacement = self.parse_expression();
888
889 let target = if self.current_token.kind == TokenKind::Comma {
890 self.next_token();
891 Some(self.parse_expression())
892 } else {
893 None
894 };
895
896 if self.current_token.kind != TokenKind::RightParen {
897 todo!()
898 }
899 self.next_token();
900
901 Statement::Gsub {
902 pattern,
903 replacement,
904 target,
905 }
906 }
907
908 fn parse_sub_function(&mut self) -> Statement<'a> {
909 self.next_token();
910 if self.current_token.kind != TokenKind::LeftParen {
911 todo!()
912 }
913
914 self.next_token_in_regex_context();
915 let pattern = self.parse_expression();
916
917 if self.current_token.kind != TokenKind::Comma {
918 todo!()
919 }
920 self.next_token();
921 let replacement = self.parse_expression();
922
923 if self.current_token.kind == TokenKind::Comma {
924 todo!()
925 }
926
927 if self.current_token.kind != TokenKind::RightParen {
928 todo!()
929 }
930 self.next_token();
931
932 Statement::Sub {
933 pattern,
934 replacement,
935 }
936 }
937
938 fn parse_system_function(&mut self) -> Statement<'a> {
939 self.next_token();
940 if self.current_token.kind != TokenKind::LeftParen {
941 todo!()
942 }
943 self.next_token();
944 let command = self.parse_expression();
945 if self.current_token.kind != TokenKind::RightParen {
946 todo!()
947 }
948 self.next_token();
949 Statement::System(command)
950 }
951
952 fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
953 let mut expressions = Vec::new();
954 let mut expect_more = false;
955
956 loop {
957 if self.current_token.kind == TokenKind::RightCurlyBrace
958 || self.current_token.kind == TokenKind::RightParen
959 || self.current_token.kind == TokenKind::Eof
960 {
961 break;
962 }
963
964 if self.current_token.kind == TokenKind::NewLine
965 || self.current_token.kind == TokenKind::Semicolon
966 {
967 if expect_more {
968 self.next_token();
969 continue;
970 }
971 break;
972 }
973
974 if self.current_token.kind == TokenKind::Comma {
975 self.next_token();
976 expect_more = true;
977 continue;
978 }
979
980 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
981 let expression = self.parse_expression();
982 expressions.push(expression);
983 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
984 while self.current_token.kind == TokenKind::Comma {
985 self.next_token();
986 expressions.push(self.parse_expression());
987 }
988 if self.current_token.kind != TokenKind::RightParen {
989 todo!()
990 }
991 self.next_token();
992 }
993 expect_more = false;
994 }
995
996 if self.current_token.kind == TokenKind::RightParen {
997 self.next_token();
998 }
999
1000 expressions
1001 }
1002
1003 fn parse_expression(&mut self) -> Expression<'a> {
1004 self.parse_expression_with_min_precedence(0)
1005 }
1006
1007 fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1008 let left = self.parse_primary_expression();
1009 self.parse_expression_suffix(left, min_precedence)
1010 }
1011
1012 fn parse_expression_suffix(
1013 &mut self,
1014 mut left: Expression<'a>,
1015 min_precedence: u8,
1016 ) -> Expression<'a> {
1017 const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1018 const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1019
1020 loop {
1021 if self.current_token.kind == TokenKind::QuestionMark {
1022 if min_precedence > 0 {
1023 break;
1024 }
1025 self.next_token_in_regex_context();
1026 let then_expr = self.parse_expression_with_min_precedence(0);
1027 if self.current_token.kind != TokenKind::Colon {
1028 todo!()
1029 }
1030 self.next_token_in_regex_context();
1031 let else_expr = self.parse_expression_with_min_precedence(0);
1032 left = Expression::Ternary {
1033 condition: Box::new(left),
1034 then_expr: Box::new(then_expr),
1035 else_expr: Box::new(else_expr),
1036 };
1037 continue;
1038 }
1039
1040 if infix_operator_precedence(&self.current_token.kind).is_none()
1041 && is_expression_start(&self.current_token.kind)
1042 {
1043 if CONCAT_LEFT_PRECEDENCE < min_precedence {
1044 break;
1045 }
1046
1047 let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1048 left = Expression::Concatenation {
1049 left: Box::new(left),
1050 right: Box::new(right),
1051 };
1052 continue;
1053 }
1054
1055 let (left_precedence, right_precedence) =
1056 match infix_operator_precedence(&self.current_token.kind) {
1057 Some(value) => value,
1058 None => break,
1059 };
1060
1061 if left_precedence < min_precedence {
1062 break;
1063 }
1064
1065 let operator = self.current_token.clone();
1066 if matches!(
1067 operator.kind,
1068 TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1069 ) {
1070 self.next_token_in_regex_context();
1071 } else {
1072 self.next_token();
1073 }
1074 let right = self.parse_expression_with_min_precedence(right_precedence);
1075
1076 left = Expression::Infix {
1077 left: Box::new(left),
1078 operator,
1079 right: Box::new(right),
1080 };
1081 }
1082
1083 left
1084 }
1085
1086 fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1087 let mut condition = self.parse_expression();
1088 if self.current_token.kind == TokenKind::Comma {
1089 while self.current_token.kind == TokenKind::Comma {
1090 let operator = self.current_token.clone();
1091 self.next_token_in_regex_context();
1092 let right = self.parse_expression();
1093 condition = Expression::Infix {
1094 left: Box::new(condition),
1095 operator,
1096 right: Box::new(right),
1097 };
1098 }
1099 if self.current_token.kind != TokenKind::RightParen {
1100 todo!()
1101 }
1102 self.next_token();
1103 condition = self.parse_expression_suffix(condition, 0);
1104 }
1105 condition
1106 }
1107
1108 fn parse_primary_expression(&mut self) -> Expression<'a> {
1109 if self.current_token.kind == TokenKind::Minus {
1110 let operator = self.current_token.clone();
1111 self.next_token();
1112 let right = self.parse_primary_expression();
1113 return Expression::Infix {
1114 left: Box::new(Expression::Number(0.0)),
1115 operator,
1116 right: Box::new(right),
1117 };
1118 }
1119 if self.current_token.kind == TokenKind::Plus {
1120 self.next_token();
1121 return self.parse_primary_expression();
1122 }
1123 if self.current_token.kind == TokenKind::ExclamationMark {
1124 self.next_token_in_regex_context();
1125 let expression = self.parse_primary_expression();
1126 return Expression::Not(Box::new(expression));
1127 }
1128 if self.current_token.kind == TokenKind::Increment {
1129 self.next_token();
1130 let expression = self.parse_primary_expression();
1131 return Expression::PreIncrement(Box::new(expression));
1132 }
1133 if self.current_token.kind == TokenKind::Decrement {
1134 self.next_token();
1135 let expression = self.parse_primary_expression();
1136 return Expression::PreDecrement(Box::new(expression));
1137 }
1138
1139 let mut expression = self.parse_primary_atom();
1140 if self.current_token.kind == TokenKind::Increment {
1141 self.next_token();
1142 expression = Expression::PostIncrement(Box::new(expression));
1143 } else if self.current_token.kind == TokenKind::Decrement {
1144 self.next_token();
1145 expression = Expression::PostDecrement(Box::new(expression));
1146 }
1147 expression
1148 }
1149
1150 fn parse_primary_atom(&mut self) -> Expression<'a> {
1151 match self.current_token.kind {
1152 TokenKind::String => {
1153 let expression = Expression::String(self.current_token.literal);
1154 self.next_token();
1155 expression
1156 }
1157 TokenKind::Regex => {
1158 let expression = Expression::Regex(self.current_token.literal);
1159 self.next_token();
1160 expression
1161 }
1162 TokenKind::Number => {
1163 let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
1164 Expression::Number(value)
1165 } else {
1166 todo!()
1167 };
1168 self.next_token();
1169 expression
1170 }
1171 TokenKind::DollarSign => {
1172 self.next_token();
1173 let expression = self.parse_primary_atom();
1174 Expression::Field(Box::new(expression))
1175 }
1176 TokenKind::LeftParen => {
1177 self.next_token();
1178 let expression = self.parse_expression();
1179 if self.current_token.kind == TokenKind::RightParen {
1180 self.next_token();
1181 }
1182 expression
1183 }
1184 TokenKind::Identifier => {
1185 let identifier = self.current_token.clone();
1186 self.next_token();
1187 if self.current_token.kind == TokenKind::LeftParen
1188 && self.token_is_immediately_after(&identifier)
1189 {
1190 let args = self.parse_call_arguments();
1191 return Expression::FunctionCall {
1192 name: identifier.literal,
1193 args,
1194 };
1195 }
1196 if self.current_token.kind == TokenKind::LeftSquareBracket {
1197 self.next_token_in_regex_context();
1198 let index = self.parse_array_index_expression();
1199 if self.current_token.kind != TokenKind::RightSquareBracket {
1200 todo!()
1201 }
1202 self.next_token();
1203 Expression::ArrayAccess {
1204 identifier: identifier.literal,
1205 index: Box::new(index),
1206 }
1207 } else {
1208 Expression::Identifier(identifier.literal)
1209 }
1210 }
1211 TokenKind::Length => {
1212 self.next_token();
1213 if self.current_token.kind == TokenKind::LeftParen {
1214 self.next_token();
1215 if self.current_token.kind == TokenKind::RightParen {
1216 self.next_token();
1217 Expression::Length(None)
1218 } else {
1219 let expression = self.parse_expression();
1220 if self.current_token.kind != TokenKind::RightParen {
1221 todo!()
1222 }
1223 self.next_token();
1224 Expression::Length(Some(Box::new(expression)))
1225 }
1226 } else {
1227 Expression::Length(None)
1228 }
1229 }
1230 TokenKind::Substr => {
1231 self.next_token();
1232 if self.current_token.kind != TokenKind::LeftParen {
1233 todo!()
1234 }
1235 self.next_token();
1236 let string = self.parse_expression();
1237 if self.current_token.kind != TokenKind::Comma {
1238 todo!()
1239 }
1240 self.next_token();
1241 let start = self.parse_expression();
1242 let mut length = None;
1243 if self.current_token.kind == TokenKind::Comma {
1244 self.next_token();
1245 length = Some(Box::new(self.parse_expression()));
1246 }
1247 if self.current_token.kind != TokenKind::RightParen {
1248 todo!()
1249 }
1250 self.next_token();
1251 Expression::Substr {
1252 string: Box::new(string),
1253 start: Box::new(start),
1254 length,
1255 }
1256 }
1257 TokenKind::Rand => {
1258 self.next_token();
1259 if self.current_token.kind == TokenKind::LeftParen {
1260 self.next_token();
1261 if self.current_token.kind != TokenKind::RightParen {
1262 todo!()
1263 }
1264 self.next_token();
1265 }
1266 Expression::Rand
1267 }
1268 TokenKind::Close
1269 | TokenKind::Cos
1270 | TokenKind::Exp
1271 | TokenKind::Index
1272 | TokenKind::Int
1273 | TokenKind::Log
1274 | TokenKind::Match
1275 | TokenKind::Sin
1276 | TokenKind::Sprintf
1277 | TokenKind::Split
1278 | TokenKind::Sqrt
1279 | TokenKind::Srand => {
1280 let name = self.current_token.literal;
1281 self.next_token();
1282 if self.current_token.kind == TokenKind::LeftParen {
1283 let args = self.parse_call_arguments();
1284 return Expression::FunctionCall { name, args };
1285 }
1286 Expression::Number(0.0)
1287 }
1288 _ => {
1289 panic!(
1290 "parse_primary_expression not yet implemented, found token: {:?}",
1291 self.current_token
1292 )
1293 }
1294 }
1295 }
1296
1297 pub fn parse_program(&mut self) -> Program<'_> {
1298 let mut program = Program::new();
1299
1300 while !self.is_eof() {
1301 match self.parse_next_rule() {
1302 Some(Rule::Begin(action)) => program.add_begin_block(action),
1303 Some(Rule::End(action)) => program.add_end_block(action),
1304 Some(rule) => program.add_rule(rule),
1305 None => {}
1306 }
1307 self.next_token_in_regex_context();
1308 }
1309
1310 for definition in self.function_definitions.drain(..) {
1311 program.add_function_definition(definition);
1312 }
1313
1314 program
1315 }
1316
1317 fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1318 if self.current_token.kind != TokenKind::LeftParen {
1319 return vec![];
1320 }
1321 self.next_token_in_regex_context();
1322 let mut args = Vec::new();
1323 while self.current_token.kind != TokenKind::RightParen
1324 && self.current_token.kind != TokenKind::Eof
1325 {
1326 if self.current_token.kind == TokenKind::Comma {
1327 self.next_token();
1328 continue;
1329 }
1330 args.push(self.parse_expression());
1331 }
1332 if self.current_token.kind == TokenKind::RightParen {
1333 self.next_token();
1334 }
1335 args
1336 }
1337}
1338
1339fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1340 match kind {
1341 TokenKind::Assign
1342 | TokenKind::AddAssign
1343 | TokenKind::SubtractAssign
1344 | TokenKind::MultiplyAssign
1345 | TokenKind::DivideAssign
1346 | TokenKind::ModuloAssign
1347 | TokenKind::PowerAssign => Some((0, 0)),
1348 TokenKind::Or => Some((1, 2)),
1349 TokenKind::And => Some((3, 4)),
1350 TokenKind::Equal
1351 | TokenKind::NotEqual
1352 | TokenKind::GreaterThan
1353 | TokenKind::GreaterThanOrEqual
1354 | TokenKind::In
1355 | TokenKind::LessThan
1356 | TokenKind::LessThanOrEqual
1357 | TokenKind::Tilde
1358 | TokenKind::NoMatch => Some((5, 6)),
1359 TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1360 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1361 TokenKind::Caret => Some((13, 12)),
1362 _ => None,
1363 }
1364}
1365
1366fn is_expression_start(kind: &TokenKind) -> bool {
1367 matches!(
1368 kind,
1369 TokenKind::String
1370 | TokenKind::Regex
1371 | TokenKind::Number
1372 | TokenKind::DollarSign
1373 | TokenKind::LeftParen
1374 | TokenKind::Identifier
1375 | TokenKind::Cos
1376 | TokenKind::Exp
1377 | TokenKind::Index
1378 | TokenKind::Int
1379 | TokenKind::Length
1380 | TokenKind::Log
1381 | TokenKind::Match
1382 | TokenKind::Rand
1383 | TokenKind::Sin
1384 | TokenKind::Sprintf
1385 | TokenKind::Split
1386 | TokenKind::Sqrt
1387 | TokenKind::Srand
1388 | TokenKind::Substr
1389 | TokenKind::Increment
1390 | TokenKind::Decrement
1391 )
1392}
1393
1394fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1395 let (kind, literal) = match token.kind {
1396 TokenKind::AddAssign => (TokenKind::Plus, "+"),
1397 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1398 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1399 TokenKind::DivideAssign => (TokenKind::Division, "/"),
1400 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1401 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1402 _ => todo!(),
1403 };
1404
1405 Token::new(kind, literal, token.span.start)
1406}
1407
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and asserts that the program renders (via `Display`)
    /// as `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than
    /// at this helper.
    #[track_caller]
    fn assert_renders_as(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();

        assert_eq!(expected, program.to_string());
    }

    /// Asserts that `source` parses and renders back to exactly itself.
    #[track_caller]
    fn assert_round_trips(source: &str) {
        assert_renders_as(source, source);
    }

    /// Asserts that `source` parses to a program whose `len()` is 1 and which
    /// renders as `expected`.
    #[track_caller]
    fn assert_single_item_renders_as(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(expected, program.to_string());
    }

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));

        assert_eq!(parser.current_token.literal, "42");
        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        let program = parser.parse_program();

        assert_eq!(program.len(), 0);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_item_renders_as("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_item_renders_as("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_item_renders_as("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_item_renders_as("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_item_renders_as("/foo/ { print }", "/foo/ { print }");
    }

    #[test]
    fn parse_print_infix_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert!(matches!(**left, Expression::Number(1.0)));
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**right, Expression::Number(2.0)));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Asterisk);
                assert!(matches!(**right, Expression::Number(3.0)));
                assert!(matches!(**left, Expression::Infix { .. }));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_multiplication_has_higher_precedence_than_addition() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**left, Expression::Number(1.0)));
                match &**right {
                    Expression::Infix {
                        operator: right_op, ..
                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
                    _ => panic!("expected nested infix expression"),
                }
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_power_is_right_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Caret);
                assert!(matches!(**left, Expression::Number(2.0)));
                match &**right {
                    Expression::Infix {
                        operator: right_op, ..
                    } => assert_eq!(right_op.kind, TokenKind::Caret),
                    _ => panic!("expected nested infix expression"),
                }
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_minus_is_left_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Minus);
                match &**left {
                    Expression::Infix {
                        operator: left_op, ..
                    } => assert_eq!(left_op.kind, TokenKind::Minus),
                    _ => panic!("expected nested infix expression"),
                }
                assert!(matches!(**right, Expression::Number(1.0)));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        assert_eq!(exprs.len(), 1);
        match &exprs[0] {
            Expression::Concatenation { left, right } => {
                assert!(matches!(**left, Expression::String("Value:")));
                assert!(matches!(**right, Expression::Number(42.0)));
            }
            _ => panic!("expected concatenation expression"),
        }
    }

    #[test]
    fn parse_continue_statement() {
        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        assert!(matches!(statements[0], Statement::Continue));
    }

    #[test]
    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        let exprs = match &statements[1] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        assert_eq!(exprs.len(), 1);
        match &exprs[0] {
            Expression::Concatenation { left, right } => {
                assert!(matches!(**left, Expression::Identifier("x")));
                assert!(matches!(**right, Expression::PreIncrement(_)));
            }
            _ => panic!("expected concatenation expression"),
        }
    }

    #[test]
    fn parse_print_field_expression() {
        let mut parser = Parser::new(Lexer::new("{ print $1 }"));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
            _ => panic!("expected field expression"),
        }
    }

    #[test]
    fn parse_print_with_commas() {
        assert_round_trips(r#"BEGIN { print "Value:", 42, $1 }"#);
    }

    #[test]
    fn parse_number_of_fields_identifier() {
        assert_round_trips(r#"BEGIN { print NF }"#);
    }

    #[test]
    fn parse_printf_with_format_and_arguments() {
        assert_round_trips(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#);
    }

    #[test]
    fn parse_add_assignment_and_pre_increment() {
        assert_round_trips(r#"/Asia/ { pop += $3; ++n }"#);
    }

    #[test]
    fn parse_regex_match_pattern_action() {
        assert_round_trips(r#"$4 ~ /Asia/ { print $1 }"#);
    }

    #[test]
    fn parse_not_pattern_action() {
        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        match rule {
            Rule::PatternAction {
                pattern: Some(Expression::Not(inner)),
                action: Some(Action { statements }),
            } => {
                assert!(matches!(**inner, Expression::Infix { .. }));
                assert!(matches!(statements[0], Statement::Print(_)));
            }
            _ => panic!("expected negated pattern action"),
        }
    }

    #[test]
    fn parse_print_with_line_continuation_after_comma() {
        assert_renders_as(
            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
        );
    }

    #[test]
    fn parse_gsub_statement() {
        assert_round_trips(r#"{ gsub(/USA/, "United States"); print }"#);
    }

    #[test]
    fn parse_gsub_statement_with_target() {
        assert_round_trips(r#"{ gsub(/[ \t]+/, "", t) }"#);
    }

    #[test]
    fn parse_system_statement() {
        assert_round_trips(r#"{ system("cat " $2) }"#);
    }

    #[test]
    fn parse_print_length_builtin_expression() {
        assert_round_trips(r#"{ print length, $0 }"#);
    }

    #[test]
    fn parse_length_expression_as_rule_pattern() {
        assert_round_trips(
            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
        );
    }

    #[test]
    fn parse_field_assignment_with_substr() {
        assert_round_trips(r#"{ $1 = substr($1, 1, 3); print }"#);
    }

    #[test]
    fn parse_assignment_with_concatenation_and_substr() {
        assert_round_trips(r#"{ s = s " " substr($1, 1, 3) }"#);
    }

    #[test]
    fn parse_field_divide_assignment() {
        // Compound assignment is rendered in its expanded form.
        assert_renders_as(r#"{ $2 /= 1000; print }"#, r#"{ $2 = $2 / 1000; print }"#);
    }

    #[test]
    fn parse_chained_assignment() {
        assert_round_trips(r#"BEGIN { FS = OFS = "\t" }"#);
    }

    #[test]
    fn parse_if_statement_with_block() {
        assert_round_trips(r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#);
    }

    #[test]
    fn parse_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#);
    }

    #[test]
    fn parse_while_with_single_body_statement() {
        // A single-statement body is rendered wrapped in braces.
        assert_renders_as(
            r#"{ while (n > 1) print n }"#,
            r#"{ while (n > 1) { print n } }"#,
        );
    }

    #[test]
    fn parse_do_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#);
    }

    #[test]
    fn parse_for_with_empty_body_statement() {
        assert_renders_as(
            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
            r#"{ for (i = 1; i <= NF; s += $i++) { }; print s }"#,
        );
    }

    #[test]
    fn parse_post_decrement_statement() {
        assert_renders_as(r#"{ k-- ; n-- }"#, r#"{ k--; n-- }"#);
    }

    #[test]
    fn parse_rand_expression() {
        assert_round_trips(r#"BEGIN { print rand() }"#);
    }

    #[test]
    fn parse_math_builtin_expressions() {
        assert_round_trips(r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#);
    }

    #[test]
    fn parse_index_builtin_expression() {
        assert_round_trips(r#"{ print index(1, $1) }"#);
    }

    #[test]
    fn parse_match_builtin_expression() {
        assert_round_trips(r#"{ print match($NF, $1), RSTART, RLENGTH }"#);
    }

    #[test]
    fn parse_in_membership_expression() {
        assert_round_trips(r#"{ print 1 in x }"#);
    }

    #[test]
    fn parse_parenthesized_composite_membership_expression() {
        assert_renders_as(
            r#"{ if (($0, $1) in x) print "yes" }"#,
            r#"{ if ($0, $1 in x) { print "yes" } }"#,
        );
    }

    #[test]
    fn parse_for_loop_with_single_body_statement() {
        assert_renders_as(
            r#"{ for (i = 1; i <= NF; i++) print $i }"#,
            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
        );
    }

    #[test]
    fn parse_if_with_single_statement_body() {
        assert_renders_as(
            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
        );
    }

    #[test]
    fn parse_exit_statement() {
        assert_round_trips(r#"NR >= 10 { exit }"#);
    }

    #[test]
    fn parse_exit_statement_with_status() {
        assert_round_trips(r#"$1 < 5000 { exit NR }"#);
    }

    #[test]
    fn parse_user_defined_function_call_statement() {
        let mut parser = Parser::new(Lexer::new(
            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
        ));

        let program = parser.parse_program();

        let definition = program
            .function_definition("myabort")
            .expect("expected function definition");
        assert_eq!(definition.parameters, vec!["n"]);
        assert_eq!(definition.statements.len(), 1);
    }

    #[test]
    fn parse_delete_array_element_statement() {
        assert_round_trips(r#"{ delete x[i, j] }"#);
    }

    #[test]
    fn parse_array_add_assignment_and_access() {
        assert_round_trips(r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#);
    }

    #[test]
    fn parse_for_in_loop() {
        assert_renders_as(
            r#"END { for (name in area) print name ":" area[name] }"#,
            r#"END { for (name in area) { print name ":" area[name] } }"#,
        );
    }

    #[test]
    fn parse_print_redirection() {
        assert_renders_as(r#"{ print >"tempbig" }"#, r#"{ print > "tempbig" }"#);
    }

    #[test]
    fn parse_print_pipe() {
        assert_round_trips(r#"{ print c ":" pop[c] | "sort" }"#);
    }
}