1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
/// Parser state: a token source plus one token of lookahead.
///
/// Function definitions encountered while parsing are accumulated on the
/// parser itself rather than being returned as rules.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source; advanced every time the parser consumes a token.
    lexer: Lexer<'a>,
    /// The token currently being examined (single-token lookahead).
    current_token: Token<'a>,
    /// Definitions pushed by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
13
14impl<'a> Parser<'a> {
15 pub fn new(mut lexer: Lexer<'a>) -> Self {
16 let current_token = lexer.next_token_regex_aware();
17 Parser {
18 lexer,
19 current_token,
20 function_definitions: Vec::new(),
21 }
22 }
23
24 fn next_token(&mut self) {
25 self.current_token = self.lexer.next_token();
26 }
27
28 fn next_token_in_regex_context(&mut self) {
29 self.current_token = self.lexer.next_token_regex_aware();
30 }
31
32 fn is_eof(&self) -> bool {
33 self.current_token.kind == TokenKind::Eof
34 }
35
36 fn is_statement_terminator(&self) -> bool {
37 matches!(
38 self.current_token.kind,
39 TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40 )
41 }
42
43 fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44 self.current_token.span.start == previous.span.start + previous.literal.len()
45 }
46
47 fn parse_array_index_expression(&mut self) -> Expression<'a> {
48 let mut index = self.parse_expression();
49 while self.current_token.kind == TokenKind::Comma {
50 let operator = self.current_token.clone();
51 self.next_token_in_regex_context();
52 let right = self.parse_expression();
53 index = Expression::Infix {
54 left: Box::new(index),
55 operator,
56 right: Box::new(right),
57 };
58 }
59 index
60 }
61
62 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
63 match &self.current_token.kind {
64 TokenKind::Begin => {
65 self.next_token();
66 match self.parse_action() {
67 Rule::Action(action) => Some(Rule::Begin(action)),
68 _ => panic!("Expected action after BEGIN"),
69 }
70 }
71 TokenKind::NewLine => {
72 self.next_token_in_regex_context();
73 self.parse_next_rule()
74 }
75 TokenKind::Eof => None,
76 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
77 TokenKind::Function => {
78 self.parse_function_definition();
79 None
80 }
81 TokenKind::End => {
82 self.next_token();
83 match self.parse_action() {
84 Rule::Action(action) => Some(Rule::End(action)),
85 _ => panic!("Expected action after END"),
86 }
87 }
88 TokenKind::Regex
89 | TokenKind::String
90 | TokenKind::Number
91 | TokenKind::DollarSign
92 | TokenKind::LeftParen
93 | TokenKind::Identifier
94 | TokenKind::Cos
95 | TokenKind::Exp
96 | TokenKind::Index
97 | TokenKind::Int
98 | TokenKind::Length
99 | TokenKind::Log
100 | TokenKind::Rand
101 | TokenKind::Sin
102 | TokenKind::Sprintf
103 | TokenKind::Split
104 | TokenKind::Sqrt
105 | TokenKind::Srand
106 | TokenKind::Substr
107 | TokenKind::Increment
108 | TokenKind::Decrement => self.parse_pattern_rule(),
109 _ => panic!(
110 "parse_next_rule not yet implemented, found token: {:?}",
111 self.current_token
112 ),
113 }
114 }
115
116 fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
117 let mut pattern = self.parse_expression();
118 if self.current_token.kind == TokenKind::Comma {
119 let operator = self.current_token.clone();
120 self.next_token_in_regex_context();
121 let right = self.parse_expression();
122 pattern = Expression::Infix {
123 left: Box::new(pattern),
124 operator,
125 right: Box::new(right),
126 };
127 }
128 let pattern = Some(pattern);
129
130 if self.current_token.kind == TokenKind::LeftCurlyBrace {
131 match self.parse_action() {
132 Rule::Action(action) => Some(Rule::PatternAction {
133 pattern,
134 action: Some(action),
135 }),
136 _ => panic!("Expected action after pattern"),
137 }
138 } else {
139 Some(Rule::PatternAction {
140 pattern,
141 action: None,
142 })
143 }
144 }
145
146 fn parse_action(&mut self) -> Rule<'a> {
147 self.next_token(); let pattern = None;
150
151 let mut statements = Vec::new();
152 while self.current_token.kind != TokenKind::RightCurlyBrace
153 && self.current_token.kind != TokenKind::Eof
154 {
155 while self.current_token.kind == TokenKind::NewLine
156 || self.current_token.kind == TokenKind::Semicolon
157 {
158 self.next_token();
159 }
160
161 if self.current_token.kind == TokenKind::RightCurlyBrace
162 || self.current_token.kind == TokenKind::Eof
163 {
164 break;
165 }
166
167 statements.push(self.parse_statement());
168 }
169
170 if pattern.is_some() {
171 Rule::PatternAction {
172 pattern,
173 action: Some(Action { statements }),
174 }
175 } else {
176 Rule::Action(Action { statements })
177 }
178 }
179
180 fn parse_statement(&mut self) -> Statement<'a> {
181 match self.current_token.kind {
182 TokenKind::Print => self.parse_print_function(),
183 TokenKind::Printf => self.parse_printf_function(),
184 TokenKind::System => self.parse_system_function(),
185 TokenKind::Split => self.parse_split_statement(),
186 TokenKind::Sub => self.parse_sub_function(),
187 TokenKind::Gsub => self.parse_gsub_function(),
188 TokenKind::Break => self.parse_break_statement(),
189 TokenKind::Continue => self.parse_continue_statement(),
190 TokenKind::Delete => self.parse_delete_statement(),
191 TokenKind::If => self.parse_if_statement(),
192 TokenKind::Do => self.parse_do_statement(),
193 TokenKind::While => self.parse_while_statement(),
194 TokenKind::For => self.parse_for_statement(),
195 TokenKind::Return => self.parse_return_statement(),
196 TokenKind::Next => self.parse_next_statement(),
197 TokenKind::Exit => self.parse_exit_statement(),
198 TokenKind::Identifier => self.parse_assignment_statement(),
199 TokenKind::DollarSign => self.parse_field_assignment_statement(),
200 TokenKind::Increment => self.parse_pre_increment_statement(),
201 TokenKind::Decrement => self.parse_pre_decrement_statement(),
202 _ => todo!(),
203 }
204 }
205
206 fn parse_function_definition(&mut self) {
207 self.next_token();
208 if self.current_token.kind != TokenKind::Identifier {
209 todo!()
210 }
211 let name = self.current_token.literal;
212 self.next_token();
213 if self.current_token.kind != TokenKind::LeftParen {
214 todo!()
215 }
216 self.next_token();
217
218 let mut parameters = Vec::new();
219 while self.current_token.kind != TokenKind::RightParen {
220 if self.current_token.kind != TokenKind::Identifier {
221 todo!()
222 }
223 parameters.push(self.current_token.literal);
224 self.next_token();
225 if self.current_token.kind == TokenKind::Comma {
226 self.next_token();
227 } else if self.current_token.kind != TokenKind::RightParen {
228 todo!()
229 }
230 }
231
232 self.next_token();
233 while self.current_token.kind == TokenKind::NewLine {
234 self.next_token();
235 }
236 if self.current_token.kind != TokenKind::LeftCurlyBrace {
237 todo!()
238 }
239
240 let mut statements = Vec::new();
241 self.next_token(); while self.current_token.kind != TokenKind::RightCurlyBrace
243 && self.current_token.kind != TokenKind::Eof
244 {
245 while self.current_token.kind == TokenKind::NewLine
246 || self.current_token.kind == TokenKind::Semicolon
247 {
248 self.next_token();
249 }
250
251 if self.current_token.kind == TokenKind::RightCurlyBrace
252 || self.current_token.kind == TokenKind::Eof
253 {
254 break;
255 }
256
257 statements.push(self.parse_statement());
258 }
259 self.function_definitions.push(FunctionDefinition {
260 name,
261 parameters,
262 statements,
263 });
264 }
265
266 fn parse_assignment_statement(&mut self) -> Statement<'a> {
267 let identifier = self.current_token.clone();
268 self.next_token();
269 self.parse_assignment_statement_with_identifier(identifier)
270 }
271
272 fn parse_assignment_statement_with_identifier(&mut self, identifier: Token<'a>) -> Statement<'a> {
273 if self.current_token.kind == TokenKind::LeftParen
274 && self.token_is_immediately_after(&identifier)
275 {
276 let args = self.parse_call_arguments();
277 return Statement::Expression(Expression::FunctionCall {
278 name: identifier.literal,
279 args,
280 });
281 }
282 if self.current_token.kind == TokenKind::LeftSquareBracket {
283 self.next_token_in_regex_context();
284 let index = self.parse_array_index_expression();
285 if self.current_token.kind != TokenKind::RightSquareBracket {
286 todo!()
287 }
288 self.next_token();
289 if self.current_token.kind == TokenKind::Assign {
290 self.next_token();
291 let value = self.parse_expression();
292 return Statement::ArrayAssignment {
293 identifier: identifier.literal,
294 index,
295 value,
296 };
297 }
298 if self.current_token.kind == TokenKind::AddAssign {
299 self.next_token();
300 let value = self.parse_expression();
301 return Statement::ArrayAddAssignment {
302 identifier: identifier.literal,
303 index,
304 value,
305 };
306 }
307 if self.current_token.kind == TokenKind::Increment {
308 self.next_token();
309 return Statement::ArrayPostIncrement {
310 identifier: identifier.literal,
311 index,
312 };
313 }
314 if self.current_token.kind == TokenKind::Decrement {
315 self.next_token();
316 return Statement::ArrayPostDecrement {
317 identifier: identifier.literal,
318 index,
319 };
320 }
321 todo!()
322 }
323 if self.current_token.kind == TokenKind::Assign {
324 self.next_token();
325 if self.current_token.kind == TokenKind::Split {
326 return self.parse_split_assignment_statement(identifier.literal);
327 }
328 let value = self.parse_expression();
329 Statement::Assignment {
330 identifier: identifier.literal,
331 value,
332 }
333 } else if self.current_token.kind == TokenKind::Increment {
334 self.next_token();
335 Statement::PostIncrement {
336 identifier: identifier.literal,
337 }
338 } else if self.current_token.kind == TokenKind::Decrement {
339 self.next_token();
340 Statement::PostDecrement {
341 identifier: identifier.literal,
342 }
343 } else if self.current_token.kind == TokenKind::AddAssign {
344 self.next_token();
345 let value = self.parse_expression();
346 Statement::AddAssignment {
347 identifier: identifier.literal,
348 value,
349 }
350 } else if matches!(
351 self.current_token.kind,
352 TokenKind::SubtractAssign
353 | TokenKind::MultiplyAssign
354 | TokenKind::DivideAssign
355 | TokenKind::ModuloAssign
356 | TokenKind::PowerAssign
357 ) {
358 let assign_token = self.current_token.clone();
359 self.next_token();
360 let right_value = self.parse_expression();
361 Statement::Assignment {
362 identifier: identifier.literal,
363 value: Expression::Infix {
364 left: Box::new(Expression::Identifier(identifier.literal)),
365 operator: compound_assign_operator(&assign_token),
366 right: Box::new(right_value),
367 },
368 }
369 } else {
370 todo!()
371 }
372 }
373
374 fn parse_delete_statement(&mut self) -> Statement<'a> {
375 self.next_token();
376 if self.current_token.kind != TokenKind::Identifier {
377 todo!()
378 }
379 let identifier = self.current_token.literal;
380 self.next_token();
381 if self.current_token.kind != TokenKind::LeftSquareBracket {
382 return Statement::Delete {
383 identifier,
384 index: None,
385 };
386 }
387
388 self.next_token_in_regex_context();
389 let index = self.parse_array_index_expression();
390 if self.current_token.kind != TokenKind::RightSquareBracket {
391 todo!()
392 }
393 self.next_token();
394 Statement::Delete {
395 identifier,
396 index: Some(index),
397 }
398 }
399
400 fn parse_break_statement(&mut self) -> Statement<'a> {
401 self.next_token();
402 Statement::Break
403 }
404
405 fn parse_continue_statement(&mut self) -> Statement<'a> {
406 self.next_token();
407 Statement::Continue
408 }
409
410 fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
411 self.next_token();
412 if self.current_token.kind != TokenKind::Identifier {
413 todo!()
414 }
415 let identifier = self.current_token.literal;
416 self.next_token();
417 Statement::PreIncrement { identifier }
418 }
419
420 fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
421 self.next_token();
422 if self.current_token.kind != TokenKind::Identifier {
423 todo!()
424 }
425 let identifier = self.current_token.literal;
426 self.next_token();
427 Statement::PreDecrement { identifier }
428 }
429
430 fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
431 self.next_token();
432 if self.current_token.kind != TokenKind::LeftParen {
433 todo!()
434 }
435 self.next_token_in_regex_context();
436 let string = self.parse_expression();
437 if self.current_token.kind != TokenKind::Comma {
438 todo!()
439 }
440 self.next_token();
441 if self.current_token.kind != TokenKind::Identifier {
442 todo!()
443 }
444 let array = self.current_token.literal;
445 self.next_token();
446 if self.current_token.kind != TokenKind::RightParen {
447 todo!()
448 }
449 self.next_token();
450 Statement::SplitAssignment {
451 identifier,
452 string,
453 array,
454 }
455 }
456
457 fn parse_split_statement(&mut self) -> Statement<'a> {
458 self.next_token();
459 if self.current_token.kind != TokenKind::LeftParen {
460 todo!()
461 }
462 self.next_token_in_regex_context();
463 let string = self.parse_expression();
464 if self.current_token.kind != TokenKind::Comma {
465 todo!()
466 }
467 self.next_token();
468 if self.current_token.kind != TokenKind::Identifier {
469 todo!()
470 }
471 let array = self.current_token.literal;
472 self.next_token();
473 if self.current_token.kind != TokenKind::RightParen {
474 todo!()
475 }
476 self.next_token();
477 Statement::Split { string, array }
478 }
479
480 fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
481 self.next_token();
482 let field = self.parse_primary_expression();
483 let assign_token = self.current_token.clone();
484 self.next_token();
485 let right_value = self.parse_expression();
486
487 let value = if assign_token.kind == TokenKind::Assign {
488 right_value
489 } else {
490 let operator = compound_assign_operator(&assign_token);
491 Expression::Infix {
492 left: Box::new(Expression::Field(Box::new(field.clone()))),
493 operator,
494 right: Box::new(right_value),
495 }
496 };
497 Statement::FieldAssignment { field, value }
498 }
499
500 fn parse_if_statement(&mut self) -> Statement<'a> {
501 self.next_token();
502 if self.current_token.kind != TokenKind::LeftParen {
503 todo!()
504 }
505 self.next_token_in_regex_context();
506 let condition = self.parse_expression();
507 if self.current_token.kind != TokenKind::RightParen {
508 todo!()
509 }
510 self.next_token();
511 while self.current_token.kind == TokenKind::NewLine
512 || self.current_token.kind == TokenKind::Semicolon
513 {
514 self.next_token();
515 }
516 let then_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
517 self.parse_statement_block()
518 } else {
519 vec![self.parse_statement()]
520 };
521
522 while self.current_token.kind == TokenKind::NewLine
523 || self.current_token.kind == TokenKind::Semicolon
524 {
525 self.next_token();
526 }
527
528 if self.current_token.kind == TokenKind::Else {
529 self.next_token();
530 while self.current_token.kind == TokenKind::NewLine
531 || self.current_token.kind == TokenKind::Semicolon
532 {
533 self.next_token();
534 }
535 let else_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
536 self.parse_statement_block()
537 } else {
538 vec![self.parse_statement()]
539 };
540 return Statement::IfElse {
541 condition,
542 then_statements,
543 else_statements,
544 };
545 }
546
547 Statement::If {
548 condition,
549 then_statements,
550 }
551 }
552
553 fn parse_exit_statement(&mut self) -> Statement<'a> {
554 self.next_token();
555 let status = if self.is_statement_terminator() {
556 None
557 } else {
558 Some(self.parse_expression())
559 };
560 Statement::Exit(status)
561 }
562
563 fn parse_return_statement(&mut self) -> Statement<'a> {
564 self.next_token();
565 let value = if self.is_statement_terminator() {
566 None
567 } else {
568 Some(self.parse_expression())
569 };
570 Statement::Return(value)
571 }
572
573 fn parse_next_statement(&mut self) -> Statement<'a> {
574 self.next_token();
575 Statement::Next
576 }
577
578 fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
579 self.next_token(); let mut statements = Vec::new();
581 while self.current_token.kind != TokenKind::RightCurlyBrace
582 && self.current_token.kind != TokenKind::Eof
583 {
584 while self.current_token.kind == TokenKind::NewLine
585 || self.current_token.kind == TokenKind::Semicolon
586 {
587 self.next_token();
588 }
589
590 if self.current_token.kind == TokenKind::RightCurlyBrace
591 || self.current_token.kind == TokenKind::Eof
592 {
593 break;
594 }
595 statements.push(self.parse_statement());
596 }
597 if self.current_token.kind == TokenKind::RightCurlyBrace {
598 self.next_token();
599 }
600 statements
601 }
602
603 fn parse_while_statement(&mut self) -> Statement<'a> {
604 self.next_token();
605 if self.current_token.kind != TokenKind::LeftParen {
606 todo!()
607 }
608 self.next_token_in_regex_context();
609 let condition = self.parse_expression();
610 if self.current_token.kind != TokenKind::RightParen {
611 todo!()
612 }
613 self.next_token();
614 while self.current_token.kind == TokenKind::NewLine
615 || self.current_token.kind == TokenKind::Semicolon
616 {
617 self.next_token();
618 }
619
620 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
621 self.parse_statement_block()
622 } else {
623 vec![self.parse_statement()]
624 };
625 Statement::While {
626 condition,
627 statements,
628 }
629 }
630
631 fn parse_do_statement(&mut self) -> Statement<'a> {
632 self.next_token();
633 while self.current_token.kind == TokenKind::NewLine
634 || self.current_token.kind == TokenKind::Semicolon
635 {
636 self.next_token();
637 }
638
639 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
640 self.parse_statement_block()
641 } else {
642 vec![self.parse_statement()]
643 };
644
645 while self.current_token.kind == TokenKind::NewLine
646 || self.current_token.kind == TokenKind::Semicolon
647 {
648 self.next_token();
649 }
650
651 if self.current_token.kind != TokenKind::While {
652 todo!()
653 }
654 self.next_token();
655 if self.current_token.kind != TokenKind::LeftParen {
656 todo!()
657 }
658 self.next_token_in_regex_context();
659 let condition = self.parse_expression();
660 if self.current_token.kind != TokenKind::RightParen {
661 todo!()
662 }
663 self.next_token();
664 Statement::DoWhile {
665 condition,
666 statements,
667 }
668 }
669
670 fn parse_for_statement(&mut self) -> Statement<'a> {
671 self.next_token();
672 if self.current_token.kind != TokenKind::LeftParen {
673 todo!()
674 }
675 self.next_token();
676 while self.current_token.kind == TokenKind::NewLine {
677 self.next_token();
678 }
679
680 let init = if self.current_token.kind == TokenKind::Semicolon {
681 Statement::Empty
682 } else if self.current_token.kind == TokenKind::Identifier {
683 let variable = self.current_token.clone();
684 self.next_token();
685 if self.current_token.kind == TokenKind::In {
686 self.next_token();
687 if self.current_token.kind != TokenKind::Identifier {
688 todo!()
689 }
690 let array = self.current_token.literal;
691 self.next_token();
692 if self.current_token.kind != TokenKind::RightParen {
693 todo!()
694 }
695 self.next_token();
696 while self.current_token.kind == TokenKind::NewLine
697 || self.current_token.kind == TokenKind::Semicolon
698 {
699 self.next_token();
700 }
701 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
702 self.parse_statement_block()
703 } else {
704 vec![self.parse_statement()]
705 };
706 return Statement::ForIn {
707 variable: variable.literal,
708 array,
709 statements,
710 };
711 }
712 self.parse_assignment_statement_with_identifier(variable)
713 } else {
714 self.parse_statement()
715 };
716 while self.current_token.kind == TokenKind::NewLine {
717 self.next_token();
718 }
719 if self.current_token.kind != TokenKind::Semicolon {
720 todo!()
721 }
722 self.next_token_in_regex_context();
723 while self.current_token.kind == TokenKind::NewLine {
724 self.next_token_in_regex_context();
725 }
726
727 let condition = if self.current_token.kind == TokenKind::Semicolon {
728 Expression::Number(1.0)
729 } else {
730 self.parse_expression()
731 };
732 while self.current_token.kind == TokenKind::NewLine {
733 self.next_token();
734 }
735 if self.current_token.kind != TokenKind::Semicolon {
736 todo!()
737 }
738 self.next_token_in_regex_context();
739 while self.current_token.kind == TokenKind::NewLine {
740 self.next_token_in_regex_context();
741 }
742
743 let update = if self.current_token.kind == TokenKind::RightParen {
744 Statement::Empty
745 } else {
746 self.parse_statement()
747 };
748 while self.current_token.kind == TokenKind::NewLine {
749 self.next_token();
750 }
751 if self.current_token.kind != TokenKind::RightParen {
752 todo!()
753 }
754 self.next_token();
755
756 while self.current_token.kind == TokenKind::NewLine
757 || self.current_token.kind == TokenKind::Semicolon
758 {
759 self.next_token();
760 }
761
762 let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
763 self.parse_statement_block()
764 } else {
765 vec![self.parse_statement()]
766 };
767
768 Statement::For {
769 init: Box::new(init),
770 condition,
771 update: Box::new(update),
772 statements,
773 }
774 }
775
776 fn parse_print_function(&mut self) -> Statement<'a> {
777 let mut expressions = Vec::new();
778 let mut expect_more = false;
779 self.next_token();
780
781 loop {
782 if self.current_token.kind == TokenKind::RightCurlyBrace
783 || self.current_token.kind == TokenKind::RightParen
784 || self.current_token.kind == TokenKind::Eof
785 || self.current_token.kind == TokenKind::GreaterThan
786 || self.current_token.kind == TokenKind::Append
787 || self.current_token.kind == TokenKind::Pipe
788 {
789 break;
790 }
791
792 if self.current_token.kind == TokenKind::NewLine
793 || self.current_token.kind == TokenKind::Semicolon
794 {
795 if expect_more {
796 self.next_token();
797 continue;
798 }
799 break;
800 }
801
802 if self.current_token.kind == TokenKind::Comma {
803 self.next_token();
804 expect_more = true;
805 continue;
806 }
807
808 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
809 let expression = self.parse_expression();
810 expressions.push(expression);
811 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
812 while self.current_token.kind == TokenKind::Comma {
813 self.next_token();
814 expressions.push(self.parse_expression());
815 }
816 if self.current_token.kind != TokenKind::RightParen {
817 todo!()
818 }
819 self.next_token();
820 }
821 expect_more = false;
822 }
823 if self.current_token.kind == TokenKind::RightParen {
824 self.next_token();
825 }
826
827 if self.current_token.kind == TokenKind::GreaterThan
828 || self.current_token.kind == TokenKind::Append
829 {
830 let append = self.current_token.kind == TokenKind::Append;
831 self.next_token();
832 let target = self.parse_expression();
833 return Statement::PrintRedirect {
834 expressions,
835 target,
836 append,
837 };
838 }
839 if self.current_token.kind == TokenKind::Pipe {
840 self.next_token();
841 let target = self.parse_expression();
842 return Statement::PrintPipe {
843 expressions,
844 target,
845 };
846 }
847
848 Statement::Print(expressions)
849 }
850
851 fn parse_printf_function(&mut self) -> Statement<'a> {
852 self.next_token();
853 let expressions = if self.current_token.kind == TokenKind::LeftParen {
854 self.next_token_in_regex_context();
855 let mut expressions = Vec::new();
856 while self.current_token.kind != TokenKind::RightParen
857 && self.current_token.kind != TokenKind::Eof
858 {
859 if self.current_token.kind == TokenKind::Comma {
860 self.next_token();
861 continue;
862 }
863 expressions.push(self.parse_expression());
864 }
865 if self.current_token.kind == TokenKind::RightParen {
866 self.next_token();
867 }
868 expressions
869 } else {
870 self.parse_expression_list_until_action_end_from_current()
871 };
872
873 Statement::Printf(expressions)
874 }
875
876 fn parse_gsub_function(&mut self) -> Statement<'a> {
877 self.next_token();
878 if self.current_token.kind != TokenKind::LeftParen {
879 todo!()
880 }
881
882 self.next_token_in_regex_context();
883 let pattern = self.parse_expression();
884
885 if self.current_token.kind != TokenKind::Comma {
886 todo!()
887 }
888 self.next_token();
889 let replacement = self.parse_expression();
890
891 let target = if self.current_token.kind == TokenKind::Comma {
892 self.next_token();
893 Some(self.parse_expression())
894 } else {
895 None
896 };
897
898 if self.current_token.kind != TokenKind::RightParen {
899 todo!()
900 }
901 self.next_token();
902
903 Statement::Gsub {
904 pattern,
905 replacement,
906 target,
907 }
908 }
909
910 fn parse_sub_function(&mut self) -> Statement<'a> {
911 self.next_token();
912 if self.current_token.kind != TokenKind::LeftParen {
913 todo!()
914 }
915
916 self.next_token_in_regex_context();
917 let pattern = self.parse_expression();
918
919 if self.current_token.kind != TokenKind::Comma {
920 todo!()
921 }
922 self.next_token();
923 let replacement = self.parse_expression();
924
925 if self.current_token.kind == TokenKind::Comma {
926 todo!()
927 }
928
929 if self.current_token.kind != TokenKind::RightParen {
930 todo!()
931 }
932 self.next_token();
933
934 Statement::Sub {
935 pattern,
936 replacement,
937 }
938 }
939
940 fn parse_system_function(&mut self) -> Statement<'a> {
941 self.next_token();
942 if self.current_token.kind != TokenKind::LeftParen {
943 todo!()
944 }
945 self.next_token();
946 let command = self.parse_expression();
947 if self.current_token.kind != TokenKind::RightParen {
948 todo!()
949 }
950 self.next_token();
951 Statement::System(command)
952 }
953
954 fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
955 let mut expressions = Vec::new();
956 let mut expect_more = false;
957
958 loop {
959 if self.current_token.kind == TokenKind::RightCurlyBrace
960 || self.current_token.kind == TokenKind::RightParen
961 || self.current_token.kind == TokenKind::Eof
962 {
963 break;
964 }
965
966 if self.current_token.kind == TokenKind::NewLine
967 || self.current_token.kind == TokenKind::Semicolon
968 {
969 if expect_more {
970 self.next_token();
971 continue;
972 }
973 break;
974 }
975
976 if self.current_token.kind == TokenKind::Comma {
977 self.next_token();
978 expect_more = true;
979 continue;
980 }
981
982 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
983 let expression = self.parse_expression();
984 expressions.push(expression);
985 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
986 while self.current_token.kind == TokenKind::Comma {
987 self.next_token();
988 expressions.push(self.parse_expression());
989 }
990 if self.current_token.kind != TokenKind::RightParen {
991 todo!()
992 }
993 self.next_token();
994 }
995 expect_more = false;
996 }
997
998 if self.current_token.kind == TokenKind::RightParen {
999 self.next_token();
1000 }
1001
1002 expressions
1003 }
1004
1005 fn parse_expression(&mut self) -> Expression<'a> {
1006 self.parse_expression_with_min_precedence(0)
1007 }
1008
1009 fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1010 const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1011 const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1012 let mut left = self.parse_primary_expression();
1013
1014 loop {
1015 if self.current_token.kind == TokenKind::QuestionMark {
1016 if min_precedence > 0 {
1017 break;
1018 }
1019 self.next_token_in_regex_context();
1020 let then_expr = self.parse_expression_with_min_precedence(0);
1021 if self.current_token.kind != TokenKind::Colon {
1022 todo!()
1023 }
1024 self.next_token_in_regex_context();
1025 let else_expr = self.parse_expression_with_min_precedence(0);
1026 left = Expression::Ternary {
1027 condition: Box::new(left),
1028 then_expr: Box::new(then_expr),
1029 else_expr: Box::new(else_expr),
1030 };
1031 continue;
1032 }
1033
1034 if infix_operator_precedence(&self.current_token.kind).is_none()
1035 && is_expression_start(&self.current_token.kind)
1036 {
1037 if CONCAT_LEFT_PRECEDENCE < min_precedence {
1038 break;
1039 }
1040
1041 let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1042 left = Expression::Concatenation {
1043 left: Box::new(left),
1044 right: Box::new(right),
1045 };
1046 continue;
1047 }
1048
1049 let (left_precedence, right_precedence) =
1050 match infix_operator_precedence(&self.current_token.kind) {
1051 Some(value) => value,
1052 None => break,
1053 };
1054
1055 if left_precedence < min_precedence {
1056 break;
1057 }
1058
1059 let operator = self.current_token.clone();
1060 if matches!(
1061 operator.kind,
1062 TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1063 ) {
1064 self.next_token_in_regex_context();
1065 } else {
1066 self.next_token();
1067 }
1068 let right = self.parse_expression_with_min_precedence(right_precedence);
1069
1070 left = Expression::Infix {
1071 left: Box::new(left),
1072 operator,
1073 right: Box::new(right),
1074 };
1075 }
1076
1077 left
1078 }
1079
1080 fn parse_primary_expression(&mut self) -> Expression<'a> {
1081 if self.current_token.kind == TokenKind::Minus {
1082 let operator = self.current_token.clone();
1083 self.next_token();
1084 let right = self.parse_primary_expression();
1085 return Expression::Infix {
1086 left: Box::new(Expression::Number(0.0)),
1087 operator,
1088 right: Box::new(right),
1089 };
1090 }
1091 if self.current_token.kind == TokenKind::Plus {
1092 self.next_token();
1093 return self.parse_primary_expression();
1094 }
1095 if self.current_token.kind == TokenKind::ExclamationMark {
1096 self.next_token_in_regex_context();
1097 let expression = self.parse_primary_expression();
1098 return Expression::Not(Box::new(expression));
1099 }
1100 if self.current_token.kind == TokenKind::Increment {
1101 self.next_token();
1102 let expression = self.parse_primary_expression();
1103 return Expression::PreIncrement(Box::new(expression));
1104 }
1105 if self.current_token.kind == TokenKind::Decrement {
1106 self.next_token();
1107 let expression = self.parse_primary_expression();
1108 return Expression::PreDecrement(Box::new(expression));
1109 }
1110
1111 let mut expression = self.parse_primary_atom();
1112 if self.current_token.kind == TokenKind::Increment {
1113 self.next_token();
1114 expression = Expression::PostIncrement(Box::new(expression));
1115 } else if self.current_token.kind == TokenKind::Decrement {
1116 self.next_token();
1117 expression = Expression::PostDecrement(Box::new(expression));
1118 }
1119 expression
1120 }
1121
1122 fn parse_primary_atom(&mut self) -> Expression<'a> {
1123 match self.current_token.kind {
1124 TokenKind::String => {
1125 let expression = Expression::String(self.current_token.literal);
1126 self.next_token();
1127 expression
1128 }
1129 TokenKind::Regex => {
1130 let expression = Expression::Regex(self.current_token.literal);
1131 self.next_token();
1132 expression
1133 }
1134 TokenKind::Number => {
1135 let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
1136 Expression::Number(value)
1137 } else {
1138 todo!()
1139 };
1140 self.next_token();
1141 expression
1142 }
1143 TokenKind::DollarSign => {
1144 self.next_token();
1145 let expression = self.parse_primary_atom();
1146 Expression::Field(Box::new(expression))
1147 }
1148 TokenKind::LeftParen => {
1149 self.next_token();
1150 let expression = self.parse_expression();
1151 if self.current_token.kind == TokenKind::RightParen {
1152 self.next_token();
1153 }
1154 expression
1155 }
1156 TokenKind::Identifier => {
1157 let identifier = self.current_token.clone();
1158 self.next_token();
1159 if self.current_token.kind == TokenKind::LeftParen
1160 && self.token_is_immediately_after(&identifier)
1161 {
1162 let args = self.parse_call_arguments();
1163 return Expression::FunctionCall {
1164 name: identifier.literal,
1165 args,
1166 };
1167 }
1168 if self.current_token.kind == TokenKind::LeftSquareBracket {
1169 self.next_token_in_regex_context();
1170 let index = self.parse_array_index_expression();
1171 if self.current_token.kind != TokenKind::RightSquareBracket {
1172 todo!()
1173 }
1174 self.next_token();
1175 Expression::ArrayAccess {
1176 identifier: identifier.literal,
1177 index: Box::new(index),
1178 }
1179 } else {
1180 Expression::Identifier(identifier.literal)
1181 }
1182 }
1183 TokenKind::Length => {
1184 self.next_token();
1185 if self.current_token.kind == TokenKind::LeftParen {
1186 self.next_token();
1187 if self.current_token.kind == TokenKind::RightParen {
1188 self.next_token();
1189 Expression::Length(None)
1190 } else {
1191 let expression = self.parse_expression();
1192 if self.current_token.kind != TokenKind::RightParen {
1193 todo!()
1194 }
1195 self.next_token();
1196 Expression::Length(Some(Box::new(expression)))
1197 }
1198 } else {
1199 Expression::Length(None)
1200 }
1201 }
1202 TokenKind::Substr => {
1203 self.next_token();
1204 if self.current_token.kind != TokenKind::LeftParen {
1205 todo!()
1206 }
1207 self.next_token();
1208 let string = self.parse_expression();
1209 if self.current_token.kind != TokenKind::Comma {
1210 todo!()
1211 }
1212 self.next_token();
1213 let start = self.parse_expression();
1214 let mut length = None;
1215 if self.current_token.kind == TokenKind::Comma {
1216 self.next_token();
1217 length = Some(Box::new(self.parse_expression()));
1218 }
1219 if self.current_token.kind != TokenKind::RightParen {
1220 todo!()
1221 }
1222 self.next_token();
1223 Expression::Substr {
1224 string: Box::new(string),
1225 start: Box::new(start),
1226 length,
1227 }
1228 }
1229 TokenKind::Rand => {
1230 self.next_token();
1231 if self.current_token.kind == TokenKind::LeftParen {
1232 self.next_token();
1233 if self.current_token.kind != TokenKind::RightParen {
1234 todo!()
1235 }
1236 self.next_token();
1237 }
1238 Expression::Rand
1239 }
1240 TokenKind::Cos
1241 | TokenKind::Exp
1242 | TokenKind::Index
1243 | TokenKind::Int
1244 | TokenKind::Log
1245 | TokenKind::Sin
1246 | TokenKind::Sprintf
1247 | TokenKind::Split
1248 | TokenKind::Sqrt
1249 | TokenKind::Srand => {
1250 let name = self.current_token.literal;
1251 self.next_token();
1252 if self.current_token.kind == TokenKind::LeftParen {
1253 let args = self.parse_call_arguments();
1254 return Expression::FunctionCall { name, args };
1255 }
1256 Expression::Number(0.0)
1257 }
1258 _ => {
1259 panic!(
1260 "parse_primary_expression not yet implemented, found token: {:?}",
1261 self.current_token
1262 )
1263 }
1264 }
1265 }
1266
1267 pub fn parse_program(&mut self) -> Program<'_> {
1268 let mut program = Program::new();
1269
1270 while !self.is_eof() {
1271 match self.parse_next_rule() {
1272 Some(Rule::Begin(action)) => program.add_begin_block(action),
1273 Some(Rule::End(action)) => program.add_end_block(action),
1274 Some(rule) => program.add_rule(rule),
1275 None => {}
1276 }
1277 self.next_token_in_regex_context();
1278 }
1279
1280 for definition in self.function_definitions.drain(..) {
1281 program.add_function_definition(definition);
1282 }
1283
1284 program
1285 }
1286
1287 fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1288 if self.current_token.kind != TokenKind::LeftParen {
1289 return vec![];
1290 }
1291 self.next_token_in_regex_context();
1292 let mut args = Vec::new();
1293 while self.current_token.kind != TokenKind::RightParen
1294 && self.current_token.kind != TokenKind::Eof
1295 {
1296 if self.current_token.kind == TokenKind::Comma {
1297 self.next_token();
1298 continue;
1299 }
1300 args.push(self.parse_expression());
1301 }
1302 if self.current_token.kind == TokenKind::RightParen {
1303 self.next_token();
1304 }
1305 args
1306 }
1307}
1308
1309fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1310 match kind {
1311 TokenKind::Assign
1312 | TokenKind::AddAssign
1313 | TokenKind::SubtractAssign
1314 | TokenKind::MultiplyAssign
1315 | TokenKind::DivideAssign
1316 | TokenKind::ModuloAssign
1317 | TokenKind::PowerAssign => Some((0, 0)),
1318 TokenKind::Or => Some((1, 2)),
1319 TokenKind::And => Some((3, 4)),
1320 TokenKind::Equal
1321 | TokenKind::NotEqual
1322 | TokenKind::GreaterThan
1323 | TokenKind::GreaterThanOrEqual
1324 | TokenKind::In
1325 | TokenKind::LessThan
1326 | TokenKind::LessThanOrEqual
1327 | TokenKind::Tilde
1328 | TokenKind::NoMatch => Some((5, 6)),
1329 TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1330 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1331 TokenKind::Caret => Some((13, 12)),
1332 _ => None,
1333 }
1334}
1335
1336fn is_expression_start(kind: &TokenKind) -> bool {
1337 matches!(
1338 kind,
1339 TokenKind::String
1340 | TokenKind::Regex
1341 | TokenKind::Number
1342 | TokenKind::DollarSign
1343 | TokenKind::LeftParen
1344 | TokenKind::Identifier
1345 | TokenKind::Cos
1346 | TokenKind::Exp
1347 | TokenKind::Index
1348 | TokenKind::Int
1349 | TokenKind::Length
1350 | TokenKind::Log
1351 | TokenKind::Rand
1352 | TokenKind::Sin
1353 | TokenKind::Sprintf
1354 | TokenKind::Split
1355 | TokenKind::Sqrt
1356 | TokenKind::Srand
1357 | TokenKind::Substr
1358 | TokenKind::Increment
1359 | TokenKind::Decrement
1360 )
1361}
1362
1363fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1364 let (kind, literal) = match token.kind {
1365 TokenKind::AddAssign => (TokenKind::Plus, "+"),
1366 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1367 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1368 TokenKind::DivideAssign => (TokenKind::Division, "/"),
1369 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1370 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1371 _ => todo!(),
1372 };
1373
1374 Token::new(kind, literal, token.span.start)
1375}
1376
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and asserts that the program renders as `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn assert_renders_as(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();

        assert_eq!(expected, program.to_string());
    }

    /// Parses `source` and asserts that it renders back to exactly itself.
    #[track_caller]
    fn assert_round_trips(source: &str) {
        assert_renders_as(source, source);
    }

    /// Parses `source` and asserts that the program reports a length of one
    /// and renders as `expected`.
    #[track_caller]
    fn assert_single_item_renders_as(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(expected, program.to_string());
    }

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));

        assert_eq!(parser.current_token.literal, "42");
        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        let program = parser.parse_program();

        assert_eq!(program.len(), 0);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_item_renders_as("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_item_renders_as("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_item_renders_as("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_item_renders_as("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_item_renders_as("/foo/ { print }", "/foo/ { print }");
    }

    #[test]
    fn parse_print_infix_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert!(matches!(**left, Expression::Number(1.0)));
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**right, Expression::Number(2.0)));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Asterisk);
                assert!(matches!(**right, Expression::Number(3.0)));
                assert!(matches!(**left, Expression::Infix { .. }));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_multiplication_has_higher_precedence_than_addition() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**left, Expression::Number(1.0)));
                match &**right {
                    Expression::Infix {
                        operator: right_op, ..
                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
                    _ => panic!("expected nested infix expression"),
                }
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_power_is_right_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Caret);
                assert!(matches!(**left, Expression::Number(2.0)));
                match &**right {
                    Expression::Infix {
                        operator: right_op, ..
                    } => assert_eq!(right_op.kind, TokenKind::Caret),
                    _ => panic!("expected nested infix expression"),
                }
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_minus_is_left_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Minus);
                match &**left {
                    Expression::Infix {
                        operator: left_op, ..
                    } => assert_eq!(left_op.kind, TokenKind::Minus),
                    _ => panic!("expected nested infix expression"),
                }
                assert!(matches!(**right, Expression::Number(1.0)));
            }
            _ => panic!("expected infix expression"),
        }
    }

    #[test]
    fn parse_print_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        assert_eq!(exprs.len(), 1);
        match &exprs[0] {
            Expression::Concatenation { left, right } => {
                assert!(matches!(**left, Expression::String("Value:")));
                assert!(matches!(**right, Expression::Number(42.0)));
            }
            _ => panic!("expected concatenation expression"),
        }
    }

    #[test]
    fn parse_continue_statement() {
        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        assert!(matches!(statements[0], Statement::Continue));
    }

    // `x (` with a space is the variable `x` concatenated with a group, not
    // a call to a function named `x`.
    #[test]
    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        let exprs = match &statements[1] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        assert_eq!(exprs.len(), 1);
        match &exprs[0] {
            Expression::Concatenation { left, right } => {
                assert!(matches!(**left, Expression::Identifier("x")));
                assert!(matches!(**right, Expression::PreIncrement(_)));
            }
            _ => panic!("expected concatenation expression"),
        }
    }

    #[test]
    fn parse_print_field_expression() {
        let mut parser = Parser::new(Lexer::new("{ print $1 }"));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        let statements = match rule {
            Rule::Action(Action { statements }) => statements,
            _ => panic!("expected action rule"),
        };

        let exprs = match &statements[0] {
            Statement::Print(expressions) => expressions,
            _ => panic!("expected print statement"),
        };

        match &exprs[0] {
            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
            _ => panic!("expected field expression"),
        }
    }

    #[test]
    fn parse_print_with_commas() {
        assert_round_trips(r#"BEGIN { print "Value:", 42, $1 }"#);
    }

    #[test]
    fn parse_number_of_fields_identifier() {
        assert_round_trips(r#"BEGIN { print NF }"#);
    }

    #[test]
    fn parse_printf_with_format_and_arguments() {
        assert_round_trips(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#);
    }

    #[test]
    fn parse_add_assignment_and_pre_increment() {
        assert_round_trips(r#"/Asia/ { pop += $3; ++n }"#);
    }

    #[test]
    fn parse_regex_match_pattern_action() {
        assert_round_trips(r#"$4 ~ /Asia/ { print $1 }"#);
    }

    #[test]
    fn parse_print_with_line_continuation_after_comma() {
        assert_renders_as(
            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
        );
    }

    #[test]
    fn parse_gsub_statement() {
        assert_round_trips(r#"{ gsub(/USA/, "United States"); print }"#);
    }

    #[test]
    fn parse_gsub_statement_with_target() {
        assert_round_trips(r#"{ gsub(/[ \t]+/, "", t) }"#);
    }

    #[test]
    fn parse_system_statement() {
        assert_round_trips(r#"{ system("cat " $2) }"#);
    }

    #[test]
    fn parse_print_length_builtin_expression() {
        assert_round_trips(r#"{ print length, $0 }"#);
    }

    #[test]
    fn parse_length_expression_as_rule_pattern() {
        assert_round_trips(
            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
        );
    }

    #[test]
    fn parse_field_assignment_with_substr() {
        assert_round_trips(r#"{ $1 = substr($1, 1, 3); print }"#);
    }

    #[test]
    fn parse_assignment_with_concatenation_and_substr() {
        assert_round_trips(r#"{ s = s " " substr($1, 1, 3) }"#);
    }

    // Compound assignment is rendered in its expanded `x = x op y` form.
    #[test]
    fn parse_field_divide_assignment() {
        assert_renders_as(r#"{ $2 /= 1000; print }"#, r#"{ $2 = $2 / 1000; print }"#);
    }

    #[test]
    fn parse_chained_assignment() {
        assert_round_trips(r#"BEGIN { FS = OFS = "\t" }"#);
    }

    #[test]
    fn parse_if_statement_with_block() {
        assert_round_trips(r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#);
    }

    #[test]
    fn parse_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#);
    }

    // Single-statement loop and branch bodies are rendered with braces.
    #[test]
    fn parse_while_with_single_body_statement() {
        assert_renders_as(
            r#"{ while (n > 1) print n }"#,
            r#"{ while (n > 1) { print n } }"#,
        );
    }

    #[test]
    fn parse_do_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#);
    }

    #[test]
    fn parse_post_decrement_statement() {
        assert_renders_as(r#"{ k-- ; n-- }"#, r#"{ k--; n-- }"#);
    }

    #[test]
    fn parse_rand_expression() {
        assert_round_trips(r#"BEGIN { print rand() }"#);
    }

    #[test]
    fn parse_math_builtin_expressions() {
        assert_round_trips(r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#);
    }

    #[test]
    fn parse_index_builtin_expression() {
        assert_round_trips(r#"{ print index(1, $1) }"#);
    }

    #[test]
    fn parse_in_membership_expression() {
        assert_round_trips(r#"{ print 1 in x }"#);
    }

    #[test]
    fn parse_for_loop_with_single_body_statement() {
        assert_renders_as(
            r#"{ for (i = 1; i <= NF; i++) print $i }"#,
            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
        );
    }

    #[test]
    fn parse_if_with_single_statement_body() {
        assert_renders_as(
            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
        );
    }

    #[test]
    fn parse_exit_statement() {
        assert_round_trips(r#"NR >= 10 { exit }"#);
    }

    #[test]
    fn parse_exit_statement_with_status() {
        assert_round_trips(r#"$1 < 5000 { exit NR }"#);
    }

    #[test]
    fn parse_user_defined_function_call_statement() {
        let mut parser = Parser::new(Lexer::new(
            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
        ));

        let program = parser.parse_program();

        let definition = program
            .function_definition("myabort")
            .expect("expected function definition");
        assert_eq!(definition.parameters, vec!["n"]);
        assert_eq!(definition.statements.len(), 1);
    }

    #[test]
    fn parse_delete_array_element_statement() {
        assert_round_trips(r#"{ delete x[i, j] }"#);
    }

    #[test]
    fn parse_array_add_assignment_and_access() {
        assert_round_trips(r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#);
    }

    #[test]
    fn parse_for_in_loop() {
        assert_renders_as(
            r#"END { for (name in area) print name ":" area[name] }"#,
            r#"END { for (name in area) { print name ":" area[name] } }"#,
        );
    }

    #[test]
    fn parse_print_redirection() {
        assert_renders_as(r#"{ print >"tempbig" }"#, r#"{ print > "tempbig" }"#);
    }

    #[test]
    fn parse_print_pipe() {
        assert_round_trips(r#"{ print c ":" pop[c] | "sort" }"#);
    }
}