1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
/// Parser state: the lexer supplying tokens, the token currently being
/// examined, and the function definitions collected while parsing.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source; the parser pulls one token at a time from it.
    lexer: Lexer<'a>,
    /// The token the parser is currently positioned on.
    current_token: Token<'a>,
    /// Function definitions appended by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
13
14impl<'a> Parser<'a> {
15 pub fn new(mut lexer: Lexer<'a>) -> Self {
16 let current_token = lexer.next_token_regex_aware();
17 Parser {
18 lexer,
19 current_token,
20 function_definitions: Vec::new(),
21 }
22 }
23
24 fn next_token(&mut self) {
25 self.current_token = self.lexer.next_token();
26 }
27
28 fn next_token_in_regex_context(&mut self) {
29 self.current_token = self.lexer.next_token_regex_aware();
30 }
31
32 fn is_eof(&self) -> bool {
33 self.current_token.kind == TokenKind::Eof
34 }
35
36 fn is_statement_terminator(&self) -> bool {
37 matches!(
38 self.current_token.kind,
39 TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40 )
41 }
42
43 fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44 self.current_token.span.start == previous.span.start + previous.literal.len()
45 }
46
47 fn parse_number_expression(&self) -> Option<Expression<'a>> {
48 let literal = self.current_token.literal;
49 if let Some(hex_digits) = literal
50 .strip_prefix("0x")
51 .or_else(|| literal.strip_prefix("0X"))
52 {
53 let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
54 return Some(Expression::HexNumber { literal, value });
55 }
56
57 literal.parse::<f64>().ok().map(Expression::Number)
58 }
59
60 fn parse_array_index_expression(&mut self) -> Expression<'a> {
61 let mut index = self.parse_expression();
62 while self.current_token.kind == TokenKind::Comma {
63 let operator = self.current_token.clone();
64 self.next_token_in_regex_context();
65 let right = self.parse_expression();
66 index = Expression::Infix {
67 left: Box::new(index),
68 operator,
69 right: Box::new(right),
70 };
71 }
72 index
73 }
74
75 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
76 match &self.current_token.kind {
77 TokenKind::Begin => {
78 self.next_token();
79 match self.parse_action() {
80 Rule::Action(action) => Some(Rule::Begin(action)),
81 _ => panic!("Expected action after BEGIN"),
82 }
83 }
84 TokenKind::NewLine => {
85 self.next_token_in_regex_context();
86 self.parse_next_rule()
87 }
88 TokenKind::Eof => None,
89 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
90 TokenKind::Function => {
91 self.parse_function_definition();
92 None
93 }
94 TokenKind::End => {
95 self.next_token();
96 match self.parse_action() {
97 Rule::Action(action) => Some(Rule::End(action)),
98 _ => panic!("Expected action after END"),
99 }
100 }
101 TokenKind::Regex
102 | TokenKind::String
103 | TokenKind::Number
104 | TokenKind::DollarSign
105 | TokenKind::LeftParen
106 | TokenKind::Identifier
107 | TokenKind::Cos
108 | TokenKind::Exp
109 | TokenKind::Index
110 | TokenKind::Int
111 | TokenKind::Length
112 | TokenKind::Log
113 | TokenKind::Match
114 | TokenKind::Rand
115 | TokenKind::Sin
116 | TokenKind::Sprintf
117 | TokenKind::Split
118 | TokenKind::Sqrt
119 | TokenKind::Srand
120 | TokenKind::Substr
121 | TokenKind::ExclamationMark
122 | TokenKind::Increment
123 | TokenKind::Decrement => self.parse_pattern_rule(),
124 _ => panic!(
125 "parse_next_rule not yet implemented, found token: {:?}",
126 self.current_token
127 ),
128 }
129 }
130
131 fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
132 let mut pattern = self.parse_expression();
133 if self.current_token.kind == TokenKind::Comma {
134 let operator = self.current_token.clone();
135 self.next_token_in_regex_context();
136 let right = self.parse_expression();
137 pattern = Expression::Infix {
138 left: Box::new(pattern),
139 operator,
140 right: Box::new(right),
141 };
142 }
143 let pattern = Some(pattern);
144
145 if self.current_token.kind == TokenKind::LeftCurlyBrace {
146 match self.parse_action() {
147 Rule::Action(action) => Some(Rule::PatternAction {
148 pattern,
149 action: Some(action),
150 }),
151 _ => panic!("Expected action after pattern"),
152 }
153 } else {
154 Some(Rule::PatternAction {
155 pattern,
156 action: None,
157 })
158 }
159 }
160
161 fn parse_action(&mut self) -> Rule<'a> {
162 self.next_token(); let pattern = None;
165
166 let mut statements = Vec::new();
167 while self.current_token.kind != TokenKind::RightCurlyBrace
168 && self.current_token.kind != TokenKind::Eof
169 {
170 while self.current_token.kind == TokenKind::NewLine
171 || self.current_token.kind == TokenKind::Semicolon
172 {
173 self.next_token();
174 }
175
176 if self.current_token.kind == TokenKind::RightCurlyBrace
177 || self.current_token.kind == TokenKind::Eof
178 {
179 break;
180 }
181
182 statements.push(self.parse_statement());
183 }
184
185 if pattern.is_some() {
186 Rule::PatternAction {
187 pattern,
188 action: Some(Action { statements }),
189 }
190 } else {
191 Rule::Action(Action { statements })
192 }
193 }
194
195 fn parse_statement(&mut self) -> Statement<'a> {
196 match self.current_token.kind {
197 TokenKind::Print => self.parse_print_function(),
198 TokenKind::Printf => self.parse_printf_function(),
199 TokenKind::System => self.parse_system_function(),
200 TokenKind::Split => self.parse_split_statement(),
201 TokenKind::Sub => self.parse_sub_function(),
202 TokenKind::Gsub => self.parse_gsub_function(),
203 TokenKind::Break => self.parse_break_statement(),
204 TokenKind::Continue => self.parse_continue_statement(),
205 TokenKind::Delete => self.parse_delete_statement(),
206 TokenKind::If => self.parse_if_statement(),
207 TokenKind::Do => self.parse_do_statement(),
208 TokenKind::While => self.parse_while_statement(),
209 TokenKind::For => self.parse_for_statement(),
210 TokenKind::Return => self.parse_return_statement(),
211 TokenKind::Next => self.parse_next_statement(),
212 TokenKind::Exit => self.parse_exit_statement(),
213 TokenKind::Identifier => self.parse_assignment_statement(),
214 TokenKind::DollarSign => self.parse_field_assignment_statement(),
215 TokenKind::Increment => self.parse_pre_increment_statement(),
216 TokenKind::Decrement => self.parse_pre_decrement_statement(),
217 TokenKind::Number
218 | TokenKind::String
219 | TokenKind::Regex
220 | TokenKind::LeftParen
221 | TokenKind::Close
222 | TokenKind::Cos
223 | TokenKind::Exp
224 | TokenKind::Index
225 | TokenKind::Int
226 | TokenKind::Length
227 | TokenKind::Log
228 | TokenKind::Match
229 | TokenKind::Rand
230 | TokenKind::Sin
231 | TokenKind::Sprintf
232 | TokenKind::Sqrt
233 | TokenKind::Srand
234 | TokenKind::Substr
235 | TokenKind::ToLower
236 | TokenKind::ToUpper => Statement::Expression(self.parse_expression()),
237 _ => todo!(),
238 }
239 }
240
241 fn parse_function_definition(&mut self) {
242 self.next_token();
243 if self.current_token.kind != TokenKind::Identifier {
244 todo!()
245 }
246 let name = self.current_token.literal;
247 self.next_token();
248 if self.current_token.kind != TokenKind::LeftParen {
249 todo!()
250 }
251 self.next_token();
252
253 let mut parameters = Vec::new();
254 while self.current_token.kind != TokenKind::RightParen {
255 if self.current_token.kind != TokenKind::Identifier {
256 todo!()
257 }
258 parameters.push(self.current_token.literal);
259 self.next_token();
260 if self.current_token.kind == TokenKind::Comma {
261 self.next_token();
262 } else if self.current_token.kind != TokenKind::RightParen {
263 todo!()
264 }
265 }
266
267 self.next_token();
268 while self.current_token.kind == TokenKind::NewLine {
269 self.next_token();
270 }
271 if self.current_token.kind != TokenKind::LeftCurlyBrace {
272 todo!()
273 }
274
275 let mut statements = Vec::new();
276 self.next_token(); while self.current_token.kind != TokenKind::RightCurlyBrace
278 && self.current_token.kind != TokenKind::Eof
279 {
280 while self.current_token.kind == TokenKind::NewLine
281 || self.current_token.kind == TokenKind::Semicolon
282 {
283 self.next_token();
284 }
285
286 if self.current_token.kind == TokenKind::RightCurlyBrace
287 || self.current_token.kind == TokenKind::Eof
288 {
289 break;
290 }
291
292 statements.push(self.parse_statement());
293 }
294 self.function_definitions.push(FunctionDefinition {
295 name,
296 parameters,
297 statements,
298 });
299 }
300
301 fn parse_assignment_statement(&mut self) -> Statement<'a> {
302 let identifier = self.current_token.clone();
303 self.next_token();
304 self.parse_assignment_statement_with_identifier(identifier)
305 }
306
307 fn parse_assignment_statement_with_identifier(
308 &mut self,
309 identifier: Token<'a>,
310 ) -> Statement<'a> {
311 if self.current_token.kind == TokenKind::LeftParen
312 && self.token_is_immediately_after(&identifier)
313 {
314 let args = self.parse_call_arguments();
315 return Statement::Expression(Expression::FunctionCall {
316 name: identifier.literal,
317 args,
318 });
319 }
320 if self.current_token.kind == TokenKind::LeftSquareBracket {
321 self.next_token_in_regex_context();
322 let index = self.parse_array_index_expression();
323 if self.current_token.kind != TokenKind::RightSquareBracket {
324 todo!()
325 }
326 self.next_token();
327 if self.current_token.kind == TokenKind::Assign {
328 self.next_token();
329 let value = self.parse_expression();
330 return Statement::ArrayAssignment {
331 identifier: identifier.literal,
332 index,
333 value,
334 };
335 }
336 if self.current_token.kind == TokenKind::AddAssign {
337 self.next_token();
338 let value = self.parse_expression();
339 return Statement::ArrayAddAssignment {
340 identifier: identifier.literal,
341 index,
342 value,
343 };
344 }
345 if self.current_token.kind == TokenKind::Increment {
346 self.next_token();
347 return Statement::ArrayPostIncrement {
348 identifier: identifier.literal,
349 index,
350 };
351 }
352 if self.current_token.kind == TokenKind::Decrement {
353 self.next_token();
354 return Statement::ArrayPostDecrement {
355 identifier: identifier.literal,
356 index,
357 };
358 }
359 todo!()
360 }
361 if self.current_token.kind == TokenKind::Assign {
362 self.next_token();
363 if self.current_token.kind == TokenKind::Split {
364 return self.parse_split_assignment_statement(identifier.literal);
365 }
366 let value = self.parse_expression();
367 Statement::Assignment {
368 identifier: identifier.literal,
369 value,
370 }
371 } else if self.current_token.kind == TokenKind::Increment {
372 self.next_token();
373 Statement::PostIncrement {
374 identifier: identifier.literal,
375 }
376 } else if self.current_token.kind == TokenKind::Decrement {
377 self.next_token();
378 Statement::PostDecrement {
379 identifier: identifier.literal,
380 }
381 } else if self.current_token.kind == TokenKind::AddAssign {
382 self.next_token();
383 let value = self.parse_expression();
384 Statement::AddAssignment {
385 identifier: identifier.literal,
386 value,
387 }
388 } else if matches!(
389 self.current_token.kind,
390 TokenKind::SubtractAssign
391 | TokenKind::MultiplyAssign
392 | TokenKind::DivideAssign
393 | TokenKind::ModuloAssign
394 | TokenKind::PowerAssign
395 ) {
396 let assign_token = self.current_token.clone();
397 self.next_token();
398 let right_value = self.parse_expression();
399 Statement::Assignment {
400 identifier: identifier.literal,
401 value: Expression::Infix {
402 left: Box::new(Expression::Identifier(identifier.literal)),
403 operator: compound_assign_operator(&assign_token),
404 right: Box::new(right_value),
405 },
406 }
407 } else {
408 todo!()
409 }
410 }
411
412 fn parse_delete_statement(&mut self) -> Statement<'a> {
413 self.next_token();
414 if self.current_token.kind != TokenKind::Identifier {
415 todo!()
416 }
417 let identifier = self.current_token.literal;
418 self.next_token();
419 if self.current_token.kind != TokenKind::LeftSquareBracket {
420 return Statement::Delete {
421 identifier,
422 index: None,
423 };
424 }
425
426 self.next_token_in_regex_context();
427 let index = self.parse_array_index_expression();
428 if self.current_token.kind != TokenKind::RightSquareBracket {
429 todo!()
430 }
431 self.next_token();
432 Statement::Delete {
433 identifier,
434 index: Some(index),
435 }
436 }
437
438 fn parse_break_statement(&mut self) -> Statement<'a> {
439 self.next_token();
440 Statement::Break
441 }
442
443 fn parse_continue_statement(&mut self) -> Statement<'a> {
444 self.next_token();
445 Statement::Continue
446 }
447
448 fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
449 self.next_token();
450 if self.current_token.kind != TokenKind::Identifier {
451 todo!()
452 }
453 let identifier = self.current_token.literal;
454 self.next_token();
455 Statement::PreIncrement { identifier }
456 }
457
458 fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
459 self.next_token();
460 if self.current_token.kind != TokenKind::Identifier {
461 todo!()
462 }
463 let identifier = self.current_token.literal;
464 self.next_token();
465 Statement::PreDecrement { identifier }
466 }
467
468 fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
469 self.next_token();
470 if self.current_token.kind != TokenKind::LeftParen {
471 todo!()
472 }
473 self.next_token_in_regex_context();
474 let string = self.parse_expression();
475 if self.current_token.kind != TokenKind::Comma {
476 todo!()
477 }
478 self.next_token();
479 if self.current_token.kind != TokenKind::Identifier {
480 todo!()
481 }
482 let array = self.current_token.literal;
483 self.next_token();
484 let separator = if self.current_token.kind == TokenKind::Comma {
485 self.next_token_in_regex_context();
486 Some(self.parse_expression())
487 } else {
488 None
489 };
490 if self.current_token.kind != TokenKind::RightParen {
491 todo!()
492 }
493 self.next_token();
494 Statement::SplitAssignment {
495 identifier,
496 string,
497 array,
498 separator,
499 }
500 }
501
502 fn parse_split_statement(&mut self) -> Statement<'a> {
503 self.next_token();
504 if self.current_token.kind != TokenKind::LeftParen {
505 todo!()
506 }
507 self.next_token_in_regex_context();
508 let string = self.parse_expression();
509 if self.current_token.kind != TokenKind::Comma {
510 todo!()
511 }
512 self.next_token();
513 if self.current_token.kind != TokenKind::Identifier {
514 todo!()
515 }
516 let array = self.current_token.literal;
517 self.next_token();
518 let separator = if self.current_token.kind == TokenKind::Comma {
519 self.next_token_in_regex_context();
520 Some(self.parse_expression())
521 } else {
522 None
523 };
524 if self.current_token.kind != TokenKind::RightParen {
525 todo!()
526 }
527 self.next_token();
528 Statement::Split {
529 string,
530 array,
531 separator,
532 }
533 }
534
535 fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
536 self.next_token();
537 let field = self.parse_primary_expression();
538 let assign_token = self.current_token.clone();
539 self.next_token();
540 let right_value = self.parse_expression();
541
542 let value = if assign_token.kind == TokenKind::Assign {
543 right_value
544 } else {
545 let operator = compound_assign_operator(&assign_token);
546 Expression::Infix {
547 left: Box::new(Expression::Field(Box::new(field.clone()))),
548 operator,
549 right: Box::new(right_value),
550 }
551 };
552 Statement::FieldAssignment { field, value }
553 }
554
555 fn parse_if_statement(&mut self) -> Statement<'a> {
556 self.next_token();
557 if self.current_token.kind != TokenKind::LeftParen {
558 todo!()
559 }
560 self.next_token_in_regex_context();
561 let condition = self.parse_condition_in_parens();
562 if self.current_token.kind != TokenKind::RightParen {
563 todo!()
564 }
565 self.next_token();
566 let then_statements = self.parse_control_statement_body();
567
568 while self.current_token.kind == TokenKind::NewLine
569 || self.current_token.kind == TokenKind::Semicolon
570 {
571 self.next_token();
572 }
573
574 if self.current_token.kind == TokenKind::Else {
575 self.next_token();
576 let else_statements = self.parse_control_statement_body();
577 return Statement::IfElse {
578 condition,
579 then_statements,
580 else_statements,
581 };
582 }
583
584 Statement::If {
585 condition,
586 then_statements,
587 }
588 }
589
590 fn parse_exit_statement(&mut self) -> Statement<'a> {
591 self.next_token();
592 let status = if self.is_statement_terminator() {
593 None
594 } else {
595 Some(self.parse_expression())
596 };
597 Statement::Exit(status)
598 }
599
600 fn parse_return_statement(&mut self) -> Statement<'a> {
601 self.next_token();
602 let value = if self.is_statement_terminator() {
603 None
604 } else {
605 Some(self.parse_expression())
606 };
607 Statement::Return(value)
608 }
609
610 fn parse_next_statement(&mut self) -> Statement<'a> {
611 self.next_token();
612 Statement::Next
613 }
614
615 fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
616 self.next_token(); let mut statements = Vec::new();
618 while self.current_token.kind != TokenKind::RightCurlyBrace
619 && self.current_token.kind != TokenKind::Eof
620 {
621 while self.current_token.kind == TokenKind::NewLine
622 || self.current_token.kind == TokenKind::Semicolon
623 {
624 self.next_token();
625 }
626
627 if self.current_token.kind == TokenKind::RightCurlyBrace
628 || self.current_token.kind == TokenKind::Eof
629 {
630 break;
631 }
632 statements.push(self.parse_statement());
633 }
634 if self.current_token.kind == TokenKind::RightCurlyBrace {
635 self.next_token();
636 }
637 statements
638 }
639
640 fn parse_control_statement_body(&mut self) -> Vec<Statement<'a>> {
641 while self.current_token.kind == TokenKind::NewLine {
642 self.next_token();
643 }
644
645 if self.current_token.kind == TokenKind::LeftCurlyBrace {
646 return self.parse_statement_block();
647 }
648
649 if self.current_token.kind == TokenKind::Semicolon {
650 self.next_token();
651 return vec![Statement::Empty];
652 }
653
654 vec![self.parse_statement()]
655 }
656
657 fn parse_while_statement(&mut self) -> Statement<'a> {
658 self.next_token();
659 if self.current_token.kind != TokenKind::LeftParen {
660 todo!()
661 }
662 self.next_token_in_regex_context();
663 let condition = self.parse_condition_in_parens();
664 if self.current_token.kind != TokenKind::RightParen {
665 todo!()
666 }
667 self.next_token();
668 let statements = self.parse_control_statement_body();
669 Statement::While {
670 condition,
671 statements,
672 }
673 }
674
675 fn parse_do_statement(&mut self) -> Statement<'a> {
676 self.next_token();
677 let statements = self.parse_control_statement_body();
678
679 while self.current_token.kind == TokenKind::NewLine
680 || self.current_token.kind == TokenKind::Semicolon
681 {
682 self.next_token();
683 }
684
685 if self.current_token.kind != TokenKind::While {
686 todo!()
687 }
688 self.next_token();
689 if self.current_token.kind != TokenKind::LeftParen {
690 todo!()
691 }
692 self.next_token_in_regex_context();
693 let condition = self.parse_condition_in_parens();
694 if self.current_token.kind != TokenKind::RightParen {
695 todo!()
696 }
697 self.next_token();
698 Statement::DoWhile {
699 condition,
700 statements,
701 }
702 }
703
704 fn parse_for_statement(&mut self) -> Statement<'a> {
705 self.next_token();
706 if self.current_token.kind != TokenKind::LeftParen {
707 todo!()
708 }
709 self.next_token();
710 while self.current_token.kind == TokenKind::NewLine {
711 self.next_token();
712 }
713
714 let init = if self.current_token.kind == TokenKind::Semicolon {
715 Statement::Empty
716 } else if self.current_token.kind == TokenKind::Identifier {
717 let variable = self.current_token.clone();
718 self.next_token();
719 if self.current_token.kind == TokenKind::In {
720 self.next_token();
721 if self.current_token.kind != TokenKind::Identifier {
722 todo!()
723 }
724 let array = self.current_token.literal;
725 self.next_token();
726 if self.current_token.kind != TokenKind::RightParen {
727 todo!()
728 }
729 self.next_token();
730 let statements = self.parse_control_statement_body();
731 return Statement::ForIn {
732 variable: variable.literal,
733 array,
734 statements,
735 };
736 }
737 self.parse_assignment_statement_with_identifier(variable)
738 } else {
739 self.parse_statement()
740 };
741 while self.current_token.kind == TokenKind::NewLine {
742 self.next_token();
743 }
744 if self.current_token.kind != TokenKind::Semicolon {
745 todo!()
746 }
747 self.next_token_in_regex_context();
748 while self.current_token.kind == TokenKind::NewLine {
749 self.next_token_in_regex_context();
750 }
751
752 let condition = if self.current_token.kind == TokenKind::Semicolon {
753 Expression::Number(1.0)
754 } else {
755 self.parse_expression()
756 };
757 while self.current_token.kind == TokenKind::NewLine {
758 self.next_token();
759 }
760 if self.current_token.kind != TokenKind::Semicolon {
761 todo!()
762 }
763 self.next_token_in_regex_context();
764 while self.current_token.kind == TokenKind::NewLine {
765 self.next_token_in_regex_context();
766 }
767
768 let update = if self.current_token.kind == TokenKind::RightParen {
769 Statement::Empty
770 } else {
771 self.parse_statement()
772 };
773 while self.current_token.kind == TokenKind::NewLine {
774 self.next_token();
775 }
776 if self.current_token.kind != TokenKind::RightParen {
777 todo!()
778 }
779 self.next_token();
780 let statements = self.parse_control_statement_body();
781
782 Statement::For {
783 init: Box::new(init),
784 condition,
785 update: Box::new(update),
786 statements,
787 }
788 }
789
790 fn parse_print_function(&mut self) -> Statement<'a> {
791 let mut expressions = Vec::new();
792 let mut expect_more = false;
793 self.next_token();
794
795 loop {
796 if self.current_token.kind == TokenKind::RightCurlyBrace
797 || self.current_token.kind == TokenKind::RightParen
798 || self.current_token.kind == TokenKind::Eof
799 || self.current_token.kind == TokenKind::GreaterThan
800 || self.current_token.kind == TokenKind::Append
801 || self.current_token.kind == TokenKind::Pipe
802 {
803 break;
804 }
805
806 if self.current_token.kind == TokenKind::NewLine
807 || self.current_token.kind == TokenKind::Semicolon
808 {
809 if expect_more {
810 self.next_token();
811 continue;
812 }
813 break;
814 }
815
816 if self.current_token.kind == TokenKind::Comma {
817 self.next_token();
818 expect_more = true;
819 continue;
820 }
821
822 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
823 let expression = self.parse_expression();
824 expressions.push(expression);
825 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
826 while self.current_token.kind == TokenKind::Comma {
827 self.next_token();
828 expressions.push(self.parse_expression());
829 }
830 if self.current_token.kind != TokenKind::RightParen {
831 todo!()
832 }
833 self.next_token();
834 }
835 expect_more = false;
836 }
837 if self.current_token.kind == TokenKind::RightParen {
838 self.next_token();
839 }
840
841 if self.current_token.kind == TokenKind::GreaterThan
842 || self.current_token.kind == TokenKind::Append
843 {
844 let append = self.current_token.kind == TokenKind::Append;
845 self.next_token();
846 let target = self.parse_expression();
847 return Statement::PrintRedirect {
848 expressions,
849 target,
850 append,
851 };
852 }
853 if self.current_token.kind == TokenKind::Pipe {
854 self.next_token();
855 let target = self.parse_expression();
856 return Statement::PrintPipe {
857 expressions,
858 target,
859 };
860 }
861
862 Statement::Print(expressions)
863 }
864
865 fn parse_printf_function(&mut self) -> Statement<'a> {
866 self.next_token();
867 let expressions = if self.current_token.kind == TokenKind::LeftParen {
868 self.next_token_in_regex_context();
869 let mut expressions = Vec::new();
870 while self.current_token.kind != TokenKind::RightParen
871 && self.current_token.kind != TokenKind::Eof
872 {
873 if self.current_token.kind == TokenKind::Comma {
874 self.next_token();
875 continue;
876 }
877 expressions.push(self.parse_expression());
878 }
879 if self.current_token.kind == TokenKind::RightParen {
880 self.next_token();
881 }
882 expressions
883 } else {
884 self.parse_expression_list_until_action_end_from_current()
885 };
886
887 Statement::Printf(expressions)
888 }
889
890 fn parse_gsub_function(&mut self) -> Statement<'a> {
891 self.next_token();
892 if self.current_token.kind != TokenKind::LeftParen {
893 todo!()
894 }
895
896 self.next_token_in_regex_context();
897 let pattern = self.parse_expression();
898
899 if self.current_token.kind != TokenKind::Comma {
900 todo!()
901 }
902 self.next_token();
903 let replacement = self.parse_expression();
904
905 let target = if self.current_token.kind == TokenKind::Comma {
906 self.next_token();
907 Some(self.parse_expression())
908 } else {
909 None
910 };
911
912 if self.current_token.kind != TokenKind::RightParen {
913 todo!()
914 }
915 self.next_token();
916
917 Statement::Gsub {
918 pattern,
919 replacement,
920 target,
921 }
922 }
923
924 fn parse_sub_function(&mut self) -> Statement<'a> {
925 self.next_token();
926 if self.current_token.kind != TokenKind::LeftParen {
927 todo!()
928 }
929
930 self.next_token_in_regex_context();
931 let pattern = self.parse_expression();
932
933 if self.current_token.kind != TokenKind::Comma {
934 todo!()
935 }
936 self.next_token();
937 let replacement = self.parse_expression();
938
939 if self.current_token.kind == TokenKind::Comma {
940 todo!()
941 }
942
943 if self.current_token.kind != TokenKind::RightParen {
944 todo!()
945 }
946 self.next_token();
947
948 Statement::Sub {
949 pattern,
950 replacement,
951 }
952 }
953
954 fn parse_system_function(&mut self) -> Statement<'a> {
955 self.next_token();
956 if self.current_token.kind != TokenKind::LeftParen {
957 todo!()
958 }
959 self.next_token();
960 let command = self.parse_expression();
961 if self.current_token.kind != TokenKind::RightParen {
962 todo!()
963 }
964 self.next_token();
965 Statement::System(command)
966 }
967
968 fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
969 let mut expressions = Vec::new();
970 let mut expect_more = false;
971
972 loop {
973 if self.current_token.kind == TokenKind::RightCurlyBrace
974 || self.current_token.kind == TokenKind::RightParen
975 || self.current_token.kind == TokenKind::Eof
976 {
977 break;
978 }
979
980 if self.current_token.kind == TokenKind::NewLine
981 || self.current_token.kind == TokenKind::Semicolon
982 {
983 if expect_more {
984 self.next_token();
985 continue;
986 }
987 break;
988 }
989
990 if self.current_token.kind == TokenKind::Comma {
991 self.next_token();
992 expect_more = true;
993 continue;
994 }
995
996 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
997 let expression = self.parse_expression();
998 expressions.push(expression);
999 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1000 while self.current_token.kind == TokenKind::Comma {
1001 self.next_token();
1002 expressions.push(self.parse_expression());
1003 }
1004 if self.current_token.kind != TokenKind::RightParen {
1005 todo!()
1006 }
1007 self.next_token();
1008 }
1009 expect_more = false;
1010 }
1011
1012 if self.current_token.kind == TokenKind::RightParen {
1013 self.next_token();
1014 }
1015
1016 expressions
1017 }
1018
1019 fn parse_expression(&mut self) -> Expression<'a> {
1020 self.parse_expression_with_min_precedence(0)
1021 }
1022
1023 fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1024 let left = self.parse_primary_expression();
1025 self.parse_expression_suffix(left, min_precedence)
1026 }
1027
1028 fn parse_expression_suffix(
1029 &mut self,
1030 mut left: Expression<'a>,
1031 min_precedence: u8,
1032 ) -> Expression<'a> {
1033 const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1034 const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1035
1036 loop {
1037 if self.current_token.kind == TokenKind::QuestionMark {
1038 if min_precedence > 0 {
1039 break;
1040 }
1041 self.next_token_in_regex_context();
1042 let then_expr = self.parse_expression_with_min_precedence(0);
1043 if self.current_token.kind != TokenKind::Colon {
1044 todo!()
1045 }
1046 self.next_token_in_regex_context();
1047 let else_expr = self.parse_expression_with_min_precedence(0);
1048 left = Expression::Ternary {
1049 condition: Box::new(left),
1050 then_expr: Box::new(then_expr),
1051 else_expr: Box::new(else_expr),
1052 };
1053 continue;
1054 }
1055
1056 if infix_operator_precedence(&self.current_token.kind).is_none()
1057 && is_expression_start(&self.current_token.kind)
1058 {
1059 if CONCAT_LEFT_PRECEDENCE < min_precedence {
1060 break;
1061 }
1062
1063 let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1064 left = Expression::Concatenation {
1065 left: Box::new(left),
1066 right: Box::new(right),
1067 };
1068 continue;
1069 }
1070
1071 let (left_precedence, right_precedence) =
1072 match infix_operator_precedence(&self.current_token.kind) {
1073 Some(value) => value,
1074 None => break,
1075 };
1076
1077 if left_precedence < min_precedence {
1078 break;
1079 }
1080
1081 let operator = self.current_token.clone();
1082 if matches!(
1083 operator.kind,
1084 TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1085 ) {
1086 self.next_token_in_regex_context();
1087 } else {
1088 self.next_token();
1089 }
1090 let right = self.parse_expression_with_min_precedence(right_precedence);
1091
1092 left = Expression::Infix {
1093 left: Box::new(left),
1094 operator,
1095 right: Box::new(right),
1096 };
1097 }
1098
1099 left
1100 }
1101
1102 fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1103 let mut condition = self.parse_expression();
1104 if self.current_token.kind == TokenKind::Comma {
1105 while self.current_token.kind == TokenKind::Comma {
1106 let operator = self.current_token.clone();
1107 self.next_token_in_regex_context();
1108 let right = self.parse_expression();
1109 condition = Expression::Infix {
1110 left: Box::new(condition),
1111 operator,
1112 right: Box::new(right),
1113 };
1114 }
1115 if self.current_token.kind != TokenKind::RightParen {
1116 todo!()
1117 }
1118 self.next_token();
1119 condition = self.parse_expression_suffix(condition, 0);
1120 }
1121 condition
1122 }
1123
1124 fn parse_primary_expression(&mut self) -> Expression<'a> {
1125 if self.current_token.kind == TokenKind::Minus {
1126 let operator = self.current_token.clone();
1127 self.next_token();
1128 let right = self.parse_primary_expression();
1129 return Expression::Infix {
1130 left: Box::new(Expression::Number(0.0)),
1131 operator,
1132 right: Box::new(right),
1133 };
1134 }
1135 if self.current_token.kind == TokenKind::Plus {
1136 self.next_token();
1137 return self.parse_primary_expression();
1138 }
1139 if self.current_token.kind == TokenKind::ExclamationMark {
1140 self.next_token_in_regex_context();
1141 let expression = self.parse_primary_expression();
1142 return Expression::Not(Box::new(expression));
1143 }
1144 if self.current_token.kind == TokenKind::Increment {
1145 self.next_token();
1146 let expression = self.parse_primary_expression();
1147 return Expression::PreIncrement(Box::new(expression));
1148 }
1149 if self.current_token.kind == TokenKind::Decrement {
1150 self.next_token();
1151 let expression = self.parse_primary_expression();
1152 return Expression::PreDecrement(Box::new(expression));
1153 }
1154
1155 let mut expression = self.parse_primary_atom();
1156 if self.current_token.kind == TokenKind::Increment {
1157 self.next_token();
1158 expression = Expression::PostIncrement(Box::new(expression));
1159 } else if self.current_token.kind == TokenKind::Decrement {
1160 self.next_token();
1161 expression = Expression::PostDecrement(Box::new(expression));
1162 }
1163 expression
1164 }
1165
1166 fn parse_primary_atom(&mut self) -> Expression<'a> {
1167 match self.current_token.kind {
1168 TokenKind::String => {
1169 let expression = Expression::String(self.current_token.literal);
1170 self.next_token();
1171 expression
1172 }
1173 TokenKind::Regex => {
1174 let expression = Expression::Regex(self.current_token.literal);
1175 self.next_token();
1176 expression
1177 }
1178 TokenKind::Number => {
1179 let expression = self
1180 .parse_number_expression()
1181 .unwrap_or_else(|| panic!("failed to parse numeric literal: {}", self.current_token.literal));
1182 self.next_token();
1183 expression
1184 }
1185 TokenKind::DollarSign => {
1186 self.next_token();
1187 let expression = self.parse_primary_atom();
1188 Expression::Field(Box::new(expression))
1189 }
1190 TokenKind::LeftParen => {
1191 self.next_token();
1192 let expression = self.parse_expression();
1193 if self.current_token.kind == TokenKind::RightParen {
1194 self.next_token();
1195 }
1196 expression
1197 }
1198 TokenKind::Identifier => {
1199 let identifier = self.current_token.clone();
1200 self.next_token();
1201 if self.current_token.kind == TokenKind::LeftParen
1202 && self.token_is_immediately_after(&identifier)
1203 {
1204 let args = self.parse_call_arguments();
1205 return Expression::FunctionCall {
1206 name: identifier.literal,
1207 args,
1208 };
1209 }
1210 if self.current_token.kind == TokenKind::LeftSquareBracket {
1211 self.next_token_in_regex_context();
1212 let index = self.parse_array_index_expression();
1213 if self.current_token.kind != TokenKind::RightSquareBracket {
1214 todo!()
1215 }
1216 self.next_token();
1217 Expression::ArrayAccess {
1218 identifier: identifier.literal,
1219 index: Box::new(index),
1220 }
1221 } else {
1222 Expression::Identifier(identifier.literal)
1223 }
1224 }
1225 TokenKind::Length => {
1226 self.next_token();
1227 if self.current_token.kind == TokenKind::LeftParen {
1228 self.next_token();
1229 if self.current_token.kind == TokenKind::RightParen {
1230 self.next_token();
1231 Expression::Length(None)
1232 } else {
1233 let expression = self.parse_expression();
1234 if self.current_token.kind != TokenKind::RightParen {
1235 todo!()
1236 }
1237 self.next_token();
1238 Expression::Length(Some(Box::new(expression)))
1239 }
1240 } else {
1241 Expression::Length(None)
1242 }
1243 }
1244 TokenKind::Substr => {
1245 self.next_token();
1246 if self.current_token.kind != TokenKind::LeftParen {
1247 todo!()
1248 }
1249 self.next_token();
1250 let string = self.parse_expression();
1251 if self.current_token.kind != TokenKind::Comma {
1252 todo!()
1253 }
1254 self.next_token();
1255 let start = self.parse_expression();
1256 let mut length = None;
1257 if self.current_token.kind == TokenKind::Comma {
1258 self.next_token();
1259 length = Some(Box::new(self.parse_expression()));
1260 }
1261 if self.current_token.kind != TokenKind::RightParen {
1262 todo!()
1263 }
1264 self.next_token();
1265 Expression::Substr {
1266 string: Box::new(string),
1267 start: Box::new(start),
1268 length,
1269 }
1270 }
1271 TokenKind::Rand => {
1272 self.next_token();
1273 if self.current_token.kind == TokenKind::LeftParen {
1274 self.next_token();
1275 if self.current_token.kind != TokenKind::RightParen {
1276 todo!()
1277 }
1278 self.next_token();
1279 }
1280 Expression::Rand
1281 }
1282 TokenKind::Close
1283 | TokenKind::Cos
1284 | TokenKind::Exp
1285 | TokenKind::Index
1286 | TokenKind::Int
1287 | TokenKind::Log
1288 | TokenKind::Match
1289 | TokenKind::Sin
1290 | TokenKind::Sprintf
1291 | TokenKind::Split
1292 | TokenKind::Sqrt
1293 | TokenKind::Srand => {
1294 let name = self.current_token.literal;
1295 self.next_token();
1296 if self.current_token.kind == TokenKind::LeftParen {
1297 let args = self.parse_call_arguments();
1298 return Expression::FunctionCall { name, args };
1299 }
1300 Expression::Number(0.0)
1301 }
1302 _ => {
1303 panic!(
1304 "parse_primary_expression not yet implemented, found token: {:?}",
1305 self.current_token
1306 )
1307 }
1308 }
1309 }
1310
1311 pub fn parse_program(&mut self) -> Program<'_> {
1312 let mut program = Program::new();
1313
1314 while !self.is_eof() {
1315 match self.parse_next_rule() {
1316 Some(Rule::Begin(action)) => program.add_begin_block(action),
1317 Some(Rule::End(action)) => program.add_end_block(action),
1318 Some(rule) => program.add_rule(rule),
1319 None => {}
1320 }
1321 self.next_token_in_regex_context();
1322 }
1323
1324 for definition in self.function_definitions.drain(..) {
1325 program.add_function_definition(definition);
1326 }
1327
1328 program
1329 }
1330
1331 fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1332 if self.current_token.kind != TokenKind::LeftParen {
1333 return vec![];
1334 }
1335 self.next_token_in_regex_context();
1336 let mut args = Vec::new();
1337 while self.current_token.kind != TokenKind::RightParen
1338 && self.current_token.kind != TokenKind::Eof
1339 {
1340 if self.current_token.kind == TokenKind::Comma {
1341 self.next_token();
1342 continue;
1343 }
1344 args.push(self.parse_expression());
1345 }
1346 if self.current_token.kind == TokenKind::RightParen {
1347 self.next_token();
1348 }
1349 args
1350 }
1351}
1352
1353fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1354 match kind {
1355 TokenKind::Assign
1356 | TokenKind::AddAssign
1357 | TokenKind::SubtractAssign
1358 | TokenKind::MultiplyAssign
1359 | TokenKind::DivideAssign
1360 | TokenKind::ModuloAssign
1361 | TokenKind::PowerAssign => Some((0, 0)),
1362 TokenKind::Or => Some((1, 2)),
1363 TokenKind::And => Some((3, 4)),
1364 TokenKind::Equal
1365 | TokenKind::NotEqual
1366 | TokenKind::GreaterThan
1367 | TokenKind::GreaterThanOrEqual
1368 | TokenKind::In
1369 | TokenKind::LessThan
1370 | TokenKind::LessThanOrEqual
1371 | TokenKind::Tilde
1372 | TokenKind::NoMatch => Some((5, 6)),
1373 TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1374 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1375 TokenKind::Caret => Some((13, 12)),
1376 _ => None,
1377 }
1378}
1379
1380fn is_expression_start(kind: &TokenKind) -> bool {
1381 matches!(
1382 kind,
1383 TokenKind::String
1384 | TokenKind::Regex
1385 | TokenKind::Number
1386 | TokenKind::DollarSign
1387 | TokenKind::LeftParen
1388 | TokenKind::Identifier
1389 | TokenKind::Cos
1390 | TokenKind::Exp
1391 | TokenKind::Index
1392 | TokenKind::Int
1393 | TokenKind::Length
1394 | TokenKind::Log
1395 | TokenKind::Match
1396 | TokenKind::Rand
1397 | TokenKind::Sin
1398 | TokenKind::Sprintf
1399 | TokenKind::Split
1400 | TokenKind::Sqrt
1401 | TokenKind::Srand
1402 | TokenKind::Substr
1403 | TokenKind::Increment
1404 | TokenKind::Decrement
1405 )
1406}
1407
1408fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1409 let (kind, literal) = match token.kind {
1410 TokenKind::AddAssign => (TokenKind::Plus, "+"),
1411 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1412 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1413 TokenKind::DivideAssign => (TokenKind::Division, "/"),
1414 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1415 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1416 _ => todo!(),
1417 };
1418
1419 Token::new(kind, literal, token.span.start)
1420}
1421
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Shared helpers: every test below parses a snippet and then either
    // compares the rendered program text or inspects part of the AST.
    // ---------------------------------------------------------------------

    /// Parses `source` and returns the program rendered back to text.
    fn render(source: &str) -> String {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();
        let rendered = program.to_string();
        rendered
    }

    /// Asserts that `source` parses to a program that renders as `expected`.
    fn assert_renders(source: &str, expected: &str) {
        assert_eq!(expected, render(source));
    }

    /// Asserts that `source` renders back to exactly the same text.
    fn assert_round_trips(source: &str) {
        assert_renders(source, source);
    }

    /// Asserts that `source` yields exactly one program item rendering as `expected`.
    fn assert_single_item_renders(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(expected, program.to_string());
    }

    /// Returns the expressions of a `print` statement, panicking on any other statement.
    fn print_expressions<'s, 'a>(statement: &'s Statement<'a>) -> &'s [Expression<'a>] {
        match statement {
            Statement::Print(expressions) => &expressions[..],
            _ => panic!("expected print statement"),
        }
    }

    /// Splits an infix expression into `(left, operator, right)`.
    ///
    /// Panics with `"expected {description}"` when `expression` is not infix.
    fn expect_infix<'e, 'a>(
        expression: &'e Expression<'a>,
        description: &str,
    ) -> (&'e Expression<'a>, &'e Token<'a>, &'e Expression<'a>) {
        match expression {
            Expression::Infix {
                left,
                operator,
                right,
            } => (&**left, operator, &**right),
            _ => panic!("expected {}", description),
        }
    }

    /// Parses `source` and hands the expressions of the first statement of the
    /// first `BEGIN` block (which must be a `print`) to `check`.
    fn with_begin_print(source: &str, check: impl FnOnce(&[Expression<'_>])) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let Action { statements } = begin_blocks.next().expect("expected begin block");

        check(print_expressions(&statements[0]));
    }

    /// Parses `source` and hands the statements of the first rule (which must
    /// be a pattern-less action) to `check`.
    fn with_action_statements(source: &str, check: impl FnOnce(&[Statement<'_>])) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        match rule {
            Rule::Action(Action { statements }) => check(&statements[..]),
            _ => panic!("expected action rule"),
        }
    }

    // ---------------------------------------------------------------------
    // Parser construction and top-level rule shapes
    // ---------------------------------------------------------------------

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));

        assert_eq!(parser.current_token.literal, "42");
        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        let program = parser.parse_program();

        assert_eq!(program.len(), 0);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_item_renders("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_item_renders("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_item_renders("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_item_renders("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_item_renders("/foo/ { print }", "/foo/ { print }");
    }

    // ---------------------------------------------------------------------
    // Expression structure (precedence, associativity, concatenation)
    // ---------------------------------------------------------------------

    #[test]
    fn parse_print_infix_expression() {
        with_begin_print("BEGIN { print 1 + 2 }", |exprs| {
            let (left, operator, right) = expect_infix(&exprs[0], "infix expression");

            assert!(matches!(*left, Expression::Number(1.0)));
            assert_eq!(operator.kind, TokenKind::Plus);
            assert!(matches!(*right, Expression::Number(2.0)));
        });
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        with_begin_print("BEGIN { print (1 + 2) * 3 }", |exprs| {
            let (left, operator, right) = expect_infix(&exprs[0], "infix expression");

            assert_eq!(operator.kind, TokenKind::Asterisk);
            assert!(matches!(*right, Expression::Number(3.0)));
            assert!(matches!(*left, Expression::Infix { .. }));
        });
    }

    #[test]
    fn parse_print_multiplication_has_higher_precedence_than_addition() {
        with_begin_print("BEGIN { print 1 + 2 * 3 }", |exprs| {
            let (left, operator, right) = expect_infix(&exprs[0], "infix expression");

            assert_eq!(operator.kind, TokenKind::Plus);
            assert!(matches!(*left, Expression::Number(1.0)));
            let (_, right_op, _) = expect_infix(right, "nested infix expression");
            assert_eq!(right_op.kind, TokenKind::Asterisk);
        });
    }

    #[test]
    fn parse_print_power_is_right_associative() {
        with_begin_print("BEGIN { print 2 ^ 3 ^ 2 }", |exprs| {
            let (left, operator, right) = expect_infix(&exprs[0], "infix expression");

            assert_eq!(operator.kind, TokenKind::Caret);
            assert!(matches!(*left, Expression::Number(2.0)));
            let (_, right_op, _) = expect_infix(right, "nested infix expression");
            assert_eq!(right_op.kind, TokenKind::Caret);
        });
    }

    #[test]
    fn parse_print_minus_is_left_associative() {
        with_begin_print("BEGIN { print 5 - 3 - 1 }", |exprs| {
            let (left, operator, right) = expect_infix(&exprs[0], "infix expression");

            assert_eq!(operator.kind, TokenKind::Minus);
            let (_, left_op, _) = expect_infix(left, "nested infix expression");
            assert_eq!(left_op.kind, TokenKind::Minus);
            assert!(matches!(*right, Expression::Number(1.0)));
        });
    }

    #[test]
    fn parse_print_concatenation() {
        with_begin_print(r#"BEGIN { print "Value:" 42 }"#, |exprs| {
            assert_eq!(exprs.len(), 1);
            match &exprs[0] {
                Expression::Concatenation { left, right } => {
                    assert!(matches!(**left, Expression::String("Value:")));
                    assert!(matches!(**right, Expression::Number(42.0)));
                }
                _ => panic!("expected concatenation expression"),
            }
        });
    }

    #[test]
    fn parse_continue_statement() {
        with_action_statements(r#"{ continue }"#, |statements| {
            assert!(matches!(statements[0], Statement::Continue));
        });
    }

    #[test]
    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
        with_action_statements(r#"{ x = $1; print x (++i) }"#, |statements| {
            let exprs = print_expressions(&statements[1]);

            assert_eq!(exprs.len(), 1);
            match &exprs[0] {
                Expression::Concatenation { left, right } => {
                    assert!(matches!(**left, Expression::Identifier("x")));
                    assert!(matches!(**right, Expression::PreIncrement(_)));
                }
                _ => panic!("expected concatenation expression"),
            }
        });
    }

    #[test]
    fn parse_print_field_expression() {
        with_action_statements("{ print $1 }", |statements| {
            let exprs = print_expressions(&statements[0]);

            match &exprs[0] {
                Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
                _ => panic!("expected field expression"),
            }
        });
    }

    // ---------------------------------------------------------------------
    // Rendering round trips
    // ---------------------------------------------------------------------

    #[test]
    fn parse_print_with_commas() {
        assert_round_trips(r#"BEGIN { print "Value:", 42, $1 }"#);
    }

    #[test]
    fn parse_number_of_fields_identifier() {
        assert_round_trips(r#"BEGIN { print NF }"#);
    }

    #[test]
    fn parse_printf_with_format_and_arguments() {
        assert_round_trips(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#);
    }

    #[test]
    fn parse_add_assignment_and_pre_increment() {
        assert_round_trips(r#"/Asia/ { pop += $3; ++n }"#);
    }

    #[test]
    fn parse_regex_match_pattern_action() {
        assert_round_trips(r#"$4 ~ /Asia/ { print $1 }"#);
    }

    #[test]
    fn parse_not_pattern_action() {
        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));

        let program = parser.parse_program();
        let mut rules = program.rules_iter();
        let rule = rules.next().expect("expected rule");

        match rule {
            Rule::PatternAction {
                pattern: Some(Expression::Not(inner)),
                action: Some(Action { statements }),
            } => {
                assert!(matches!(**inner, Expression::Infix { .. }));
                assert!(matches!(statements[0], Statement::Print(_)));
            }
            _ => panic!("expected negated pattern action"),
        }
    }

    #[test]
    fn parse_print_with_line_continuation_after_comma() {
        assert_renders(
            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
        );
    }

    #[test]
    fn parse_gsub_statement() {
        assert_round_trips(r#"{ gsub(/USA/, "United States"); print }"#);
    }

    #[test]
    fn parse_gsub_statement_with_target() {
        assert_round_trips(r#"{ gsub(/[ \t]+/, "", t) }"#);
    }

    #[test]
    fn parse_system_statement() {
        assert_round_trips(r#"{ system("cat " $2) }"#);
    }

    #[test]
    fn parse_print_length_builtin_expression() {
        assert_round_trips(r#"{ print length, $0 }"#);
    }

    #[test]
    fn parse_length_expression_as_rule_pattern() {
        assert_round_trips(
            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
        );
    }

    #[test]
    fn parse_field_assignment_with_substr() {
        assert_round_trips(r#"{ $1 = substr($1, 1, 3); print }"#);
    }

    #[test]
    fn parse_assignment_with_concatenation_and_substr() {
        assert_round_trips(r#"{ s = s " " substr($1, 1, 3) }"#);
    }

    #[test]
    fn parse_field_divide_assignment() {
        // The compound assignment is rendered in its expanded form.
        assert_renders(r#"{ $2 /= 1000; print }"#, r#"{ $2 = $2 / 1000; print }"#);
    }

    #[test]
    fn parse_chained_assignment() {
        assert_round_trips(r#"BEGIN { FS = OFS = "\t" }"#);
    }

    #[test]
    fn parse_if_statement_with_block() {
        assert_round_trips(r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#);
    }

    #[test]
    fn parse_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#);
    }

    #[test]
    fn parse_while_with_single_body_statement() {
        assert_renders(
            r#"{ while (n > 1) print n }"#,
            r#"{ while (n > 1) { print n } }"#,
        );
    }

    #[test]
    fn parse_do_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#);
    }

    #[test]
    fn parse_for_with_empty_body_statement() {
        assert_renders(
            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
            r#"{ for (i = 1; i <= NF; s += $i++) { }; print s }"#,
        );
    }

    #[test]
    fn parse_post_decrement_statement() {
        assert_renders(r#"{ k-- ; n-- }"#, r#"{ k--; n-- }"#);
    }

    #[test]
    fn parse_rand_expression() {
        assert_round_trips(r#"BEGIN { print rand() }"#);
    }

    #[test]
    fn parse_math_builtin_expressions() {
        assert_round_trips(r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#);
    }

    #[test]
    fn parse_index_builtin_expression() {
        assert_round_trips(r#"{ print index(1, $1) }"#);
    }

    #[test]
    fn parse_match_builtin_expression() {
        assert_round_trips(r#"{ print match($NF, $1), RSTART, RLENGTH }"#);
    }

    #[test]
    fn parse_in_membership_expression() {
        assert_round_trips(r#"{ print 1 in x }"#);
    }

    #[test]
    fn parse_parenthesized_composite_membership_expression() {
        assert_renders(
            r#"{ if (($0, $1) in x) print "yes" }"#,
            r#"{ if ($0, $1 in x) { print "yes" } }"#,
        );
    }

    #[test]
    fn parse_for_loop_with_single_body_statement() {
        assert_renders(
            r#"{ for (i = 1; i <= NF; i++) print $i }"#,
            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
        );
    }

    #[test]
    fn parse_if_with_single_statement_body() {
        assert_renders(
            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
        );
    }

    #[test]
    fn parse_exit_statement() {
        assert_round_trips(r#"NR >= 10 { exit }"#);
    }

    #[test]
    fn parse_exit_statement_with_status() {
        assert_round_trips(r#"$1 < 5000 { exit NR }"#);
    }

    #[test]
    fn parse_user_defined_function_call_statement() {
        let mut parser = Parser::new(Lexer::new(
            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
        ));

        let program = parser.parse_program();

        let definition = program
            .function_definition("myabort")
            .expect("expected function definition");
        assert_eq!(definition.parameters, vec!["n"]);
        assert_eq!(definition.statements.len(), 1);
    }

    #[test]
    fn parse_delete_array_element_statement() {
        assert_round_trips(r#"{ delete x[i, j] }"#);
    }

    #[test]
    fn parse_array_add_assignment_and_access() {
        assert_round_trips(r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#);
    }

    #[test]
    fn parse_for_in_loop() {
        assert_renders(
            r#"END { for (name in area) print name ":" area[name] }"#,
            r#"END { for (name in area) { print name ":" area[name] } }"#,
        );
    }

    #[test]
    fn parse_print_redirection() {
        assert_renders(r#"{ print >"tempbig" }"#, r#"{ print > "tempbig" }"#);
    }

    #[test]
    fn parse_print_pipe() {
        assert_round_trips(r#"{ print c ":" pop[c] | "sort" }"#);
    }

    #[test]
    fn parse_hexadecimal_number() {
        assert_round_trips(r#"BEGIN { print 0xAA }"#);
    }
}