Skip to main content

rawk_core/
parser.rs

1use crate::{
2    Lexer, Program,
3    ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4    token::{Token, TokenKind},
5};
6
7#[derive(Debug)]
8pub struct Parser<'a> {
9    lexer: Lexer<'a>,
10    current_token: Token<'a>,
11    function_definitions: Vec<FunctionDefinition<'a>>,
12}
13
14impl<'a> Parser<'a> {
15    pub fn new(mut lexer: Lexer<'a>) -> Self {
16        let current_token = lexer.next_token_regex_aware();
17        Parser {
18            lexer,
19            current_token,
20            function_definitions: Vec::new(),
21        }
22    }
23
24    fn next_token(&mut self) {
25        self.current_token = self.lexer.next_token();
26    }
27
28    fn next_token_in_regex_context(&mut self) {
29        self.current_token = self.lexer.next_token_regex_aware();
30    }
31
32    fn is_eof(&self) -> bool {
33        self.current_token.kind == TokenKind::Eof
34    }
35
36    fn is_statement_terminator(&self) -> bool {
37        matches!(
38            self.current_token.kind,
39            TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40        )
41    }
42
43    fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44        self.current_token.span.start == previous.span.start + previous.literal.len()
45    }
46
47    fn parse_array_index_expression(&mut self) -> Expression<'a> {
48        let mut index = self.parse_expression();
49        while self.current_token.kind == TokenKind::Comma {
50            let operator = self.current_token.clone();
51            self.next_token_in_regex_context();
52            let right = self.parse_expression();
53            index = Expression::Infix {
54                left: Box::new(index),
55                operator,
56                right: Box::new(right),
57            };
58        }
59        index
60    }
61
62    fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
63        match &self.current_token.kind {
64            TokenKind::Begin => {
65                self.next_token();
66                match self.parse_action() {
67                    Rule::Action(action) => Some(Rule::Begin(action)),
68                    _ => panic!("Expected action after BEGIN"),
69                }
70            }
71            TokenKind::NewLine => {
72                self.next_token_in_regex_context();
73                self.parse_next_rule()
74            }
75            TokenKind::Eof => None,
76            TokenKind::LeftCurlyBrace => Some(self.parse_action()),
77            TokenKind::Function => {
78                self.parse_function_definition();
79                None
80            }
81            TokenKind::End => {
82                self.next_token();
83                match self.parse_action() {
84                    Rule::Action(action) => Some(Rule::End(action)),
85                    _ => panic!("Expected action after END"),
86                }
87            }
88            TokenKind::Regex
89            | TokenKind::String
90            | TokenKind::Number
91            | TokenKind::DollarSign
92            | TokenKind::LeftParen
93            | TokenKind::Identifier
94            | TokenKind::Cos
95            | TokenKind::Exp
96            | TokenKind::Index
97            | TokenKind::Int
98            | TokenKind::Length
99            | TokenKind::Log
100            | TokenKind::Rand
101            | TokenKind::Sin
102            | TokenKind::Sprintf
103            | TokenKind::Split
104            | TokenKind::Sqrt
105            | TokenKind::Srand
106            | TokenKind::Substr
107            | TokenKind::Increment
108            | TokenKind::Decrement => self.parse_pattern_rule(),
109            _ => panic!(
110                "parse_next_rule not yet implemented, found token: {:?}",
111                self.current_token
112            ),
113        }
114    }
115
116    fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
117        let mut pattern = self.parse_expression();
118        if self.current_token.kind == TokenKind::Comma {
119            let operator = self.current_token.clone();
120            self.next_token_in_regex_context();
121            let right = self.parse_expression();
122            pattern = Expression::Infix {
123                left: Box::new(pattern),
124                operator,
125                right: Box::new(right),
126            };
127        }
128        let pattern = Some(pattern);
129
130        if self.current_token.kind == TokenKind::LeftCurlyBrace {
131            match self.parse_action() {
132                Rule::Action(action) => Some(Rule::PatternAction {
133                    pattern,
134                    action: Some(action),
135                }),
136                _ => panic!("Expected action after pattern"),
137            }
138        } else {
139            Some(Rule::PatternAction {
140                pattern,
141                action: None,
142            })
143        }
144    }
145
146    fn parse_action(&mut self) -> Rule<'a> {
147        self.next_token(); // consume '{'
148
149        let pattern = None;
150
151        let mut statements = Vec::new();
152        while self.current_token.kind != TokenKind::RightCurlyBrace
153            && self.current_token.kind != TokenKind::Eof
154        {
155            while self.current_token.kind == TokenKind::NewLine
156                || self.current_token.kind == TokenKind::Semicolon
157            {
158                self.next_token();
159            }
160
161            if self.current_token.kind == TokenKind::RightCurlyBrace
162                || self.current_token.kind == TokenKind::Eof
163            {
164                break;
165            }
166
167            statements.push(self.parse_statement());
168        }
169
170        if pattern.is_some() {
171            Rule::PatternAction {
172                pattern,
173                action: Some(Action { statements }),
174            }
175        } else {
176            Rule::Action(Action { statements })
177        }
178    }
179
180    fn parse_statement(&mut self) -> Statement<'a> {
181        match self.current_token.kind {
182            TokenKind::Print => self.parse_print_function(),
183            TokenKind::Printf => self.parse_printf_function(),
184            TokenKind::System => self.parse_system_function(),
185            TokenKind::Split => self.parse_split_statement(),
186            TokenKind::Sub => self.parse_sub_function(),
187            TokenKind::Gsub => self.parse_gsub_function(),
188            TokenKind::Break => self.parse_break_statement(),
189            TokenKind::Continue => self.parse_continue_statement(),
190            TokenKind::Delete => self.parse_delete_statement(),
191            TokenKind::If => self.parse_if_statement(),
192            TokenKind::Do => self.parse_do_statement(),
193            TokenKind::While => self.parse_while_statement(),
194            TokenKind::For => self.parse_for_statement(),
195            TokenKind::Return => self.parse_return_statement(),
196            TokenKind::Next => self.parse_next_statement(),
197            TokenKind::Exit => self.parse_exit_statement(),
198            TokenKind::Identifier => self.parse_assignment_statement(),
199            TokenKind::DollarSign => self.parse_field_assignment_statement(),
200            TokenKind::Increment => self.parse_pre_increment_statement(),
201            TokenKind::Decrement => self.parse_pre_decrement_statement(),
202            _ => todo!(),
203        }
204    }
205
206    fn parse_function_definition(&mut self) {
207        self.next_token();
208        if self.current_token.kind != TokenKind::Identifier {
209            todo!()
210        }
211        let name = self.current_token.literal;
212        self.next_token();
213        if self.current_token.kind != TokenKind::LeftParen {
214            todo!()
215        }
216        self.next_token();
217
218        let mut parameters = Vec::new();
219        while self.current_token.kind != TokenKind::RightParen {
220            if self.current_token.kind != TokenKind::Identifier {
221                todo!()
222            }
223            parameters.push(self.current_token.literal);
224            self.next_token();
225            if self.current_token.kind == TokenKind::Comma {
226                self.next_token();
227            } else if self.current_token.kind != TokenKind::RightParen {
228                todo!()
229            }
230        }
231
232        self.next_token();
233        while self.current_token.kind == TokenKind::NewLine {
234            self.next_token();
235        }
236        if self.current_token.kind != TokenKind::LeftCurlyBrace {
237            todo!()
238        }
239
240        let mut statements = Vec::new();
241        self.next_token(); // consume '{'
242        while self.current_token.kind != TokenKind::RightCurlyBrace
243            && self.current_token.kind != TokenKind::Eof
244        {
245            while self.current_token.kind == TokenKind::NewLine
246                || self.current_token.kind == TokenKind::Semicolon
247            {
248                self.next_token();
249            }
250
251            if self.current_token.kind == TokenKind::RightCurlyBrace
252                || self.current_token.kind == TokenKind::Eof
253            {
254                break;
255            }
256
257            statements.push(self.parse_statement());
258        }
259        self.function_definitions.push(FunctionDefinition {
260            name,
261            parameters,
262            statements,
263        });
264    }
265
266    fn parse_assignment_statement(&mut self) -> Statement<'a> {
267        let identifier = self.current_token.clone();
268        self.next_token();
269        self.parse_assignment_statement_with_identifier(identifier)
270    }
271
272    fn parse_assignment_statement_with_identifier(&mut self, identifier: Token<'a>) -> Statement<'a> {
273        if self.current_token.kind == TokenKind::LeftParen
274            && self.token_is_immediately_after(&identifier)
275        {
276            let args = self.parse_call_arguments();
277            return Statement::Expression(Expression::FunctionCall {
278                name: identifier.literal,
279                args,
280            });
281        }
282        if self.current_token.kind == TokenKind::LeftSquareBracket {
283            self.next_token_in_regex_context();
284            let index = self.parse_array_index_expression();
285            if self.current_token.kind != TokenKind::RightSquareBracket {
286                todo!()
287            }
288            self.next_token();
289            if self.current_token.kind == TokenKind::Assign {
290                self.next_token();
291                let value = self.parse_expression();
292                return Statement::ArrayAssignment {
293                    identifier: identifier.literal,
294                    index,
295                    value,
296                };
297            }
298            if self.current_token.kind == TokenKind::AddAssign {
299                self.next_token();
300                let value = self.parse_expression();
301                return Statement::ArrayAddAssignment {
302                    identifier: identifier.literal,
303                    index,
304                    value,
305                };
306            }
307            if self.current_token.kind == TokenKind::Increment {
308                self.next_token();
309                return Statement::ArrayPostIncrement {
310                    identifier: identifier.literal,
311                    index,
312                };
313            }
314            if self.current_token.kind == TokenKind::Decrement {
315                self.next_token();
316                return Statement::ArrayPostDecrement {
317                    identifier: identifier.literal,
318                    index,
319                };
320            }
321            todo!()
322        }
323        if self.current_token.kind == TokenKind::Assign {
324            self.next_token();
325            if self.current_token.kind == TokenKind::Split {
326                return self.parse_split_assignment_statement(identifier.literal);
327            }
328            let value = self.parse_expression();
329            Statement::Assignment {
330                identifier: identifier.literal,
331                value,
332            }
333        } else if self.current_token.kind == TokenKind::Increment {
334            self.next_token();
335            Statement::PostIncrement {
336                identifier: identifier.literal,
337            }
338        } else if self.current_token.kind == TokenKind::Decrement {
339            self.next_token();
340            Statement::PostDecrement {
341                identifier: identifier.literal,
342            }
343        } else if self.current_token.kind == TokenKind::AddAssign {
344            self.next_token();
345            let value = self.parse_expression();
346            Statement::AddAssignment {
347                identifier: identifier.literal,
348                value,
349            }
350        } else if matches!(
351            self.current_token.kind,
352            TokenKind::SubtractAssign
353                | TokenKind::MultiplyAssign
354                | TokenKind::DivideAssign
355                | TokenKind::ModuloAssign
356                | TokenKind::PowerAssign
357        ) {
358            let assign_token = self.current_token.clone();
359            self.next_token();
360            let right_value = self.parse_expression();
361            Statement::Assignment {
362                identifier: identifier.literal,
363                value: Expression::Infix {
364                    left: Box::new(Expression::Identifier(identifier.literal)),
365                    operator: compound_assign_operator(&assign_token),
366                    right: Box::new(right_value),
367                },
368            }
369        } else {
370            todo!()
371        }
372    }
373
374    fn parse_delete_statement(&mut self) -> Statement<'a> {
375        self.next_token();
376        if self.current_token.kind != TokenKind::Identifier {
377            todo!()
378        }
379        let identifier = self.current_token.literal;
380        self.next_token();
381        if self.current_token.kind != TokenKind::LeftSquareBracket {
382            return Statement::Delete {
383                identifier,
384                index: None,
385            };
386        }
387
388        self.next_token_in_regex_context();
389        let index = self.parse_array_index_expression();
390        if self.current_token.kind != TokenKind::RightSquareBracket {
391            todo!()
392        }
393        self.next_token();
394        Statement::Delete {
395            identifier,
396            index: Some(index),
397        }
398    }
399
400    fn parse_break_statement(&mut self) -> Statement<'a> {
401        self.next_token();
402        Statement::Break
403    }
404
405    fn parse_continue_statement(&mut self) -> Statement<'a> {
406        self.next_token();
407        Statement::Continue
408    }
409
410    fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
411        self.next_token();
412        if self.current_token.kind != TokenKind::Identifier {
413            todo!()
414        }
415        let identifier = self.current_token.literal;
416        self.next_token();
417        Statement::PreIncrement { identifier }
418    }
419
420    fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
421        self.next_token();
422        if self.current_token.kind != TokenKind::Identifier {
423            todo!()
424        }
425        let identifier = self.current_token.literal;
426        self.next_token();
427        Statement::PreDecrement { identifier }
428    }
429
430    fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
431        self.next_token();
432        if self.current_token.kind != TokenKind::LeftParen {
433            todo!()
434        }
435        self.next_token_in_regex_context();
436        let string = self.parse_expression();
437        if self.current_token.kind != TokenKind::Comma {
438            todo!()
439        }
440        self.next_token();
441        if self.current_token.kind != TokenKind::Identifier {
442            todo!()
443        }
444        let array = self.current_token.literal;
445        self.next_token();
446        if self.current_token.kind != TokenKind::RightParen {
447            todo!()
448        }
449        self.next_token();
450        Statement::SplitAssignment {
451            identifier,
452            string,
453            array,
454        }
455    }
456
457    fn parse_split_statement(&mut self) -> Statement<'a> {
458        self.next_token();
459        if self.current_token.kind != TokenKind::LeftParen {
460            todo!()
461        }
462        self.next_token_in_regex_context();
463        let string = self.parse_expression();
464        if self.current_token.kind != TokenKind::Comma {
465            todo!()
466        }
467        self.next_token();
468        if self.current_token.kind != TokenKind::Identifier {
469            todo!()
470        }
471        let array = self.current_token.literal;
472        self.next_token();
473        if self.current_token.kind != TokenKind::RightParen {
474            todo!()
475        }
476        self.next_token();
477        Statement::Split { string, array }
478    }
479
480    fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
481        self.next_token();
482        let field = self.parse_primary_expression();
483        let assign_token = self.current_token.clone();
484        self.next_token();
485        let right_value = self.parse_expression();
486
487        let value = if assign_token.kind == TokenKind::Assign {
488            right_value
489        } else {
490            let operator = compound_assign_operator(&assign_token);
491            Expression::Infix {
492                left: Box::new(Expression::Field(Box::new(field.clone()))),
493                operator,
494                right: Box::new(right_value),
495            }
496        };
497        Statement::FieldAssignment { field, value }
498    }
499
500    fn parse_if_statement(&mut self) -> Statement<'a> {
501        self.next_token();
502        if self.current_token.kind != TokenKind::LeftParen {
503            todo!()
504        }
505        self.next_token_in_regex_context();
506        let condition = self.parse_expression();
507        if self.current_token.kind != TokenKind::RightParen {
508            todo!()
509        }
510        self.next_token();
511        while self.current_token.kind == TokenKind::NewLine
512            || self.current_token.kind == TokenKind::Semicolon
513        {
514            self.next_token();
515        }
516        let then_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
517            self.parse_statement_block()
518        } else {
519            vec![self.parse_statement()]
520        };
521
522        while self.current_token.kind == TokenKind::NewLine
523            || self.current_token.kind == TokenKind::Semicolon
524        {
525            self.next_token();
526        }
527
528        if self.current_token.kind == TokenKind::Else {
529            self.next_token();
530            while self.current_token.kind == TokenKind::NewLine
531                || self.current_token.kind == TokenKind::Semicolon
532            {
533                self.next_token();
534            }
535            let else_statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
536                self.parse_statement_block()
537            } else {
538                vec![self.parse_statement()]
539            };
540            return Statement::IfElse {
541                condition,
542                then_statements,
543                else_statements,
544            };
545        }
546
547        Statement::If {
548            condition,
549            then_statements,
550        }
551    }
552
553    fn parse_exit_statement(&mut self) -> Statement<'a> {
554        self.next_token();
555        let status = if self.is_statement_terminator() {
556            None
557        } else {
558            Some(self.parse_expression())
559        };
560        Statement::Exit(status)
561    }
562
563    fn parse_return_statement(&mut self) -> Statement<'a> {
564        self.next_token();
565        let value = if self.is_statement_terminator() {
566            None
567        } else {
568            Some(self.parse_expression())
569        };
570        Statement::Return(value)
571    }
572
573    fn parse_next_statement(&mut self) -> Statement<'a> {
574        self.next_token();
575        Statement::Next
576    }
577
578    fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
579        self.next_token(); // consume '{'
580        let mut statements = Vec::new();
581        while self.current_token.kind != TokenKind::RightCurlyBrace
582            && self.current_token.kind != TokenKind::Eof
583        {
584            while self.current_token.kind == TokenKind::NewLine
585                || self.current_token.kind == TokenKind::Semicolon
586            {
587                self.next_token();
588            }
589
590            if self.current_token.kind == TokenKind::RightCurlyBrace
591                || self.current_token.kind == TokenKind::Eof
592            {
593                break;
594            }
595            statements.push(self.parse_statement());
596        }
597        if self.current_token.kind == TokenKind::RightCurlyBrace {
598            self.next_token();
599        }
600        statements
601    }
602
603    fn parse_while_statement(&mut self) -> Statement<'a> {
604        self.next_token();
605        if self.current_token.kind != TokenKind::LeftParen {
606            todo!()
607        }
608        self.next_token_in_regex_context();
609        let condition = self.parse_expression();
610        if self.current_token.kind != TokenKind::RightParen {
611            todo!()
612        }
613        self.next_token();
614        while self.current_token.kind == TokenKind::NewLine
615            || self.current_token.kind == TokenKind::Semicolon
616        {
617            self.next_token();
618        }
619
620        let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
621            self.parse_statement_block()
622        } else {
623            vec![self.parse_statement()]
624        };
625        Statement::While {
626            condition,
627            statements,
628        }
629    }
630
631    fn parse_do_statement(&mut self) -> Statement<'a> {
632        self.next_token();
633        while self.current_token.kind == TokenKind::NewLine
634            || self.current_token.kind == TokenKind::Semicolon
635        {
636            self.next_token();
637        }
638
639        let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
640            self.parse_statement_block()
641        } else {
642            vec![self.parse_statement()]
643        };
644
645        while self.current_token.kind == TokenKind::NewLine
646            || self.current_token.kind == TokenKind::Semicolon
647        {
648            self.next_token();
649        }
650
651        if self.current_token.kind != TokenKind::While {
652            todo!()
653        }
654        self.next_token();
655        if self.current_token.kind != TokenKind::LeftParen {
656            todo!()
657        }
658        self.next_token_in_regex_context();
659        let condition = self.parse_expression();
660        if self.current_token.kind != TokenKind::RightParen {
661            todo!()
662        }
663        self.next_token();
664        Statement::DoWhile {
665            condition,
666            statements,
667        }
668    }
669
670    fn parse_for_statement(&mut self) -> Statement<'a> {
671        self.next_token();
672        if self.current_token.kind != TokenKind::LeftParen {
673            todo!()
674        }
675        self.next_token();
676        while self.current_token.kind == TokenKind::NewLine {
677            self.next_token();
678        }
679
680        let init = if self.current_token.kind == TokenKind::Semicolon {
681            Statement::Empty
682        } else if self.current_token.kind == TokenKind::Identifier {
683            let variable = self.current_token.clone();
684            self.next_token();
685            if self.current_token.kind == TokenKind::In {
686                self.next_token();
687                if self.current_token.kind != TokenKind::Identifier {
688                    todo!()
689                }
690                let array = self.current_token.literal;
691                self.next_token();
692                if self.current_token.kind != TokenKind::RightParen {
693                    todo!()
694                }
695                self.next_token();
696                while self.current_token.kind == TokenKind::NewLine
697                    || self.current_token.kind == TokenKind::Semicolon
698                {
699                    self.next_token();
700                }
701                let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
702                    self.parse_statement_block()
703                } else {
704                    vec![self.parse_statement()]
705                };
706                return Statement::ForIn {
707                    variable: variable.literal,
708                    array,
709                    statements,
710                };
711            }
712            self.parse_assignment_statement_with_identifier(variable)
713        } else {
714            self.parse_statement()
715        };
716        while self.current_token.kind == TokenKind::NewLine {
717            self.next_token();
718        }
719        if self.current_token.kind != TokenKind::Semicolon {
720            todo!()
721        }
722        self.next_token_in_regex_context();
723        while self.current_token.kind == TokenKind::NewLine {
724            self.next_token_in_regex_context();
725        }
726
727        let condition = if self.current_token.kind == TokenKind::Semicolon {
728            Expression::Number(1.0)
729        } else {
730            self.parse_expression()
731        };
732        while self.current_token.kind == TokenKind::NewLine {
733            self.next_token();
734        }
735        if self.current_token.kind != TokenKind::Semicolon {
736            todo!()
737        }
738        self.next_token_in_regex_context();
739        while self.current_token.kind == TokenKind::NewLine {
740            self.next_token_in_regex_context();
741        }
742
743        let update = if self.current_token.kind == TokenKind::RightParen {
744            Statement::Empty
745        } else {
746            self.parse_statement()
747        };
748        while self.current_token.kind == TokenKind::NewLine {
749            self.next_token();
750        }
751        if self.current_token.kind != TokenKind::RightParen {
752            todo!()
753        }
754        self.next_token();
755
756        while self.current_token.kind == TokenKind::NewLine
757            || self.current_token.kind == TokenKind::Semicolon
758        {
759            self.next_token();
760        }
761
762        let statements = if self.current_token.kind == TokenKind::LeftCurlyBrace {
763            self.parse_statement_block()
764        } else {
765            vec![self.parse_statement()]
766        };
767
768        Statement::For {
769            init: Box::new(init),
770            condition,
771            update: Box::new(update),
772            statements,
773        }
774    }
775
776    fn parse_print_function(&mut self) -> Statement<'a> {
777        let mut expressions = Vec::new();
778        let mut expect_more = false;
779        self.next_token();
780
781        loop {
782            if self.current_token.kind == TokenKind::RightCurlyBrace
783                || self.current_token.kind == TokenKind::RightParen
784                || self.current_token.kind == TokenKind::Eof
785                || self.current_token.kind == TokenKind::GreaterThan
786                || self.current_token.kind == TokenKind::Append
787                || self.current_token.kind == TokenKind::Pipe
788            {
789                break;
790            }
791
792            if self.current_token.kind == TokenKind::NewLine
793                || self.current_token.kind == TokenKind::Semicolon
794            {
795                if expect_more {
796                    self.next_token();
797                    continue;
798                }
799                break;
800            }
801
802            if self.current_token.kind == TokenKind::Comma {
803                self.next_token();
804                expect_more = true;
805                continue;
806            }
807
808            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
809            let expression = self.parse_expression();
810            expressions.push(expression);
811            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
812                while self.current_token.kind == TokenKind::Comma {
813                    self.next_token();
814                    expressions.push(self.parse_expression());
815                }
816                if self.current_token.kind != TokenKind::RightParen {
817                    todo!()
818                }
819                self.next_token();
820            }
821            expect_more = false;
822        }
823        if self.current_token.kind == TokenKind::RightParen {
824            self.next_token();
825        }
826
827        if self.current_token.kind == TokenKind::GreaterThan
828            || self.current_token.kind == TokenKind::Append
829        {
830            let append = self.current_token.kind == TokenKind::Append;
831            self.next_token();
832            let target = self.parse_expression();
833            return Statement::PrintRedirect {
834                expressions,
835                target,
836                append,
837            };
838        }
839        if self.current_token.kind == TokenKind::Pipe {
840            self.next_token();
841            let target = self.parse_expression();
842            return Statement::PrintPipe {
843                expressions,
844                target,
845            };
846        }
847
848        Statement::Print(expressions)
849    }
850
851    fn parse_printf_function(&mut self) -> Statement<'a> {
852        self.next_token();
853        let expressions = if self.current_token.kind == TokenKind::LeftParen {
854            self.next_token_in_regex_context();
855            let mut expressions = Vec::new();
856            while self.current_token.kind != TokenKind::RightParen
857                && self.current_token.kind != TokenKind::Eof
858            {
859                if self.current_token.kind == TokenKind::Comma {
860                    self.next_token();
861                    continue;
862                }
863                expressions.push(self.parse_expression());
864            }
865            if self.current_token.kind == TokenKind::RightParen {
866                self.next_token();
867            }
868            expressions
869        } else {
870            self.parse_expression_list_until_action_end_from_current()
871        };
872
873        Statement::Printf(expressions)
874    }
875
876    fn parse_gsub_function(&mut self) -> Statement<'a> {
877        self.next_token();
878        if self.current_token.kind != TokenKind::LeftParen {
879            todo!()
880        }
881
882        self.next_token_in_regex_context();
883        let pattern = self.parse_expression();
884
885        if self.current_token.kind != TokenKind::Comma {
886            todo!()
887        }
888        self.next_token();
889        let replacement = self.parse_expression();
890
891        let target = if self.current_token.kind == TokenKind::Comma {
892            self.next_token();
893            Some(self.parse_expression())
894        } else {
895            None
896        };
897
898        if self.current_token.kind != TokenKind::RightParen {
899            todo!()
900        }
901        self.next_token();
902
903        Statement::Gsub {
904            pattern,
905            replacement,
906            target,
907        }
908    }
909
910    fn parse_sub_function(&mut self) -> Statement<'a> {
911        self.next_token();
912        if self.current_token.kind != TokenKind::LeftParen {
913            todo!()
914        }
915
916        self.next_token_in_regex_context();
917        let pattern = self.parse_expression();
918
919        if self.current_token.kind != TokenKind::Comma {
920            todo!()
921        }
922        self.next_token();
923        let replacement = self.parse_expression();
924
925        if self.current_token.kind == TokenKind::Comma {
926            todo!()
927        }
928
929        if self.current_token.kind != TokenKind::RightParen {
930            todo!()
931        }
932        self.next_token();
933
934        Statement::Sub {
935            pattern,
936            replacement,
937        }
938    }
939
940    fn parse_system_function(&mut self) -> Statement<'a> {
941        self.next_token();
942        if self.current_token.kind != TokenKind::LeftParen {
943            todo!()
944        }
945        self.next_token();
946        let command = self.parse_expression();
947        if self.current_token.kind != TokenKind::RightParen {
948            todo!()
949        }
950        self.next_token();
951        Statement::System(command)
952    }
953
954    fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
955        let mut expressions = Vec::new();
956        let mut expect_more = false;
957
958        loop {
959            if self.current_token.kind == TokenKind::RightCurlyBrace
960                || self.current_token.kind == TokenKind::RightParen
961                || self.current_token.kind == TokenKind::Eof
962            {
963                break;
964            }
965
966            if self.current_token.kind == TokenKind::NewLine
967                || self.current_token.kind == TokenKind::Semicolon
968            {
969                if expect_more {
970                    self.next_token();
971                    continue;
972                }
973                break;
974            }
975
976            if self.current_token.kind == TokenKind::Comma {
977                self.next_token();
978                expect_more = true;
979                continue;
980            }
981
982            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
983            let expression = self.parse_expression();
984            expressions.push(expression);
985            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
986                while self.current_token.kind == TokenKind::Comma {
987                    self.next_token();
988                    expressions.push(self.parse_expression());
989                }
990                if self.current_token.kind != TokenKind::RightParen {
991                    todo!()
992                }
993                self.next_token();
994            }
995            expect_more = false;
996        }
997
998        if self.current_token.kind == TokenKind::RightParen {
999            self.next_token();
1000        }
1001
1002        expressions
1003    }
1004
1005    fn parse_expression(&mut self) -> Expression<'a> {
1006        self.parse_expression_with_min_precedence(0)
1007    }
1008
1009    fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1010        const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1011        const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1012        let mut left = self.parse_primary_expression();
1013
1014        loop {
1015            if self.current_token.kind == TokenKind::QuestionMark {
1016                if min_precedence > 0 {
1017                    break;
1018                }
1019                self.next_token_in_regex_context();
1020                let then_expr = self.parse_expression_with_min_precedence(0);
1021                if self.current_token.kind != TokenKind::Colon {
1022                    todo!()
1023                }
1024                self.next_token_in_regex_context();
1025                let else_expr = self.parse_expression_with_min_precedence(0);
1026                left = Expression::Ternary {
1027                    condition: Box::new(left),
1028                    then_expr: Box::new(then_expr),
1029                    else_expr: Box::new(else_expr),
1030                };
1031                continue;
1032            }
1033
1034            if infix_operator_precedence(&self.current_token.kind).is_none()
1035                && is_expression_start(&self.current_token.kind)
1036            {
1037                if CONCAT_LEFT_PRECEDENCE < min_precedence {
1038                    break;
1039                }
1040
1041                let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1042                left = Expression::Concatenation {
1043                    left: Box::new(left),
1044                    right: Box::new(right),
1045                };
1046                continue;
1047            }
1048
1049            let (left_precedence, right_precedence) =
1050                match infix_operator_precedence(&self.current_token.kind) {
1051                    Some(value) => value,
1052                    None => break,
1053                };
1054
1055            if left_precedence < min_precedence {
1056                break;
1057            }
1058
1059            let operator = self.current_token.clone();
1060            if matches!(
1061                operator.kind,
1062                TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1063            ) {
1064                self.next_token_in_regex_context();
1065            } else {
1066                self.next_token();
1067            }
1068            let right = self.parse_expression_with_min_precedence(right_precedence);
1069
1070            left = Expression::Infix {
1071                left: Box::new(left),
1072                operator,
1073                right: Box::new(right),
1074            };
1075        }
1076
1077        left
1078    }
1079
1080    fn parse_primary_expression(&mut self) -> Expression<'a> {
1081        if self.current_token.kind == TokenKind::Minus {
1082            let operator = self.current_token.clone();
1083            self.next_token();
1084            let right = self.parse_primary_expression();
1085            return Expression::Infix {
1086                left: Box::new(Expression::Number(0.0)),
1087                operator,
1088                right: Box::new(right),
1089            };
1090        }
1091        if self.current_token.kind == TokenKind::Plus {
1092            self.next_token();
1093            return self.parse_primary_expression();
1094        }
1095        if self.current_token.kind == TokenKind::ExclamationMark {
1096            self.next_token_in_regex_context();
1097            let expression = self.parse_primary_expression();
1098            return Expression::Not(Box::new(expression));
1099        }
1100        if self.current_token.kind == TokenKind::Increment {
1101            self.next_token();
1102            let expression = self.parse_primary_expression();
1103            return Expression::PreIncrement(Box::new(expression));
1104        }
1105        if self.current_token.kind == TokenKind::Decrement {
1106            self.next_token();
1107            let expression = self.parse_primary_expression();
1108            return Expression::PreDecrement(Box::new(expression));
1109        }
1110
1111        let mut expression = self.parse_primary_atom();
1112        if self.current_token.kind == TokenKind::Increment {
1113            self.next_token();
1114            expression = Expression::PostIncrement(Box::new(expression));
1115        } else if self.current_token.kind == TokenKind::Decrement {
1116            self.next_token();
1117            expression = Expression::PostDecrement(Box::new(expression));
1118        }
1119        expression
1120    }
1121
1122    fn parse_primary_atom(&mut self) -> Expression<'a> {
1123        match self.current_token.kind {
1124            TokenKind::String => {
1125                let expression = Expression::String(self.current_token.literal);
1126                self.next_token();
1127                expression
1128            }
1129            TokenKind::Regex => {
1130                let expression = Expression::Regex(self.current_token.literal);
1131                self.next_token();
1132                expression
1133            }
1134            TokenKind::Number => {
1135                let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
1136                    Expression::Number(value)
1137                } else {
1138                    todo!()
1139                };
1140                self.next_token();
1141                expression
1142            }
1143            TokenKind::DollarSign => {
1144                self.next_token();
1145                let expression = self.parse_primary_atom();
1146                Expression::Field(Box::new(expression))
1147            }
1148            TokenKind::LeftParen => {
1149                self.next_token();
1150                let expression = self.parse_expression();
1151                if self.current_token.kind == TokenKind::RightParen {
1152                    self.next_token();
1153                }
1154                expression
1155            }
1156            TokenKind::Identifier => {
1157                let identifier = self.current_token.clone();
1158                self.next_token();
1159                if self.current_token.kind == TokenKind::LeftParen
1160                    && self.token_is_immediately_after(&identifier)
1161                {
1162                    let args = self.parse_call_arguments();
1163                    return Expression::FunctionCall {
1164                        name: identifier.literal,
1165                        args,
1166                    };
1167                }
1168                if self.current_token.kind == TokenKind::LeftSquareBracket {
1169                    self.next_token_in_regex_context();
1170                    let index = self.parse_array_index_expression();
1171                    if self.current_token.kind != TokenKind::RightSquareBracket {
1172                        todo!()
1173                    }
1174                    self.next_token();
1175                    Expression::ArrayAccess {
1176                        identifier: identifier.literal,
1177                        index: Box::new(index),
1178                    }
1179                } else {
1180                    Expression::Identifier(identifier.literal)
1181                }
1182            }
1183            TokenKind::Length => {
1184                self.next_token();
1185                if self.current_token.kind == TokenKind::LeftParen {
1186                    self.next_token();
1187                    if self.current_token.kind == TokenKind::RightParen {
1188                        self.next_token();
1189                        Expression::Length(None)
1190                    } else {
1191                        let expression = self.parse_expression();
1192                        if self.current_token.kind != TokenKind::RightParen {
1193                            todo!()
1194                        }
1195                        self.next_token();
1196                        Expression::Length(Some(Box::new(expression)))
1197                    }
1198                } else {
1199                    Expression::Length(None)
1200                }
1201            }
1202            TokenKind::Substr => {
1203                self.next_token();
1204                if self.current_token.kind != TokenKind::LeftParen {
1205                    todo!()
1206                }
1207                self.next_token();
1208                let string = self.parse_expression();
1209                if self.current_token.kind != TokenKind::Comma {
1210                    todo!()
1211                }
1212                self.next_token();
1213                let start = self.parse_expression();
1214                let mut length = None;
1215                if self.current_token.kind == TokenKind::Comma {
1216                    self.next_token();
1217                    length = Some(Box::new(self.parse_expression()));
1218                }
1219                if self.current_token.kind != TokenKind::RightParen {
1220                    todo!()
1221                }
1222                self.next_token();
1223                Expression::Substr {
1224                    string: Box::new(string),
1225                    start: Box::new(start),
1226                    length,
1227                }
1228            }
1229            TokenKind::Rand => {
1230                self.next_token();
1231                if self.current_token.kind == TokenKind::LeftParen {
1232                    self.next_token();
1233                    if self.current_token.kind != TokenKind::RightParen {
1234                        todo!()
1235                    }
1236                    self.next_token();
1237                }
1238                Expression::Rand
1239            }
1240            TokenKind::Cos
1241            | TokenKind::Exp
1242            | TokenKind::Index
1243            | TokenKind::Int
1244            | TokenKind::Log
1245            | TokenKind::Sin
1246            | TokenKind::Sprintf
1247            | TokenKind::Split
1248            | TokenKind::Sqrt
1249            | TokenKind::Srand => {
1250                let name = self.current_token.literal;
1251                self.next_token();
1252                if self.current_token.kind == TokenKind::LeftParen {
1253                    let args = self.parse_call_arguments();
1254                    return Expression::FunctionCall { name, args };
1255                }
1256                Expression::Number(0.0)
1257            }
1258            _ => {
1259                panic!(
1260                    "parse_primary_expression not yet implemented, found token: {:?}",
1261                    self.current_token
1262                )
1263            }
1264        }
1265    }
1266
1267    pub fn parse_program(&mut self) -> Program<'_> {
1268        let mut program = Program::new();
1269
1270        while !self.is_eof() {
1271            match self.parse_next_rule() {
1272                Some(Rule::Begin(action)) => program.add_begin_block(action),
1273                Some(Rule::End(action)) => program.add_end_block(action),
1274                Some(rule) => program.add_rule(rule),
1275                None => {}
1276            }
1277            self.next_token_in_regex_context();
1278        }
1279
1280        for definition in self.function_definitions.drain(..) {
1281            program.add_function_definition(definition);
1282        }
1283
1284        program
1285    }
1286
1287    fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1288        if self.current_token.kind != TokenKind::LeftParen {
1289            return vec![];
1290        }
1291        self.next_token_in_regex_context();
1292        let mut args = Vec::new();
1293        while self.current_token.kind != TokenKind::RightParen
1294            && self.current_token.kind != TokenKind::Eof
1295        {
1296            if self.current_token.kind == TokenKind::Comma {
1297                self.next_token();
1298                continue;
1299            }
1300            args.push(self.parse_expression());
1301        }
1302        if self.current_token.kind == TokenKind::RightParen {
1303            self.next_token();
1304        }
1305        args
1306    }
1307}
1308
1309fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1310    match kind {
1311        TokenKind::Assign
1312        | TokenKind::AddAssign
1313        | TokenKind::SubtractAssign
1314        | TokenKind::MultiplyAssign
1315        | TokenKind::DivideAssign
1316        | TokenKind::ModuloAssign
1317        | TokenKind::PowerAssign => Some((0, 0)),
1318        TokenKind::Or => Some((1, 2)),
1319        TokenKind::And => Some((3, 4)),
1320        TokenKind::Equal
1321        | TokenKind::NotEqual
1322        | TokenKind::GreaterThan
1323        | TokenKind::GreaterThanOrEqual
1324        | TokenKind::In
1325        | TokenKind::LessThan
1326        | TokenKind::LessThanOrEqual
1327        | TokenKind::Tilde
1328        | TokenKind::NoMatch => Some((5, 6)),
1329        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1330        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1331        TokenKind::Caret => Some((13, 12)),
1332        _ => None,
1333    }
1334}
1335
1336fn is_expression_start(kind: &TokenKind) -> bool {
1337    matches!(
1338        kind,
1339        TokenKind::String
1340            | TokenKind::Regex
1341            | TokenKind::Number
1342            | TokenKind::DollarSign
1343            | TokenKind::LeftParen
1344            | TokenKind::Identifier
1345            | TokenKind::Cos
1346            | TokenKind::Exp
1347            | TokenKind::Index
1348            | TokenKind::Int
1349            | TokenKind::Length
1350            | TokenKind::Log
1351            | TokenKind::Rand
1352            | TokenKind::Sin
1353            | TokenKind::Sprintf
1354            | TokenKind::Split
1355            | TokenKind::Sqrt
1356            | TokenKind::Srand
1357            | TokenKind::Substr
1358            | TokenKind::Increment
1359            | TokenKind::Decrement
1360    )
1361}
1362
1363fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1364    let (kind, literal) = match token.kind {
1365        TokenKind::AddAssign => (TokenKind::Plus, "+"),
1366        TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1367        TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1368        TokenKind::DivideAssign => (TokenKind::Division, "/"),
1369        TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1370        TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1371        _ => todo!(),
1372    };
1373
1374    Token::new(kind, literal, token.span.start)
1375}
1376
1377#[cfg(test)]
1378mod tests {
1379    use super::*;
1380
1381    #[test]
1382    fn create_parser() {
1383        let mut parser = Parser::new(Lexer::new("42 == 42"));
1384
1385        assert_eq!(parser.current_token.literal, "42");
1386        parser.next_token();
1387        assert_eq!(parser.current_token.literal, "==");
1388    }
1389
1390    #[test]
1391    fn parse_empty_program() {
1392        let mut parser = Parser::new(Lexer::new(""));
1393
1394        let program = parser.parse_program();
1395
1396        assert_eq!(program.len(), 0);
1397    }
1398
1399    #[test]
1400    fn parse_action_without_pattern() {
1401        let mut parser = Parser::new(Lexer::new("{ print }"));
1402
1403        let program = parser.parse_program();
1404
1405        assert_eq!(program.len(), 1);
1406        assert_eq!("{ print }", program.to_string());
1407    }
1408
1409    #[test]
1410    fn parse_action_with_leading_newlines() {
1411        let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1412
1413        let program = parser.parse_program();
1414
1415        assert_eq!(program.len(), 1);
1416        assert_eq!("{ print }", program.to_string());
1417    }
1418
1419    #[test]
1420    fn parse_begin_block() {
1421        let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1422
1423        let program = parser.parse_program();
1424
1425        assert_eq!(program.len(), 1);
1426        assert_eq!("BEGIN { print }", program.to_string());
1427    }
1428
1429    #[test]
1430    fn parse_end_block() {
1431        let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1432
1433        let program = parser.parse_program();
1434
1435        assert_eq!(program.len(), 1);
1436        assert_eq!("END { print 42 }", program.to_string());
1437    }
1438
1439    #[test]
1440    fn parse_regex_pattern_action() {
1441        let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1442
1443        let program = parser.parse_program();
1444
1445        assert_eq!(program.len(), 1);
1446        assert_eq!("/foo/ { print }", program.to_string());
1447    }
1448
1449    #[test]
1450    fn parse_print_infix_expression() {
1451        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1452
1453        let program = parser.parse_program();
1454        let mut begin_blocks = program.begin_blocks_iter();
1455        let Action { statements } = begin_blocks.next().expect("expected begin block");
1456
1457        let exprs = match &statements[0] {
1458            Statement::Print(expressions) => expressions,
1459            _ => panic!("expected print statement"),
1460        };
1461
1462        match &exprs[0] {
1463            Expression::Infix {
1464                left,
1465                operator,
1466                right,
1467            } => {
1468                assert!(matches!(**left, Expression::Number(1.0)));
1469                assert_eq!(operator.kind, TokenKind::Plus);
1470                assert!(matches!(**right, Expression::Number(2.0)));
1471            }
1472            _ => panic!("expected infix expression"),
1473        }
1474    }
1475
1476    #[test]
1477    fn parse_print_parenthesized_expression() {
1478        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1479
1480        let program = parser.parse_program();
1481        let mut begin_blocks = program.begin_blocks_iter();
1482        let Action { statements } = begin_blocks.next().expect("expected begin block");
1483
1484        let exprs = match &statements[0] {
1485            Statement::Print(expressions) => expressions,
1486            _ => panic!("expected print statement"),
1487        };
1488
1489        match &exprs[0] {
1490            Expression::Infix {
1491                left,
1492                operator,
1493                right,
1494            } => {
1495                assert_eq!(operator.kind, TokenKind::Asterisk);
1496                assert!(matches!(**right, Expression::Number(3.0)));
1497                assert!(matches!(**left, Expression::Infix { .. }));
1498            }
1499            _ => panic!("expected infix expression"),
1500        }
1501    }
1502
1503    #[test]
1504    fn parse_print_multiplication_has_higher_precedence_than_addition() {
1505        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1506
1507        let program = parser.parse_program();
1508        let mut begin_blocks = program.begin_blocks_iter();
1509        let Action { statements } = begin_blocks.next().expect("expected begin block");
1510
1511        let exprs = match &statements[0] {
1512            Statement::Print(expressions) => expressions,
1513            _ => panic!("expected print statement"),
1514        };
1515
1516        match &exprs[0] {
1517            Expression::Infix {
1518                left,
1519                operator,
1520                right,
1521            } => {
1522                assert_eq!(operator.kind, TokenKind::Plus);
1523                assert!(matches!(**left, Expression::Number(1.0)));
1524                match &**right {
1525                    Expression::Infix {
1526                        operator: right_op, ..
1527                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1528                    _ => panic!("expected nested infix expression"),
1529                }
1530            }
1531            _ => panic!("expected infix expression"),
1532        }
1533    }
1534
1535    #[test]
1536    fn parse_print_power_is_right_associative() {
1537        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1538
1539        let program = parser.parse_program();
1540        let mut begin_blocks = program.begin_blocks_iter();
1541        let Action { statements } = begin_blocks.next().expect("expected begin block");
1542
1543        let exprs = match &statements[0] {
1544            Statement::Print(expressions) => expressions,
1545            _ => panic!("expected print statement"),
1546        };
1547
1548        match &exprs[0] {
1549            Expression::Infix {
1550                left,
1551                operator,
1552                right,
1553            } => {
1554                assert_eq!(operator.kind, TokenKind::Caret);
1555                assert!(matches!(**left, Expression::Number(2.0)));
1556                match &**right {
1557                    Expression::Infix {
1558                        operator: right_op, ..
1559                    } => assert_eq!(right_op.kind, TokenKind::Caret),
1560                    _ => panic!("expected nested infix expression"),
1561                }
1562            }
1563            _ => panic!("expected infix expression"),
1564        }
1565    }
1566
1567    #[test]
1568    fn parse_print_minus_is_left_associative() {
1569        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1570
1571        let program = parser.parse_program();
1572        let mut begin_blocks = program.begin_blocks_iter();
1573        let Action { statements } = begin_blocks.next().expect("expected begin block");
1574
1575        let exprs = match &statements[0] {
1576            Statement::Print(expressions) => expressions,
1577            _ => panic!("expected print statement"),
1578        };
1579
1580        match &exprs[0] {
1581            Expression::Infix {
1582                left,
1583                operator,
1584                right,
1585            } => {
1586                assert_eq!(operator.kind, TokenKind::Minus);
1587                match &**left {
1588                    Expression::Infix {
1589                        operator: left_op, ..
1590                    } => assert_eq!(left_op.kind, TokenKind::Minus),
1591                    _ => panic!("expected nested infix expression"),
1592                }
1593                assert!(matches!(**right, Expression::Number(1.0)));
1594            }
1595            _ => panic!("expected infix expression"),
1596        }
1597    }
1598
1599    #[test]
1600    fn parse_print_concatenation() {
1601        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
1602
1603        let program = parser.parse_program();
1604        let mut begin_blocks = program.begin_blocks_iter();
1605        let Action { statements } = begin_blocks.next().expect("expected begin block");
1606
1607        let exprs = match &statements[0] {
1608            Statement::Print(expressions) => expressions,
1609            _ => panic!("expected print statement"),
1610        };
1611
1612        assert_eq!(exprs.len(), 1);
1613        match &exprs[0] {
1614            Expression::Concatenation { left, right } => {
1615                assert!(matches!(**left, Expression::String("Value:")));
1616                assert!(matches!(**right, Expression::Number(42.0)));
1617            }
1618            _ => panic!("expected concatenation expression"),
1619        }
1620    }
1621
1622    #[test]
1623    fn parse_continue_statement() {
1624        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
1625
1626        let program = parser.parse_program();
1627        let mut rules = program.rules_iter();
1628        let rule = rules.next().expect("expected rule");
1629
1630        let statements = match rule {
1631            Rule::Action(Action { statements }) => statements,
1632            _ => panic!("expected action rule"),
1633        };
1634
1635        assert!(matches!(statements[0], Statement::Continue));
1636    }
1637
1638    #[test]
1639    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
1640        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
1641
1642        let program = parser.parse_program();
1643        let mut rules = program.rules_iter();
1644        let rule = rules.next().expect("expected rule");
1645
1646        let statements = match rule {
1647            Rule::Action(Action { statements }) => statements,
1648            _ => panic!("expected action rule"),
1649        };
1650
1651        let exprs = match &statements[1] {
1652            Statement::Print(expressions) => expressions,
1653            _ => panic!("expected print statement"),
1654        };
1655
1656        assert_eq!(exprs.len(), 1);
1657        match &exprs[0] {
1658            Expression::Concatenation { left, right } => {
1659                assert!(matches!(**left, Expression::Identifier("x")));
1660                assert!(matches!(**right, Expression::PreIncrement(_)));
1661            }
1662            _ => panic!("expected concatenation expression"),
1663        }
1664    }
1665
1666    #[test]
1667    fn parse_print_field_expression() {
1668        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
1669
1670        let program = parser.parse_program();
1671        let mut rules = program.rules_iter();
1672        let rule = rules.next().expect("expected rule");
1673
1674        let statements = match rule {
1675            Rule::Action(Action { statements }) => statements,
1676            _ => panic!("expected action rule"),
1677        };
1678
1679        let exprs = match &statements[0] {
1680            Statement::Print(expressions) => expressions,
1681            _ => panic!("expected print statement"),
1682        };
1683
1684        match &exprs[0] {
1685            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
1686            _ => panic!("expected field expression"),
1687        }
1688    }
1689
1690    #[test]
1691    fn parse_print_with_commas() {
1692        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
1693
1694        let program = parser.parse_program();
1695
1696        assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
1697    }
1698
1699    #[test]
1700    fn parse_number_of_fields_identifier() {
1701        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
1702
1703        let program = parser.parse_program();
1704
1705        assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
1706    }
1707
1708    #[test]
1709    fn parse_printf_with_format_and_arguments() {
1710        let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
1711
1712        let program = parser.parse_program();
1713
1714        assert_eq!(
1715            r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
1716            program.to_string()
1717        );
1718    }
1719
1720    #[test]
1721    fn parse_add_assignment_and_pre_increment() {
1722        let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
1723
1724        let program = parser.parse_program();
1725
1726        assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
1727    }
1728
1729    #[test]
1730    fn parse_regex_match_pattern_action() {
1731        let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
1732
1733        let program = parser.parse_program();
1734
1735        assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
1736    }
1737
1738    #[test]
1739    fn parse_print_with_line_continuation_after_comma() {
1740        let mut parser = Parser::new(Lexer::new(
1741            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
1742        ));
1743
1744        let program = parser.parse_program();
1745
1746        assert_eq!(
1747            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
1748            program.to_string()
1749        );
1750    }
1751
1752    #[test]
1753    fn parse_gsub_statement() {
1754        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
1755
1756        let program = parser.parse_program();
1757
1758        assert_eq!(
1759            r#"{ gsub(/USA/, "United States"); print }"#,
1760            program.to_string()
1761        );
1762    }
1763
1764    #[test]
1765    fn parse_gsub_statement_with_target() {
1766        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
1767
1768        let program = parser.parse_program();
1769
1770        assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
1771    }
1772
1773    #[test]
1774    fn parse_system_statement() {
1775        let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
1776
1777        let program = parser.parse_program();
1778
1779        assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
1780    }
1781
1782    #[test]
1783    fn parse_print_length_builtin_expression() {
1784        let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
1785
1786        let program = parser.parse_program();
1787
1788        assert_eq!(r#"{ print length, $0 }"#, program.to_string());
1789    }
1790
1791    #[test]
1792    fn parse_length_expression_as_rule_pattern() {
1793        let mut parser = Parser::new(Lexer::new(
1794            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1795        ));
1796
1797        let program = parser.parse_program();
1798
1799        assert_eq!(
1800            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1801            program.to_string()
1802        );
1803    }
1804
1805    #[test]
1806    fn parse_field_assignment_with_substr() {
1807        let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
1808
1809        let program = parser.parse_program();
1810
1811        assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
1812    }
1813
1814    #[test]
1815    fn parse_assignment_with_concatenation_and_substr() {
1816        let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
1817
1818        let program = parser.parse_program();
1819
1820        assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
1821    }
1822
1823    #[test]
1824    fn parse_field_divide_assignment() {
1825        let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
1826
1827        let program = parser.parse_program();
1828
1829        assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
1830    }
1831
1832    #[test]
1833    fn parse_chained_assignment() {
1834        let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
1835
1836        let program = parser.parse_program();
1837
1838        assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
1839    }
1840
1841    #[test]
1842    fn parse_if_statement_with_block() {
1843        let mut parser = Parser::new(Lexer::new(
1844            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1845        ));
1846
1847        let program = parser.parse_program();
1848
1849        assert_eq!(
1850            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1851            program.to_string()
1852        );
1853    }
1854
1855    #[test]
1856    fn parse_while_with_post_increment() {
1857        let mut parser = Parser::new(Lexer::new(
1858            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1859        ));
1860
1861        let program = parser.parse_program();
1862
1863        assert_eq!(
1864            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1865            program.to_string()
1866        );
1867    }
1868
1869    #[test]
1870    fn parse_while_with_single_body_statement() {
1871        let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
1872
1873        let program = parser.parse_program();
1874
1875        assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
1876    }
1877
1878    #[test]
1879    fn parse_do_while_with_post_increment() {
1880        let mut parser = Parser::new(Lexer::new(
1881            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1882        ));
1883
1884        let program = parser.parse_program();
1885
1886        assert_eq!(
1887            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1888            program.to_string()
1889        );
1890    }
1891
1892    #[test]
1893    fn parse_post_decrement_statement() {
1894        let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
1895
1896        let program = parser.parse_program();
1897
1898        assert_eq!(r#"{ k--; n-- }"#, program.to_string());
1899    }
1900
1901    #[test]
1902    fn parse_rand_expression() {
1903        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
1904
1905        let program = parser.parse_program();
1906
1907        assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
1908    }
1909
1910    #[test]
1911    fn parse_math_builtin_expressions() {
1912        let mut parser = Parser::new(Lexer::new(
1913            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1914        ));
1915
1916        let program = parser.parse_program();
1917
1918        assert_eq!(
1919            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1920            program.to_string()
1921        );
1922    }
1923
1924    #[test]
1925    fn parse_index_builtin_expression() {
1926        let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
1927
1928        let program = parser.parse_program();
1929
1930        assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
1931    }
1932
1933    #[test]
1934    fn parse_in_membership_expression() {
1935        let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
1936
1937        let program = parser.parse_program();
1938
1939        assert_eq!(r#"{ print 1 in x }"#, program.to_string());
1940    }
1941
1942    #[test]
1943    fn parse_for_loop_with_single_body_statement() {
1944        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
1945
1946        let program = parser.parse_program();
1947
1948        assert_eq!(
1949            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
1950            program.to_string()
1951        );
1952    }
1953
1954    #[test]
1955    fn parse_if_with_single_statement_body() {
1956        let mut parser = Parser::new(Lexer::new(
1957            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
1958        ));
1959
1960        let program = parser.parse_program();
1961
1962        assert_eq!(
1963            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
1964            program.to_string()
1965        );
1966    }
1967
1968    #[test]
1969    fn parse_exit_statement() {
1970        let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
1971
1972        let program = parser.parse_program();
1973
1974        assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
1975    }
1976
1977    #[test]
1978    fn parse_exit_statement_with_status() {
1979        let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
1980
1981        let program = parser.parse_program();
1982
1983        assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
1984    }
1985
1986    #[test]
1987    fn parse_user_defined_function_call_statement() {
1988        let mut parser = Parser::new(Lexer::new(
1989            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
1990        ));
1991
1992        let program = parser.parse_program();
1993
1994        let definition = program
1995            .function_definition("myabort")
1996            .expect("expected function definition");
1997        assert_eq!(definition.parameters, vec!["n"]);
1998        assert_eq!(definition.statements.len(), 1);
1999    }
2000
2001    #[test]
2002    fn parse_delete_array_element_statement() {
2003        let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2004
2005        let program = parser.parse_program();
2006
2007        assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2008    }
2009
2010    #[test]
2011    fn parse_array_add_assignment_and_access() {
2012        let mut parser = Parser::new(Lexer::new(
2013            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2014        ));
2015
2016        let program = parser.parse_program();
2017
2018        assert_eq!(
2019            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2020            program.to_string()
2021        );
2022    }
2023
2024    #[test]
2025    fn parse_for_in_loop() {
2026        let mut parser = Parser::new(Lexer::new(
2027            r#"END { for (name in area) print name ":" area[name] }"#,
2028        ));
2029
2030        let program = parser.parse_program();
2031
2032        assert_eq!(
2033            r#"END { for (name in area) { print name ":" area[name] } }"#,
2034            program.to_string()
2035        );
2036    }
2037
2038    #[test]
2039    fn parse_print_redirection() {
2040        let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2041
2042        let program = parser.parse_program();
2043
2044        assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2045    }
2046
2047    #[test]
2048    fn parse_print_pipe() {
2049        let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2050
2051        let program = parser.parse_program();
2052
2053        assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2054    }
2055}