Skip to main content

rawk_core/
parser.rs

1use crate::{
2    Lexer, Program,
3    ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4    token::{Token, TokenKind},
5};
6
/// Parser state for an AWK program.
///
/// The parser pulls tokens from a [`Lexer`] one at a time and always keeps
/// exactly one token of lookahead in `current_token`.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source; advanced by `next_token` / `next_token_in_regex_context`.
    lexer: Lexer<'a>,
    /// The token currently being examined (single-token lookahead).
    current_token: Token<'a>,
    /// Function definitions collected by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
13
14impl<'a> Parser<'a> {
15    pub fn new(mut lexer: Lexer<'a>) -> Self {
16        let current_token = lexer.next_token_regex_aware();
17        Parser {
18            lexer,
19            current_token,
20            function_definitions: Vec::new(),
21        }
22    }
23
24    fn next_token(&mut self) {
25        self.current_token = self.lexer.next_token();
26    }
27
28    fn next_token_in_regex_context(&mut self) {
29        self.current_token = self.lexer.next_token_regex_aware();
30    }
31
32    fn is_eof(&self) -> bool {
33        self.current_token.kind == TokenKind::Eof
34    }
35
36    fn is_statement_terminator(&self) -> bool {
37        matches!(
38            self.current_token.kind,
39            TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40        )
41    }
42
43    fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44        self.current_token.span.start == previous.span.start + previous.literal.len()
45    }
46
47    fn parse_array_index_expression(&mut self) -> Expression<'a> {
48        let mut index = self.parse_expression();
49        while self.current_token.kind == TokenKind::Comma {
50            let operator = self.current_token.clone();
51            self.next_token_in_regex_context();
52            let right = self.parse_expression();
53            index = Expression::Infix {
54                left: Box::new(index),
55                operator,
56                right: Box::new(right),
57            };
58        }
59        index
60    }
61
62    fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
63        match &self.current_token.kind {
64            TokenKind::Begin => {
65                self.next_token();
66                match self.parse_action() {
67                    Rule::Action(action) => Some(Rule::Begin(action)),
68                    _ => panic!("Expected action after BEGIN"),
69                }
70            }
71            TokenKind::NewLine => {
72                self.next_token_in_regex_context();
73                self.parse_next_rule()
74            }
75            TokenKind::Eof => None,
76            TokenKind::LeftCurlyBrace => Some(self.parse_action()),
77            TokenKind::Function => {
78                self.parse_function_definition();
79                None
80            }
81            TokenKind::End => {
82                self.next_token();
83                match self.parse_action() {
84                    Rule::Action(action) => Some(Rule::End(action)),
85                    _ => panic!("Expected action after END"),
86                }
87            }
88            TokenKind::Regex
89            | TokenKind::String
90            | TokenKind::Number
91            | TokenKind::DollarSign
92            | TokenKind::LeftParen
93            | TokenKind::Identifier
94            | TokenKind::Cos
95            | TokenKind::Exp
96            | TokenKind::Index
97            | TokenKind::Int
98            | TokenKind::Length
99            | TokenKind::Log
100            | TokenKind::Match
101            | TokenKind::Rand
102            | TokenKind::Sin
103            | TokenKind::Sprintf
104            | TokenKind::Split
105            | TokenKind::Sqrt
106            | TokenKind::Srand
107            | TokenKind::Substr
108            | TokenKind::ExclamationMark
109            | TokenKind::Increment
110            | TokenKind::Decrement => self.parse_pattern_rule(),
111            _ => panic!(
112                "parse_next_rule not yet implemented, found token: {:?}",
113                self.current_token
114            ),
115        }
116    }
117
118    fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
119        let mut pattern = self.parse_expression();
120        if self.current_token.kind == TokenKind::Comma {
121            let operator = self.current_token.clone();
122            self.next_token_in_regex_context();
123            let right = self.parse_expression();
124            pattern = Expression::Infix {
125                left: Box::new(pattern),
126                operator,
127                right: Box::new(right),
128            };
129        }
130        let pattern = Some(pattern);
131
132        if self.current_token.kind == TokenKind::LeftCurlyBrace {
133            match self.parse_action() {
134                Rule::Action(action) => Some(Rule::PatternAction {
135                    pattern,
136                    action: Some(action),
137                }),
138                _ => panic!("Expected action after pattern"),
139            }
140        } else {
141            Some(Rule::PatternAction {
142                pattern,
143                action: None,
144            })
145        }
146    }
147
148    fn parse_action(&mut self) -> Rule<'a> {
149        self.next_token(); // consume '{'
150
151        let pattern = None;
152
153        let mut statements = Vec::new();
154        while self.current_token.kind != TokenKind::RightCurlyBrace
155            && self.current_token.kind != TokenKind::Eof
156        {
157            while self.current_token.kind == TokenKind::NewLine
158                || self.current_token.kind == TokenKind::Semicolon
159            {
160                self.next_token();
161            }
162
163            if self.current_token.kind == TokenKind::RightCurlyBrace
164                || self.current_token.kind == TokenKind::Eof
165            {
166                break;
167            }
168
169            statements.push(self.parse_statement());
170        }
171
172        if pattern.is_some() {
173            Rule::PatternAction {
174                pattern,
175                action: Some(Action { statements }),
176            }
177        } else {
178            Rule::Action(Action { statements })
179        }
180    }
181
182    fn parse_statement(&mut self) -> Statement<'a> {
183        match self.current_token.kind {
184            TokenKind::Print => self.parse_print_function(),
185            TokenKind::Printf => self.parse_printf_function(),
186            TokenKind::System => self.parse_system_function(),
187            TokenKind::Split => self.parse_split_statement(),
188            TokenKind::Sub => self.parse_sub_function(),
189            TokenKind::Gsub => self.parse_gsub_function(),
190            TokenKind::Break => self.parse_break_statement(),
191            TokenKind::Continue => self.parse_continue_statement(),
192            TokenKind::Delete => self.parse_delete_statement(),
193            TokenKind::If => self.parse_if_statement(),
194            TokenKind::Do => self.parse_do_statement(),
195            TokenKind::While => self.parse_while_statement(),
196            TokenKind::For => self.parse_for_statement(),
197            TokenKind::Return => self.parse_return_statement(),
198            TokenKind::Next => self.parse_next_statement(),
199            TokenKind::Exit => self.parse_exit_statement(),
200            TokenKind::Identifier => self.parse_assignment_statement(),
201            TokenKind::DollarSign => self.parse_field_assignment_statement(),
202            TokenKind::Increment => self.parse_pre_increment_statement(),
203            TokenKind::Decrement => self.parse_pre_decrement_statement(),
204            TokenKind::Number
205            | TokenKind::String
206            | TokenKind::Regex
207            | TokenKind::LeftParen
208            | TokenKind::Close
209            | TokenKind::Cos
210            | TokenKind::Exp
211            | TokenKind::Index
212            | TokenKind::Int
213            | TokenKind::Length
214            | TokenKind::Log
215            | TokenKind::Match
216            | TokenKind::Rand
217            | TokenKind::Sin
218            | TokenKind::Sprintf
219            | TokenKind::Sqrt
220            | TokenKind::Srand
221            | TokenKind::Substr
222            | TokenKind::ToLower
223            | TokenKind::ToUpper => Statement::Expression(self.parse_expression()),
224            _ => todo!(),
225        }
226    }
227
228    fn parse_function_definition(&mut self) {
229        self.next_token();
230        if self.current_token.kind != TokenKind::Identifier {
231            todo!()
232        }
233        let name = self.current_token.literal;
234        self.next_token();
235        if self.current_token.kind != TokenKind::LeftParen {
236            todo!()
237        }
238        self.next_token();
239
240        let mut parameters = Vec::new();
241        while self.current_token.kind != TokenKind::RightParen {
242            if self.current_token.kind != TokenKind::Identifier {
243                todo!()
244            }
245            parameters.push(self.current_token.literal);
246            self.next_token();
247            if self.current_token.kind == TokenKind::Comma {
248                self.next_token();
249            } else if self.current_token.kind != TokenKind::RightParen {
250                todo!()
251            }
252        }
253
254        self.next_token();
255        while self.current_token.kind == TokenKind::NewLine {
256            self.next_token();
257        }
258        if self.current_token.kind != TokenKind::LeftCurlyBrace {
259            todo!()
260        }
261
262        let mut statements = Vec::new();
263        self.next_token(); // consume '{'
264        while self.current_token.kind != TokenKind::RightCurlyBrace
265            && self.current_token.kind != TokenKind::Eof
266        {
267            while self.current_token.kind == TokenKind::NewLine
268                || self.current_token.kind == TokenKind::Semicolon
269            {
270                self.next_token();
271            }
272
273            if self.current_token.kind == TokenKind::RightCurlyBrace
274                || self.current_token.kind == TokenKind::Eof
275            {
276                break;
277            }
278
279            statements.push(self.parse_statement());
280        }
281        self.function_definitions.push(FunctionDefinition {
282            name,
283            parameters,
284            statements,
285        });
286    }
287
288    fn parse_assignment_statement(&mut self) -> Statement<'a> {
289        let identifier = self.current_token.clone();
290        self.next_token();
291        self.parse_assignment_statement_with_identifier(identifier)
292    }
293
294    fn parse_assignment_statement_with_identifier(&mut self, identifier: Token<'a>) -> Statement<'a> {
295        if self.current_token.kind == TokenKind::LeftParen
296            && self.token_is_immediately_after(&identifier)
297        {
298            let args = self.parse_call_arguments();
299            return Statement::Expression(Expression::FunctionCall {
300                name: identifier.literal,
301                args,
302            });
303        }
304        if self.current_token.kind == TokenKind::LeftSquareBracket {
305            self.next_token_in_regex_context();
306            let index = self.parse_array_index_expression();
307            if self.current_token.kind != TokenKind::RightSquareBracket {
308                todo!()
309            }
310            self.next_token();
311            if self.current_token.kind == TokenKind::Assign {
312                self.next_token();
313                let value = self.parse_expression();
314                return Statement::ArrayAssignment {
315                    identifier: identifier.literal,
316                    index,
317                    value,
318                };
319            }
320            if self.current_token.kind == TokenKind::AddAssign {
321                self.next_token();
322                let value = self.parse_expression();
323                return Statement::ArrayAddAssignment {
324                    identifier: identifier.literal,
325                    index,
326                    value,
327                };
328            }
329            if self.current_token.kind == TokenKind::Increment {
330                self.next_token();
331                return Statement::ArrayPostIncrement {
332                    identifier: identifier.literal,
333                    index,
334                };
335            }
336            if self.current_token.kind == TokenKind::Decrement {
337                self.next_token();
338                return Statement::ArrayPostDecrement {
339                    identifier: identifier.literal,
340                    index,
341                };
342            }
343            todo!()
344        }
345        if self.current_token.kind == TokenKind::Assign {
346            self.next_token();
347            if self.current_token.kind == TokenKind::Split {
348                return self.parse_split_assignment_statement(identifier.literal);
349            }
350            let value = self.parse_expression();
351            Statement::Assignment {
352                identifier: identifier.literal,
353                value,
354            }
355        } else if self.current_token.kind == TokenKind::Increment {
356            self.next_token();
357            Statement::PostIncrement {
358                identifier: identifier.literal,
359            }
360        } else if self.current_token.kind == TokenKind::Decrement {
361            self.next_token();
362            Statement::PostDecrement {
363                identifier: identifier.literal,
364            }
365        } else if self.current_token.kind == TokenKind::AddAssign {
366            self.next_token();
367            let value = self.parse_expression();
368            Statement::AddAssignment {
369                identifier: identifier.literal,
370                value,
371            }
372        } else if matches!(
373            self.current_token.kind,
374            TokenKind::SubtractAssign
375                | TokenKind::MultiplyAssign
376                | TokenKind::DivideAssign
377                | TokenKind::ModuloAssign
378                | TokenKind::PowerAssign
379        ) {
380            let assign_token = self.current_token.clone();
381            self.next_token();
382            let right_value = self.parse_expression();
383            Statement::Assignment {
384                identifier: identifier.literal,
385                value: Expression::Infix {
386                    left: Box::new(Expression::Identifier(identifier.literal)),
387                    operator: compound_assign_operator(&assign_token),
388                    right: Box::new(right_value),
389                },
390            }
391        } else {
392            todo!()
393        }
394    }
395
396    fn parse_delete_statement(&mut self) -> Statement<'a> {
397        self.next_token();
398        if self.current_token.kind != TokenKind::Identifier {
399            todo!()
400        }
401        let identifier = self.current_token.literal;
402        self.next_token();
403        if self.current_token.kind != TokenKind::LeftSquareBracket {
404            return Statement::Delete {
405                identifier,
406                index: None,
407            };
408        }
409
410        self.next_token_in_regex_context();
411        let index = self.parse_array_index_expression();
412        if self.current_token.kind != TokenKind::RightSquareBracket {
413            todo!()
414        }
415        self.next_token();
416        Statement::Delete {
417            identifier,
418            index: Some(index),
419        }
420    }
421
422    fn parse_break_statement(&mut self) -> Statement<'a> {
423        self.next_token();
424        Statement::Break
425    }
426
427    fn parse_continue_statement(&mut self) -> Statement<'a> {
428        self.next_token();
429        Statement::Continue
430    }
431
432    fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
433        self.next_token();
434        if self.current_token.kind != TokenKind::Identifier {
435            todo!()
436        }
437        let identifier = self.current_token.literal;
438        self.next_token();
439        Statement::PreIncrement { identifier }
440    }
441
442    fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
443        self.next_token();
444        if self.current_token.kind != TokenKind::Identifier {
445            todo!()
446        }
447        let identifier = self.current_token.literal;
448        self.next_token();
449        Statement::PreDecrement { identifier }
450    }
451
452    fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
453        self.next_token();
454        if self.current_token.kind != TokenKind::LeftParen {
455            todo!()
456        }
457        self.next_token_in_regex_context();
458        let string = self.parse_expression();
459        if self.current_token.kind != TokenKind::Comma {
460            todo!()
461        }
462        self.next_token();
463        if self.current_token.kind != TokenKind::Identifier {
464            todo!()
465        }
466        let array = self.current_token.literal;
467        self.next_token();
468        let separator = if self.current_token.kind == TokenKind::Comma {
469            self.next_token_in_regex_context();
470            Some(self.parse_expression())
471        } else {
472            None
473        };
474        if self.current_token.kind != TokenKind::RightParen {
475            todo!()
476        }
477        self.next_token();
478        Statement::SplitAssignment {
479            identifier,
480            string,
481            array,
482            separator,
483        }
484    }
485
486    fn parse_split_statement(&mut self) -> Statement<'a> {
487        self.next_token();
488        if self.current_token.kind != TokenKind::LeftParen {
489            todo!()
490        }
491        self.next_token_in_regex_context();
492        let string = self.parse_expression();
493        if self.current_token.kind != TokenKind::Comma {
494            todo!()
495        }
496        self.next_token();
497        if self.current_token.kind != TokenKind::Identifier {
498            todo!()
499        }
500        let array = self.current_token.literal;
501        self.next_token();
502        let separator = if self.current_token.kind == TokenKind::Comma {
503            self.next_token_in_regex_context();
504            Some(self.parse_expression())
505        } else {
506            None
507        };
508        if self.current_token.kind != TokenKind::RightParen {
509            todo!()
510        }
511        self.next_token();
512        Statement::Split {
513            string,
514            array,
515            separator,
516        }
517    }
518
519    fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
520        self.next_token();
521        let field = self.parse_primary_expression();
522        let assign_token = self.current_token.clone();
523        self.next_token();
524        let right_value = self.parse_expression();
525
526        let value = if assign_token.kind == TokenKind::Assign {
527            right_value
528        } else {
529            let operator = compound_assign_operator(&assign_token);
530            Expression::Infix {
531                left: Box::new(Expression::Field(Box::new(field.clone()))),
532                operator,
533                right: Box::new(right_value),
534            }
535        };
536        Statement::FieldAssignment { field, value }
537    }
538
539    fn parse_if_statement(&mut self) -> Statement<'a> {
540        self.next_token();
541        if self.current_token.kind != TokenKind::LeftParen {
542            todo!()
543        }
544        self.next_token_in_regex_context();
545        let condition = self.parse_condition_in_parens();
546        if self.current_token.kind != TokenKind::RightParen {
547            todo!()
548        }
549        self.next_token();
550        let then_statements = self.parse_control_statement_body();
551
552        while self.current_token.kind == TokenKind::NewLine
553            || self.current_token.kind == TokenKind::Semicolon
554        {
555            self.next_token();
556        }
557
558        if self.current_token.kind == TokenKind::Else {
559            self.next_token();
560            let else_statements = self.parse_control_statement_body();
561            return Statement::IfElse {
562                condition,
563                then_statements,
564                else_statements,
565            };
566        }
567
568        Statement::If {
569            condition,
570            then_statements,
571        }
572    }
573
574    fn parse_exit_statement(&mut self) -> Statement<'a> {
575        self.next_token();
576        let status = if self.is_statement_terminator() {
577            None
578        } else {
579            Some(self.parse_expression())
580        };
581        Statement::Exit(status)
582    }
583
584    fn parse_return_statement(&mut self) -> Statement<'a> {
585        self.next_token();
586        let value = if self.is_statement_terminator() {
587            None
588        } else {
589            Some(self.parse_expression())
590        };
591        Statement::Return(value)
592    }
593
594    fn parse_next_statement(&mut self) -> Statement<'a> {
595        self.next_token();
596        Statement::Next
597    }
598
599    fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
600        self.next_token(); // consume '{'
601        let mut statements = Vec::new();
602        while self.current_token.kind != TokenKind::RightCurlyBrace
603            && self.current_token.kind != TokenKind::Eof
604        {
605            while self.current_token.kind == TokenKind::NewLine
606                || self.current_token.kind == TokenKind::Semicolon
607            {
608                self.next_token();
609            }
610
611            if self.current_token.kind == TokenKind::RightCurlyBrace
612                || self.current_token.kind == TokenKind::Eof
613            {
614                break;
615            }
616            statements.push(self.parse_statement());
617        }
618        if self.current_token.kind == TokenKind::RightCurlyBrace {
619            self.next_token();
620        }
621        statements
622    }
623
624    fn parse_control_statement_body(&mut self) -> Vec<Statement<'a>> {
625        while self.current_token.kind == TokenKind::NewLine {
626            self.next_token();
627        }
628
629        if self.current_token.kind == TokenKind::LeftCurlyBrace {
630            return self.parse_statement_block();
631        }
632
633        if self.current_token.kind == TokenKind::Semicolon {
634            self.next_token();
635            return vec![Statement::Empty];
636        }
637
638        vec![self.parse_statement()]
639    }
640
641    fn parse_while_statement(&mut self) -> Statement<'a> {
642        self.next_token();
643        if self.current_token.kind != TokenKind::LeftParen {
644            todo!()
645        }
646        self.next_token_in_regex_context();
647        let condition = self.parse_condition_in_parens();
648        if self.current_token.kind != TokenKind::RightParen {
649            todo!()
650        }
651        self.next_token();
652        let statements = self.parse_control_statement_body();
653        Statement::While {
654            condition,
655            statements,
656        }
657    }
658
659    fn parse_do_statement(&mut self) -> Statement<'a> {
660        self.next_token();
661        let statements = self.parse_control_statement_body();
662
663        while self.current_token.kind == TokenKind::NewLine
664            || self.current_token.kind == TokenKind::Semicolon
665        {
666            self.next_token();
667        }
668
669        if self.current_token.kind != TokenKind::While {
670            todo!()
671        }
672        self.next_token();
673        if self.current_token.kind != TokenKind::LeftParen {
674            todo!()
675        }
676        self.next_token_in_regex_context();
677        let condition = self.parse_condition_in_parens();
678        if self.current_token.kind != TokenKind::RightParen {
679            todo!()
680        }
681        self.next_token();
682        Statement::DoWhile {
683            condition,
684            statements,
685        }
686    }
687
688    fn parse_for_statement(&mut self) -> Statement<'a> {
689        self.next_token();
690        if self.current_token.kind != TokenKind::LeftParen {
691            todo!()
692        }
693        self.next_token();
694        while self.current_token.kind == TokenKind::NewLine {
695            self.next_token();
696        }
697
698        let init = if self.current_token.kind == TokenKind::Semicolon {
699            Statement::Empty
700        } else if self.current_token.kind == TokenKind::Identifier {
701            let variable = self.current_token.clone();
702            self.next_token();
703            if self.current_token.kind == TokenKind::In {
704                self.next_token();
705                if self.current_token.kind != TokenKind::Identifier {
706                    todo!()
707                }
708                let array = self.current_token.literal;
709                self.next_token();
710                if self.current_token.kind != TokenKind::RightParen {
711                    todo!()
712                }
713                self.next_token();
714                let statements = self.parse_control_statement_body();
715                return Statement::ForIn {
716                    variable: variable.literal,
717                    array,
718                    statements,
719                };
720            }
721            self.parse_assignment_statement_with_identifier(variable)
722        } else {
723            self.parse_statement()
724        };
725        while self.current_token.kind == TokenKind::NewLine {
726            self.next_token();
727        }
728        if self.current_token.kind != TokenKind::Semicolon {
729            todo!()
730        }
731        self.next_token_in_regex_context();
732        while self.current_token.kind == TokenKind::NewLine {
733            self.next_token_in_regex_context();
734        }
735
736        let condition = if self.current_token.kind == TokenKind::Semicolon {
737            Expression::Number(1.0)
738        } else {
739            self.parse_expression()
740        };
741        while self.current_token.kind == TokenKind::NewLine {
742            self.next_token();
743        }
744        if self.current_token.kind != TokenKind::Semicolon {
745            todo!()
746        }
747        self.next_token_in_regex_context();
748        while self.current_token.kind == TokenKind::NewLine {
749            self.next_token_in_regex_context();
750        }
751
752        let update = if self.current_token.kind == TokenKind::RightParen {
753            Statement::Empty
754        } else {
755            self.parse_statement()
756        };
757        while self.current_token.kind == TokenKind::NewLine {
758            self.next_token();
759        }
760        if self.current_token.kind != TokenKind::RightParen {
761            todo!()
762        }
763        self.next_token();
764        let statements = self.parse_control_statement_body();
765
766        Statement::For {
767            init: Box::new(init),
768            condition,
769            update: Box::new(update),
770            statements,
771        }
772    }
773
774    fn parse_print_function(&mut self) -> Statement<'a> {
775        let mut expressions = Vec::new();
776        let mut expect_more = false;
777        self.next_token();
778
779        loop {
780            if self.current_token.kind == TokenKind::RightCurlyBrace
781                || self.current_token.kind == TokenKind::RightParen
782                || self.current_token.kind == TokenKind::Eof
783                || self.current_token.kind == TokenKind::GreaterThan
784                || self.current_token.kind == TokenKind::Append
785                || self.current_token.kind == TokenKind::Pipe
786            {
787                break;
788            }
789
790            if self.current_token.kind == TokenKind::NewLine
791                || self.current_token.kind == TokenKind::Semicolon
792            {
793                if expect_more {
794                    self.next_token();
795                    continue;
796                }
797                break;
798            }
799
800            if self.current_token.kind == TokenKind::Comma {
801                self.next_token();
802                expect_more = true;
803                continue;
804            }
805
806            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
807            let expression = self.parse_expression();
808            expressions.push(expression);
809            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
810                while self.current_token.kind == TokenKind::Comma {
811                    self.next_token();
812                    expressions.push(self.parse_expression());
813                }
814                if self.current_token.kind != TokenKind::RightParen {
815                    todo!()
816                }
817                self.next_token();
818            }
819            expect_more = false;
820        }
821        if self.current_token.kind == TokenKind::RightParen {
822            self.next_token();
823        }
824
825        if self.current_token.kind == TokenKind::GreaterThan
826            || self.current_token.kind == TokenKind::Append
827        {
828            let append = self.current_token.kind == TokenKind::Append;
829            self.next_token();
830            let target = self.parse_expression();
831            return Statement::PrintRedirect {
832                expressions,
833                target,
834                append,
835            };
836        }
837        if self.current_token.kind == TokenKind::Pipe {
838            self.next_token();
839            let target = self.parse_expression();
840            return Statement::PrintPipe {
841                expressions,
842                target,
843            };
844        }
845
846        Statement::Print(expressions)
847    }
848
849    fn parse_printf_function(&mut self) -> Statement<'a> {
850        self.next_token();
851        let expressions = if self.current_token.kind == TokenKind::LeftParen {
852            self.next_token_in_regex_context();
853            let mut expressions = Vec::new();
854            while self.current_token.kind != TokenKind::RightParen
855                && self.current_token.kind != TokenKind::Eof
856            {
857                if self.current_token.kind == TokenKind::Comma {
858                    self.next_token();
859                    continue;
860                }
861                expressions.push(self.parse_expression());
862            }
863            if self.current_token.kind == TokenKind::RightParen {
864                self.next_token();
865            }
866            expressions
867        } else {
868            self.parse_expression_list_until_action_end_from_current()
869        };
870
871        Statement::Printf(expressions)
872    }
873
874    fn parse_gsub_function(&mut self) -> Statement<'a> {
875        self.next_token();
876        if self.current_token.kind != TokenKind::LeftParen {
877            todo!()
878        }
879
880        self.next_token_in_regex_context();
881        let pattern = self.parse_expression();
882
883        if self.current_token.kind != TokenKind::Comma {
884            todo!()
885        }
886        self.next_token();
887        let replacement = self.parse_expression();
888
889        let target = if self.current_token.kind == TokenKind::Comma {
890            self.next_token();
891            Some(self.parse_expression())
892        } else {
893            None
894        };
895
896        if self.current_token.kind != TokenKind::RightParen {
897            todo!()
898        }
899        self.next_token();
900
901        Statement::Gsub {
902            pattern,
903            replacement,
904            target,
905        }
906    }
907
908    fn parse_sub_function(&mut self) -> Statement<'a> {
909        self.next_token();
910        if self.current_token.kind != TokenKind::LeftParen {
911            todo!()
912        }
913
914        self.next_token_in_regex_context();
915        let pattern = self.parse_expression();
916
917        if self.current_token.kind != TokenKind::Comma {
918            todo!()
919        }
920        self.next_token();
921        let replacement = self.parse_expression();
922
923        if self.current_token.kind == TokenKind::Comma {
924            todo!()
925        }
926
927        if self.current_token.kind != TokenKind::RightParen {
928            todo!()
929        }
930        self.next_token();
931
932        Statement::Sub {
933            pattern,
934            replacement,
935        }
936    }
937
938    fn parse_system_function(&mut self) -> Statement<'a> {
939        self.next_token();
940        if self.current_token.kind != TokenKind::LeftParen {
941            todo!()
942        }
943        self.next_token();
944        let command = self.parse_expression();
945        if self.current_token.kind != TokenKind::RightParen {
946            todo!()
947        }
948        self.next_token();
949        Statement::System(command)
950    }
951
952    fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
953        let mut expressions = Vec::new();
954        let mut expect_more = false;
955
956        loop {
957            if self.current_token.kind == TokenKind::RightCurlyBrace
958                || self.current_token.kind == TokenKind::RightParen
959                || self.current_token.kind == TokenKind::Eof
960            {
961                break;
962            }
963
964            if self.current_token.kind == TokenKind::NewLine
965                || self.current_token.kind == TokenKind::Semicolon
966            {
967                if expect_more {
968                    self.next_token();
969                    continue;
970                }
971                break;
972            }
973
974            if self.current_token.kind == TokenKind::Comma {
975                self.next_token();
976                expect_more = true;
977                continue;
978            }
979
980            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
981            let expression = self.parse_expression();
982            expressions.push(expression);
983            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
984                while self.current_token.kind == TokenKind::Comma {
985                    self.next_token();
986                    expressions.push(self.parse_expression());
987                }
988                if self.current_token.kind != TokenKind::RightParen {
989                    todo!()
990                }
991                self.next_token();
992            }
993            expect_more = false;
994        }
995
996        if self.current_token.kind == TokenKind::RightParen {
997            self.next_token();
998        }
999
1000        expressions
1001    }
1002
1003    fn parse_expression(&mut self) -> Expression<'a> {
1004        self.parse_expression_with_min_precedence(0)
1005    }
1006
1007    fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1008        let left = self.parse_primary_expression();
1009        self.parse_expression_suffix(left, min_precedence)
1010    }
1011
1012    fn parse_expression_suffix(
1013        &mut self,
1014        mut left: Expression<'a>,
1015        min_precedence: u8,
1016    ) -> Expression<'a> {
1017        const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1018        const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1019
1020        loop {
1021            if self.current_token.kind == TokenKind::QuestionMark {
1022                if min_precedence > 0 {
1023                    break;
1024                }
1025                self.next_token_in_regex_context();
1026                let then_expr = self.parse_expression_with_min_precedence(0);
1027                if self.current_token.kind != TokenKind::Colon {
1028                    todo!()
1029                }
1030                self.next_token_in_regex_context();
1031                let else_expr = self.parse_expression_with_min_precedence(0);
1032                left = Expression::Ternary {
1033                    condition: Box::new(left),
1034                    then_expr: Box::new(then_expr),
1035                    else_expr: Box::new(else_expr),
1036                };
1037                continue;
1038            }
1039
1040            if infix_operator_precedence(&self.current_token.kind).is_none()
1041                && is_expression_start(&self.current_token.kind)
1042            {
1043                if CONCAT_LEFT_PRECEDENCE < min_precedence {
1044                    break;
1045                }
1046
1047                let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1048                left = Expression::Concatenation {
1049                    left: Box::new(left),
1050                    right: Box::new(right),
1051                };
1052                continue;
1053            }
1054
1055            let (left_precedence, right_precedence) =
1056                match infix_operator_precedence(&self.current_token.kind) {
1057                    Some(value) => value,
1058                    None => break,
1059                };
1060
1061            if left_precedence < min_precedence {
1062                break;
1063            }
1064
1065            let operator = self.current_token.clone();
1066            if matches!(
1067                operator.kind,
1068                TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1069            ) {
1070                self.next_token_in_regex_context();
1071            } else {
1072                self.next_token();
1073            }
1074            let right = self.parse_expression_with_min_precedence(right_precedence);
1075
1076            left = Expression::Infix {
1077                left: Box::new(left),
1078                operator,
1079                right: Box::new(right),
1080            };
1081        }
1082
1083        left
1084    }
1085
1086    fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1087        let mut condition = self.parse_expression();
1088        if self.current_token.kind == TokenKind::Comma {
1089            while self.current_token.kind == TokenKind::Comma {
1090                let operator = self.current_token.clone();
1091                self.next_token_in_regex_context();
1092                let right = self.parse_expression();
1093                condition = Expression::Infix {
1094                    left: Box::new(condition),
1095                    operator,
1096                    right: Box::new(right),
1097                };
1098            }
1099            if self.current_token.kind != TokenKind::RightParen {
1100                todo!()
1101            }
1102            self.next_token();
1103            condition = self.parse_expression_suffix(condition, 0);
1104        }
1105        condition
1106    }
1107
1108    fn parse_primary_expression(&mut self) -> Expression<'a> {
1109        if self.current_token.kind == TokenKind::Minus {
1110            let operator = self.current_token.clone();
1111            self.next_token();
1112            let right = self.parse_primary_expression();
1113            return Expression::Infix {
1114                left: Box::new(Expression::Number(0.0)),
1115                operator,
1116                right: Box::new(right),
1117            };
1118        }
1119        if self.current_token.kind == TokenKind::Plus {
1120            self.next_token();
1121            return self.parse_primary_expression();
1122        }
1123        if self.current_token.kind == TokenKind::ExclamationMark {
1124            self.next_token_in_regex_context();
1125            let expression = self.parse_primary_expression();
1126            return Expression::Not(Box::new(expression));
1127        }
1128        if self.current_token.kind == TokenKind::Increment {
1129            self.next_token();
1130            let expression = self.parse_primary_expression();
1131            return Expression::PreIncrement(Box::new(expression));
1132        }
1133        if self.current_token.kind == TokenKind::Decrement {
1134            self.next_token();
1135            let expression = self.parse_primary_expression();
1136            return Expression::PreDecrement(Box::new(expression));
1137        }
1138
1139        let mut expression = self.parse_primary_atom();
1140        if self.current_token.kind == TokenKind::Increment {
1141            self.next_token();
1142            expression = Expression::PostIncrement(Box::new(expression));
1143        } else if self.current_token.kind == TokenKind::Decrement {
1144            self.next_token();
1145            expression = Expression::PostDecrement(Box::new(expression));
1146        }
1147        expression
1148    }
1149
1150    fn parse_primary_atom(&mut self) -> Expression<'a> {
1151        match self.current_token.kind {
1152            TokenKind::String => {
1153                let expression = Expression::String(self.current_token.literal);
1154                self.next_token();
1155                expression
1156            }
1157            TokenKind::Regex => {
1158                let expression = Expression::Regex(self.current_token.literal);
1159                self.next_token();
1160                expression
1161            }
1162            TokenKind::Number => {
1163                let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
1164                    Expression::Number(value)
1165                } else {
1166                    todo!()
1167                };
1168                self.next_token();
1169                expression
1170            }
1171            TokenKind::DollarSign => {
1172                self.next_token();
1173                let expression = self.parse_primary_atom();
1174                Expression::Field(Box::new(expression))
1175            }
1176            TokenKind::LeftParen => {
1177                self.next_token();
1178                let expression = self.parse_expression();
1179                if self.current_token.kind == TokenKind::RightParen {
1180                    self.next_token();
1181                }
1182                expression
1183            }
1184            TokenKind::Identifier => {
1185                let identifier = self.current_token.clone();
1186                self.next_token();
1187                if self.current_token.kind == TokenKind::LeftParen
1188                    && self.token_is_immediately_after(&identifier)
1189                {
1190                    let args = self.parse_call_arguments();
1191                    return Expression::FunctionCall {
1192                        name: identifier.literal,
1193                        args,
1194                    };
1195                }
1196                if self.current_token.kind == TokenKind::LeftSquareBracket {
1197                    self.next_token_in_regex_context();
1198                    let index = self.parse_array_index_expression();
1199                    if self.current_token.kind != TokenKind::RightSquareBracket {
1200                        todo!()
1201                    }
1202                    self.next_token();
1203                    Expression::ArrayAccess {
1204                        identifier: identifier.literal,
1205                        index: Box::new(index),
1206                    }
1207                } else {
1208                    Expression::Identifier(identifier.literal)
1209                }
1210            }
1211            TokenKind::Length => {
1212                self.next_token();
1213                if self.current_token.kind == TokenKind::LeftParen {
1214                    self.next_token();
1215                    if self.current_token.kind == TokenKind::RightParen {
1216                        self.next_token();
1217                        Expression::Length(None)
1218                    } else {
1219                        let expression = self.parse_expression();
1220                        if self.current_token.kind != TokenKind::RightParen {
1221                            todo!()
1222                        }
1223                        self.next_token();
1224                        Expression::Length(Some(Box::new(expression)))
1225                    }
1226                } else {
1227                    Expression::Length(None)
1228                }
1229            }
1230            TokenKind::Substr => {
1231                self.next_token();
1232                if self.current_token.kind != TokenKind::LeftParen {
1233                    todo!()
1234                }
1235                self.next_token();
1236                let string = self.parse_expression();
1237                if self.current_token.kind != TokenKind::Comma {
1238                    todo!()
1239                }
1240                self.next_token();
1241                let start = self.parse_expression();
1242                let mut length = None;
1243                if self.current_token.kind == TokenKind::Comma {
1244                    self.next_token();
1245                    length = Some(Box::new(self.parse_expression()));
1246                }
1247                if self.current_token.kind != TokenKind::RightParen {
1248                    todo!()
1249                }
1250                self.next_token();
1251                Expression::Substr {
1252                    string: Box::new(string),
1253                    start: Box::new(start),
1254                    length,
1255                }
1256            }
1257            TokenKind::Rand => {
1258                self.next_token();
1259                if self.current_token.kind == TokenKind::LeftParen {
1260                    self.next_token();
1261                    if self.current_token.kind != TokenKind::RightParen {
1262                        todo!()
1263                    }
1264                    self.next_token();
1265                }
1266                Expression::Rand
1267            }
1268            TokenKind::Close
1269            | TokenKind::Cos
1270            | TokenKind::Exp
1271            | TokenKind::Index
1272            | TokenKind::Int
1273            | TokenKind::Log
1274            | TokenKind::Match
1275            | TokenKind::Sin
1276            | TokenKind::Sprintf
1277            | TokenKind::Split
1278            | TokenKind::Sqrt
1279            | TokenKind::Srand => {
1280                let name = self.current_token.literal;
1281                self.next_token();
1282                if self.current_token.kind == TokenKind::LeftParen {
1283                    let args = self.parse_call_arguments();
1284                    return Expression::FunctionCall { name, args };
1285                }
1286                Expression::Number(0.0)
1287            }
1288            _ => {
1289                panic!(
1290                    "parse_primary_expression not yet implemented, found token: {:?}",
1291                    self.current_token
1292                )
1293            }
1294        }
1295    }
1296
1297    pub fn parse_program(&mut self) -> Program<'_> {
1298        let mut program = Program::new();
1299
1300        while !self.is_eof() {
1301            match self.parse_next_rule() {
1302                Some(Rule::Begin(action)) => program.add_begin_block(action),
1303                Some(Rule::End(action)) => program.add_end_block(action),
1304                Some(rule) => program.add_rule(rule),
1305                None => {}
1306            }
1307            self.next_token_in_regex_context();
1308        }
1309
1310        for definition in self.function_definitions.drain(..) {
1311            program.add_function_definition(definition);
1312        }
1313
1314        program
1315    }
1316
1317    fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1318        if self.current_token.kind != TokenKind::LeftParen {
1319            return vec![];
1320        }
1321        self.next_token_in_regex_context();
1322        let mut args = Vec::new();
1323        while self.current_token.kind != TokenKind::RightParen
1324            && self.current_token.kind != TokenKind::Eof
1325        {
1326            if self.current_token.kind == TokenKind::Comma {
1327                self.next_token();
1328                continue;
1329            }
1330            args.push(self.parse_expression());
1331        }
1332        if self.current_token.kind == TokenKind::RightParen {
1333            self.next_token();
1334        }
1335        args
1336    }
1337}
1338
1339fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1340    match kind {
1341        TokenKind::Assign
1342        | TokenKind::AddAssign
1343        | TokenKind::SubtractAssign
1344        | TokenKind::MultiplyAssign
1345        | TokenKind::DivideAssign
1346        | TokenKind::ModuloAssign
1347        | TokenKind::PowerAssign => Some((0, 0)),
1348        TokenKind::Or => Some((1, 2)),
1349        TokenKind::And => Some((3, 4)),
1350        TokenKind::Equal
1351        | TokenKind::NotEqual
1352        | TokenKind::GreaterThan
1353        | TokenKind::GreaterThanOrEqual
1354        | TokenKind::In
1355        | TokenKind::LessThan
1356        | TokenKind::LessThanOrEqual
1357        | TokenKind::Tilde
1358        | TokenKind::NoMatch => Some((5, 6)),
1359        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1360        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1361        TokenKind::Caret => Some((13, 12)),
1362        _ => None,
1363    }
1364}
1365
1366fn is_expression_start(kind: &TokenKind) -> bool {
1367    matches!(
1368        kind,
1369        TokenKind::String
1370            | TokenKind::Regex
1371            | TokenKind::Number
1372            | TokenKind::DollarSign
1373            | TokenKind::LeftParen
1374            | TokenKind::Identifier
1375            | TokenKind::Cos
1376            | TokenKind::Exp
1377            | TokenKind::Index
1378            | TokenKind::Int
1379            | TokenKind::Length
1380            | TokenKind::Log
1381            | TokenKind::Match
1382            | TokenKind::Rand
1383            | TokenKind::Sin
1384            | TokenKind::Sprintf
1385            | TokenKind::Split
1386            | TokenKind::Sqrt
1387            | TokenKind::Srand
1388            | TokenKind::Substr
1389            | TokenKind::Increment
1390            | TokenKind::Decrement
1391    )
1392}
1393
1394fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1395    let (kind, literal) = match token.kind {
1396        TokenKind::AddAssign => (TokenKind::Plus, "+"),
1397        TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1398        TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1399        TokenKind::DivideAssign => (TokenKind::Division, "/"),
1400        TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1401        TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1402        _ => todo!(),
1403    };
1404
1405    Token::new(kind, literal, token.span.start)
1406}
1407
1408#[cfg(test)]
1409mod tests {
1410    use super::*;
1411
1412    #[test]
1413    fn create_parser() {
1414        let mut parser = Parser::new(Lexer::new("42 == 42"));
1415
1416        assert_eq!(parser.current_token.literal, "42");
1417        parser.next_token();
1418        assert_eq!(parser.current_token.literal, "==");
1419    }
1420
1421    #[test]
1422    fn parse_empty_program() {
1423        let mut parser = Parser::new(Lexer::new(""));
1424
1425        let program = parser.parse_program();
1426
1427        assert_eq!(program.len(), 0);
1428    }
1429
1430    #[test]
1431    fn parse_action_without_pattern() {
1432        let mut parser = Parser::new(Lexer::new("{ print }"));
1433
1434        let program = parser.parse_program();
1435
1436        assert_eq!(program.len(), 1);
1437        assert_eq!("{ print }", program.to_string());
1438    }
1439
1440    #[test]
1441    fn parse_action_with_leading_newlines() {
1442        let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1443
1444        let program = parser.parse_program();
1445
1446        assert_eq!(program.len(), 1);
1447        assert_eq!("{ print }", program.to_string());
1448    }
1449
1450    #[test]
1451    fn parse_begin_block() {
1452        let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1453
1454        let program = parser.parse_program();
1455
1456        assert_eq!(program.len(), 1);
1457        assert_eq!("BEGIN { print }", program.to_string());
1458    }
1459
1460    #[test]
1461    fn parse_end_block() {
1462        let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1463
1464        let program = parser.parse_program();
1465
1466        assert_eq!(program.len(), 1);
1467        assert_eq!("END { print 42 }", program.to_string());
1468    }
1469
1470    #[test]
1471    fn parse_regex_pattern_action() {
1472        let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1473
1474        let program = parser.parse_program();
1475
1476        assert_eq!(program.len(), 1);
1477        assert_eq!("/foo/ { print }", program.to_string());
1478    }
1479
1480    #[test]
1481    fn parse_print_infix_expression() {
1482        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1483
1484        let program = parser.parse_program();
1485        let mut begin_blocks = program.begin_blocks_iter();
1486        let Action { statements } = begin_blocks.next().expect("expected begin block");
1487
1488        let exprs = match &statements[0] {
1489            Statement::Print(expressions) => expressions,
1490            _ => panic!("expected print statement"),
1491        };
1492
1493        match &exprs[0] {
1494            Expression::Infix {
1495                left,
1496                operator,
1497                right,
1498            } => {
1499                assert!(matches!(**left, Expression::Number(1.0)));
1500                assert_eq!(operator.kind, TokenKind::Plus);
1501                assert!(matches!(**right, Expression::Number(2.0)));
1502            }
1503            _ => panic!("expected infix expression"),
1504        }
1505    }
1506
1507    #[test]
1508    fn parse_print_parenthesized_expression() {
1509        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1510
1511        let program = parser.parse_program();
1512        let mut begin_blocks = program.begin_blocks_iter();
1513        let Action { statements } = begin_blocks.next().expect("expected begin block");
1514
1515        let exprs = match &statements[0] {
1516            Statement::Print(expressions) => expressions,
1517            _ => panic!("expected print statement"),
1518        };
1519
1520        match &exprs[0] {
1521            Expression::Infix {
1522                left,
1523                operator,
1524                right,
1525            } => {
1526                assert_eq!(operator.kind, TokenKind::Asterisk);
1527                assert!(matches!(**right, Expression::Number(3.0)));
1528                assert!(matches!(**left, Expression::Infix { .. }));
1529            }
1530            _ => panic!("expected infix expression"),
1531        }
1532    }
1533
1534    #[test]
1535    fn parse_print_multiplication_has_higher_precedence_than_addition() {
1536        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1537
1538        let program = parser.parse_program();
1539        let mut begin_blocks = program.begin_blocks_iter();
1540        let Action { statements } = begin_blocks.next().expect("expected begin block");
1541
1542        let exprs = match &statements[0] {
1543            Statement::Print(expressions) => expressions,
1544            _ => panic!("expected print statement"),
1545        };
1546
1547        match &exprs[0] {
1548            Expression::Infix {
1549                left,
1550                operator,
1551                right,
1552            } => {
1553                assert_eq!(operator.kind, TokenKind::Plus);
1554                assert!(matches!(**left, Expression::Number(1.0)));
1555                match &**right {
1556                    Expression::Infix {
1557                        operator: right_op, ..
1558                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1559                    _ => panic!("expected nested infix expression"),
1560                }
1561            }
1562            _ => panic!("expected infix expression"),
1563        }
1564    }
1565
1566    #[test]
1567    fn parse_print_power_is_right_associative() {
1568        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1569
1570        let program = parser.parse_program();
1571        let mut begin_blocks = program.begin_blocks_iter();
1572        let Action { statements } = begin_blocks.next().expect("expected begin block");
1573
1574        let exprs = match &statements[0] {
1575            Statement::Print(expressions) => expressions,
1576            _ => panic!("expected print statement"),
1577        };
1578
1579        match &exprs[0] {
1580            Expression::Infix {
1581                left,
1582                operator,
1583                right,
1584            } => {
1585                assert_eq!(operator.kind, TokenKind::Caret);
1586                assert!(matches!(**left, Expression::Number(2.0)));
1587                match &**right {
1588                    Expression::Infix {
1589                        operator: right_op, ..
1590                    } => assert_eq!(right_op.kind, TokenKind::Caret),
1591                    _ => panic!("expected nested infix expression"),
1592                }
1593            }
1594            _ => panic!("expected infix expression"),
1595        }
1596    }
1597
1598    #[test]
1599    fn parse_print_minus_is_left_associative() {
1600        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1601
1602        let program = parser.parse_program();
1603        let mut begin_blocks = program.begin_blocks_iter();
1604        let Action { statements } = begin_blocks.next().expect("expected begin block");
1605
1606        let exprs = match &statements[0] {
1607            Statement::Print(expressions) => expressions,
1608            _ => panic!("expected print statement"),
1609        };
1610
1611        match &exprs[0] {
1612            Expression::Infix {
1613                left,
1614                operator,
1615                right,
1616            } => {
1617                assert_eq!(operator.kind, TokenKind::Minus);
1618                match &**left {
1619                    Expression::Infix {
1620                        operator: left_op, ..
1621                    } => assert_eq!(left_op.kind, TokenKind::Minus),
1622                    _ => panic!("expected nested infix expression"),
1623                }
1624                assert!(matches!(**right, Expression::Number(1.0)));
1625            }
1626            _ => panic!("expected infix expression"),
1627        }
1628    }
1629
1630    #[test]
1631    fn parse_print_concatenation() {
1632        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
1633
1634        let program = parser.parse_program();
1635        let mut begin_blocks = program.begin_blocks_iter();
1636        let Action { statements } = begin_blocks.next().expect("expected begin block");
1637
1638        let exprs = match &statements[0] {
1639            Statement::Print(expressions) => expressions,
1640            _ => panic!("expected print statement"),
1641        };
1642
1643        assert_eq!(exprs.len(), 1);
1644        match &exprs[0] {
1645            Expression::Concatenation { left, right } => {
1646                assert!(matches!(**left, Expression::String("Value:")));
1647                assert!(matches!(**right, Expression::Number(42.0)));
1648            }
1649            _ => panic!("expected concatenation expression"),
1650        }
1651    }
1652
1653    #[test]
1654    fn parse_continue_statement() {
1655        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
1656
1657        let program = parser.parse_program();
1658        let mut rules = program.rules_iter();
1659        let rule = rules.next().expect("expected rule");
1660
1661        let statements = match rule {
1662            Rule::Action(Action { statements }) => statements,
1663            _ => panic!("expected action rule"),
1664        };
1665
1666        assert!(matches!(statements[0], Statement::Continue));
1667    }
1668
1669    #[test]
1670    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
1671        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
1672
1673        let program = parser.parse_program();
1674        let mut rules = program.rules_iter();
1675        let rule = rules.next().expect("expected rule");
1676
1677        let statements = match rule {
1678            Rule::Action(Action { statements }) => statements,
1679            _ => panic!("expected action rule"),
1680        };
1681
1682        let exprs = match &statements[1] {
1683            Statement::Print(expressions) => expressions,
1684            _ => panic!("expected print statement"),
1685        };
1686
1687        assert_eq!(exprs.len(), 1);
1688        match &exprs[0] {
1689            Expression::Concatenation { left, right } => {
1690                assert!(matches!(**left, Expression::Identifier("x")));
1691                assert!(matches!(**right, Expression::PreIncrement(_)));
1692            }
1693            _ => panic!("expected concatenation expression"),
1694        }
1695    }
1696
1697    #[test]
1698    fn parse_print_field_expression() {
1699        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
1700
1701        let program = parser.parse_program();
1702        let mut rules = program.rules_iter();
1703        let rule = rules.next().expect("expected rule");
1704
1705        let statements = match rule {
1706            Rule::Action(Action { statements }) => statements,
1707            _ => panic!("expected action rule"),
1708        };
1709
1710        let exprs = match &statements[0] {
1711            Statement::Print(expressions) => expressions,
1712            _ => panic!("expected print statement"),
1713        };
1714
1715        match &exprs[0] {
1716            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
1717            _ => panic!("expected field expression"),
1718        }
1719    }
1720
1721    #[test]
1722    fn parse_print_with_commas() {
1723        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
1724
1725        let program = parser.parse_program();
1726
1727        assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
1728    }
1729
1730    #[test]
1731    fn parse_number_of_fields_identifier() {
1732        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
1733
1734        let program = parser.parse_program();
1735
1736        assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
1737    }
1738
1739    #[test]
1740    fn parse_printf_with_format_and_arguments() {
1741        let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
1742
1743        let program = parser.parse_program();
1744
1745        assert_eq!(
1746            r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
1747            program.to_string()
1748        );
1749    }
1750
1751    #[test]
1752    fn parse_add_assignment_and_pre_increment() {
1753        let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
1754
1755        let program = parser.parse_program();
1756
1757        assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
1758    }
1759
1760    #[test]
1761    fn parse_regex_match_pattern_action() {
1762        let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
1763
1764        let program = parser.parse_program();
1765
1766        assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
1767    }
1768
1769    #[test]
1770    fn parse_not_pattern_action() {
1771        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
1772
1773        let program = parser.parse_program();
1774        let mut rules = program.rules_iter();
1775        let rule = rules.next().expect("expected rule");
1776
1777        match rule {
1778            Rule::PatternAction {
1779                pattern: Some(Expression::Not(inner)),
1780                action: Some(Action { statements }),
1781            } => {
1782                assert!(matches!(**inner, Expression::Infix { .. }));
1783                assert!(matches!(statements[0], Statement::Print(_)));
1784            }
1785            _ => panic!("expected negated pattern action"),
1786        }
1787    }
1788
1789    #[test]
1790    fn parse_print_with_line_continuation_after_comma() {
1791        let mut parser = Parser::new(Lexer::new(
1792            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
1793        ));
1794
1795        let program = parser.parse_program();
1796
1797        assert_eq!(
1798            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
1799            program.to_string()
1800        );
1801    }
1802
1803    #[test]
1804    fn parse_gsub_statement() {
1805        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
1806
1807        let program = parser.parse_program();
1808
1809        assert_eq!(
1810            r#"{ gsub(/USA/, "United States"); print }"#,
1811            program.to_string()
1812        );
1813    }
1814
1815    #[test]
1816    fn parse_gsub_statement_with_target() {
1817        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
1818
1819        let program = parser.parse_program();
1820
1821        assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
1822    }
1823
1824    #[test]
1825    fn parse_system_statement() {
1826        let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
1827
1828        let program = parser.parse_program();
1829
1830        assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
1831    }
1832
1833    #[test]
1834    fn parse_print_length_builtin_expression() {
1835        let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
1836
1837        let program = parser.parse_program();
1838
1839        assert_eq!(r#"{ print length, $0 }"#, program.to_string());
1840    }
1841
1842    #[test]
1843    fn parse_length_expression_as_rule_pattern() {
1844        let mut parser = Parser::new(Lexer::new(
1845            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1846        ));
1847
1848        let program = parser.parse_program();
1849
1850        assert_eq!(
1851            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1852            program.to_string()
1853        );
1854    }
1855
1856    #[test]
1857    fn parse_field_assignment_with_substr() {
1858        let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
1859
1860        let program = parser.parse_program();
1861
1862        assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
1863    }
1864
1865    #[test]
1866    fn parse_assignment_with_concatenation_and_substr() {
1867        let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
1868
1869        let program = parser.parse_program();
1870
1871        assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
1872    }
1873
1874    #[test]
1875    fn parse_field_divide_assignment() {
1876        let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
1877
1878        let program = parser.parse_program();
1879
1880        assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
1881    }
1882
1883    #[test]
1884    fn parse_chained_assignment() {
1885        let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
1886
1887        let program = parser.parse_program();
1888
1889        assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
1890    }
1891
1892    #[test]
1893    fn parse_if_statement_with_block() {
1894        let mut parser = Parser::new(Lexer::new(
1895            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1896        ));
1897
1898        let program = parser.parse_program();
1899
1900        assert_eq!(
1901            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1902            program.to_string()
1903        );
1904    }
1905
1906    #[test]
1907    fn parse_while_with_post_increment() {
1908        let mut parser = Parser::new(Lexer::new(
1909            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1910        ));
1911
1912        let program = parser.parse_program();
1913
1914        assert_eq!(
1915            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1916            program.to_string()
1917        );
1918    }
1919
1920    #[test]
1921    fn parse_while_with_single_body_statement() {
1922        let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
1923
1924        let program = parser.parse_program();
1925
1926        assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
1927    }
1928
1929    #[test]
1930    fn parse_do_while_with_post_increment() {
1931        let mut parser = Parser::new(Lexer::new(
1932            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1933        ));
1934
1935        let program = parser.parse_program();
1936
1937        assert_eq!(
1938            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1939            program.to_string()
1940        );
1941    }
1942
1943    #[test]
1944    fn parse_for_with_empty_body_statement() {
1945        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#));
1946
1947        let program = parser.parse_program();
1948
1949        assert_eq!(
1950            r#"{ for (i = 1; i <= NF; s += $i++) {  }; print s }"#,
1951            program.to_string()
1952        );
1953    }
1954
1955    #[test]
1956    fn parse_post_decrement_statement() {
1957        let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
1958
1959        let program = parser.parse_program();
1960
1961        assert_eq!(r#"{ k--; n-- }"#, program.to_string());
1962    }
1963
1964    #[test]
1965    fn parse_rand_expression() {
1966        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
1967
1968        let program = parser.parse_program();
1969
1970        assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
1971    }
1972
1973    #[test]
1974    fn parse_math_builtin_expressions() {
1975        let mut parser = Parser::new(Lexer::new(
1976            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1977        ));
1978
1979        let program = parser.parse_program();
1980
1981        assert_eq!(
1982            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1983            program.to_string()
1984        );
1985    }
1986
1987    #[test]
1988    fn parse_index_builtin_expression() {
1989        let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
1990
1991        let program = parser.parse_program();
1992
1993        assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
1994    }
1995
1996    #[test]
1997    fn parse_match_builtin_expression() {
1998        let mut parser = Parser::new(Lexer::new(r#"{ print match($NF, $1), RSTART, RLENGTH }"#));
1999
2000        let program = parser.parse_program();
2001
2002        assert_eq!(
2003            r#"{ print match($NF, $1), RSTART, RLENGTH }"#,
2004            program.to_string()
2005        );
2006    }
2007
2008    #[test]
2009    fn parse_in_membership_expression() {
2010        let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
2011
2012        let program = parser.parse_program();
2013
2014        assert_eq!(r#"{ print 1 in x }"#, program.to_string());
2015    }
2016
2017    #[test]
2018    fn parse_parenthesized_composite_membership_expression() {
2019        let mut parser = Parser::new(Lexer::new(r#"{ if (($0, $1) in x) print "yes" }"#));
2020
2021        let program = parser.parse_program();
2022
2023        assert_eq!(r#"{ if ($0, $1 in x) { print "yes" } }"#, program.to_string());
2024    }
2025
2026    #[test]
2027    fn parse_for_loop_with_single_body_statement() {
2028        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
2029
2030        let program = parser.parse_program();
2031
2032        assert_eq!(
2033            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
2034            program.to_string()
2035        );
2036    }
2037
2038    #[test]
2039    fn parse_if_with_single_statement_body() {
2040        let mut parser = Parser::new(Lexer::new(
2041            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
2042        ));
2043
2044        let program = parser.parse_program();
2045
2046        assert_eq!(
2047            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
2048            program.to_string()
2049        );
2050    }
2051
2052    #[test]
2053    fn parse_exit_statement() {
2054        let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
2055
2056        let program = parser.parse_program();
2057
2058        assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
2059    }
2060
2061    #[test]
2062    fn parse_exit_statement_with_status() {
2063        let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
2064
2065        let program = parser.parse_program();
2066
2067        assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
2068    }
2069
2070    #[test]
2071    fn parse_user_defined_function_call_statement() {
2072        let mut parser = Parser::new(Lexer::new(
2073            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
2074        ));
2075
2076        let program = parser.parse_program();
2077
2078        let definition = program
2079            .function_definition("myabort")
2080            .expect("expected function definition");
2081        assert_eq!(definition.parameters, vec!["n"]);
2082        assert_eq!(definition.statements.len(), 1);
2083    }
2084
2085    #[test]
2086    fn parse_delete_array_element_statement() {
2087        let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2088
2089        let program = parser.parse_program();
2090
2091        assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2092    }
2093
2094    #[test]
2095    fn parse_array_add_assignment_and_access() {
2096        let mut parser = Parser::new(Lexer::new(
2097            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2098        ));
2099
2100        let program = parser.parse_program();
2101
2102        assert_eq!(
2103            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2104            program.to_string()
2105        );
2106    }
2107
2108    #[test]
2109    fn parse_for_in_loop() {
2110        let mut parser = Parser::new(Lexer::new(
2111            r#"END { for (name in area) print name ":" area[name] }"#,
2112        ));
2113
2114        let program = parser.parse_program();
2115
2116        assert_eq!(
2117            r#"END { for (name in area) { print name ":" area[name] } }"#,
2118            program.to_string()
2119        );
2120    }
2121
2122    #[test]
2123    fn parse_print_redirection() {
2124        let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2125
2126        let program = parser.parse_program();
2127
2128        assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2129    }
2130
2131    #[test]
2132    fn parse_print_pipe() {
2133        let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2134
2135        let program = parser.parse_program();
2136
2137        assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2138    }
2139}