Skip to main content

rawk_core/
parser.rs

1use crate::{
2    Lexer, Program,
3    ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4    token::{Token, TokenKind},
5};
6
/// Parser state: a token source plus the single token currently being examined.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source; the parser pulls one token at a time from it.
    lexer: Lexer<'a>,
    /// The token the parse methods are currently inspecting.
    current_token: Token<'a>,
    /// Function definitions collected by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
13
14impl<'a> Parser<'a> {
15    pub fn new(mut lexer: Lexer<'a>) -> Self {
16        let current_token = lexer.next_token_regex_aware();
17        Parser {
18            lexer,
19            current_token,
20            function_definitions: Vec::new(),
21        }
22    }
23
24    fn next_token(&mut self) {
25        self.current_token = self.lexer.next_token();
26    }
27
28    fn next_token_in_regex_context(&mut self) {
29        self.current_token = self.lexer.next_token_regex_aware();
30    }
31
32    fn is_eof(&self) -> bool {
33        self.current_token.kind == TokenKind::Eof
34    }
35
36    fn is_statement_terminator(&self) -> bool {
37        matches!(
38            self.current_token.kind,
39            TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
40        )
41    }
42
43    fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
44        self.current_token.span.start == previous.span.start + previous.literal.len()
45    }
46
47    fn parse_number_expression(&self) -> Option<Expression<'a>> {
48        let literal = self.current_token.literal;
49        if let Some(hex_digits) = literal
50            .strip_prefix("0x")
51            .or_else(|| literal.strip_prefix("0X"))
52        {
53            let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
54            return Some(Expression::HexNumber { literal, value });
55        }
56
57        literal.parse::<f64>().ok().map(Expression::Number)
58    }
59
60    fn parse_array_index_expression(&mut self) -> Expression<'a> {
61        let mut index = self.parse_expression();
62        while self.current_token.kind == TokenKind::Comma {
63            let operator = self.current_token.clone();
64            self.next_token_in_regex_context();
65            let right = self.parse_expression();
66            index = Expression::Infix {
67                left: Box::new(index),
68                operator,
69                right: Box::new(right),
70            };
71        }
72        index
73    }
74
75    fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
76        match &self.current_token.kind {
77            TokenKind::Begin => {
78                self.next_token();
79                match self.parse_action() {
80                    Rule::Action(action) => Some(Rule::Begin(action)),
81                    _ => panic!("Expected action after BEGIN"),
82                }
83            }
84            TokenKind::NewLine => {
85                self.next_token_in_regex_context();
86                self.parse_next_rule()
87            }
88            TokenKind::Eof => None,
89            TokenKind::LeftCurlyBrace => Some(self.parse_action()),
90            TokenKind::Function => {
91                self.parse_function_definition();
92                None
93            }
94            TokenKind::End => {
95                self.next_token();
96                match self.parse_action() {
97                    Rule::Action(action) => Some(Rule::End(action)),
98                    _ => panic!("Expected action after END"),
99                }
100            }
101            TokenKind::Regex
102            | TokenKind::String
103            | TokenKind::Number
104            | TokenKind::DollarSign
105            | TokenKind::LeftParen
106            | TokenKind::Identifier
107            | TokenKind::Cos
108            | TokenKind::Exp
109            | TokenKind::Index
110            | TokenKind::Int
111            | TokenKind::Length
112            | TokenKind::Log
113            | TokenKind::Match
114            | TokenKind::Rand
115            | TokenKind::Sin
116            | TokenKind::Sprintf
117            | TokenKind::Split
118            | TokenKind::Sqrt
119            | TokenKind::Srand
120            | TokenKind::Substr
121            | TokenKind::ExclamationMark
122            | TokenKind::Increment
123            | TokenKind::Decrement => self.parse_pattern_rule(),
124            _ => panic!(
125                "parse_next_rule not yet implemented, found token: {:?}",
126                self.current_token
127            ),
128        }
129    }
130
131    fn parse_pattern_rule(&mut self) -> Option<Rule<'a>> {
132        let mut pattern = self.parse_expression();
133        if self.current_token.kind == TokenKind::Comma {
134            let operator = self.current_token.clone();
135            self.next_token_in_regex_context();
136            let right = self.parse_expression();
137            pattern = Expression::Infix {
138                left: Box::new(pattern),
139                operator,
140                right: Box::new(right),
141            };
142        }
143        let pattern = Some(pattern);
144
145        if self.current_token.kind == TokenKind::LeftCurlyBrace {
146            match self.parse_action() {
147                Rule::Action(action) => Some(Rule::PatternAction {
148                    pattern,
149                    action: Some(action),
150                }),
151                _ => panic!("Expected action after pattern"),
152            }
153        } else {
154            Some(Rule::PatternAction {
155                pattern,
156                action: None,
157            })
158        }
159    }
160
161    fn parse_action(&mut self) -> Rule<'a> {
162        self.next_token(); // consume '{'
163
164        let pattern = None;
165
166        let mut statements = Vec::new();
167        while self.current_token.kind != TokenKind::RightCurlyBrace
168            && self.current_token.kind != TokenKind::Eof
169        {
170            while self.current_token.kind == TokenKind::NewLine
171                || self.current_token.kind == TokenKind::Semicolon
172            {
173                self.next_token();
174            }
175
176            if self.current_token.kind == TokenKind::RightCurlyBrace
177                || self.current_token.kind == TokenKind::Eof
178            {
179                break;
180            }
181
182            statements.push(self.parse_statement());
183        }
184
185        if pattern.is_some() {
186            Rule::PatternAction {
187                pattern,
188                action: Some(Action { statements }),
189            }
190        } else {
191            Rule::Action(Action { statements })
192        }
193    }
194
195    fn parse_statement(&mut self) -> Statement<'a> {
196        match self.current_token.kind {
197            TokenKind::Print => self.parse_print_function(),
198            TokenKind::Printf => self.parse_printf_function(),
199            TokenKind::System => self.parse_system_function(),
200            TokenKind::Split => self.parse_split_statement(),
201            TokenKind::Sub => self.parse_sub_function(),
202            TokenKind::Gsub => self.parse_gsub_function(),
203            TokenKind::Break => self.parse_break_statement(),
204            TokenKind::Continue => self.parse_continue_statement(),
205            TokenKind::Delete => self.parse_delete_statement(),
206            TokenKind::If => self.parse_if_statement(),
207            TokenKind::Do => self.parse_do_statement(),
208            TokenKind::While => self.parse_while_statement(),
209            TokenKind::For => self.parse_for_statement(),
210            TokenKind::Return => self.parse_return_statement(),
211            TokenKind::Next => self.parse_next_statement(),
212            TokenKind::Exit => self.parse_exit_statement(),
213            TokenKind::Identifier => self.parse_assignment_statement(),
214            TokenKind::DollarSign => self.parse_field_assignment_statement(),
215            TokenKind::Increment => self.parse_pre_increment_statement(),
216            TokenKind::Decrement => self.parse_pre_decrement_statement(),
217            TokenKind::Number
218            | TokenKind::String
219            | TokenKind::Regex
220            | TokenKind::LeftParen
221            | TokenKind::Close
222            | TokenKind::Cos
223            | TokenKind::Exp
224            | TokenKind::Index
225            | TokenKind::Int
226            | TokenKind::Length
227            | TokenKind::Log
228            | TokenKind::Match
229            | TokenKind::Rand
230            | TokenKind::Sin
231            | TokenKind::Sprintf
232            | TokenKind::Sqrt
233            | TokenKind::Srand
234            | TokenKind::Substr
235            | TokenKind::ToLower
236            | TokenKind::ToUpper => Statement::Expression(self.parse_expression()),
237            _ => todo!(),
238        }
239    }
240
241    fn parse_function_definition(&mut self) {
242        self.next_token();
243        if self.current_token.kind != TokenKind::Identifier {
244            todo!()
245        }
246        let name = self.current_token.literal;
247        self.next_token();
248        if self.current_token.kind != TokenKind::LeftParen {
249            todo!()
250        }
251        self.next_token();
252
253        let mut parameters = Vec::new();
254        while self.current_token.kind != TokenKind::RightParen {
255            if self.current_token.kind != TokenKind::Identifier {
256                todo!()
257            }
258            parameters.push(self.current_token.literal);
259            self.next_token();
260            if self.current_token.kind == TokenKind::Comma {
261                self.next_token();
262            } else if self.current_token.kind != TokenKind::RightParen {
263                todo!()
264            }
265        }
266
267        self.next_token();
268        while self.current_token.kind == TokenKind::NewLine {
269            self.next_token();
270        }
271        if self.current_token.kind != TokenKind::LeftCurlyBrace {
272            todo!()
273        }
274
275        let mut statements = Vec::new();
276        self.next_token(); // consume '{'
277        while self.current_token.kind != TokenKind::RightCurlyBrace
278            && self.current_token.kind != TokenKind::Eof
279        {
280            while self.current_token.kind == TokenKind::NewLine
281                || self.current_token.kind == TokenKind::Semicolon
282            {
283                self.next_token();
284            }
285
286            if self.current_token.kind == TokenKind::RightCurlyBrace
287                || self.current_token.kind == TokenKind::Eof
288            {
289                break;
290            }
291
292            statements.push(self.parse_statement());
293        }
294        self.function_definitions.push(FunctionDefinition {
295            name,
296            parameters,
297            statements,
298        });
299    }
300
301    fn parse_assignment_statement(&mut self) -> Statement<'a> {
302        let identifier = self.current_token.clone();
303        self.next_token();
304        self.parse_assignment_statement_with_identifier(identifier)
305    }
306
307    fn parse_assignment_statement_with_identifier(
308        &mut self,
309        identifier: Token<'a>,
310    ) -> Statement<'a> {
311        if self.current_token.kind == TokenKind::LeftParen
312            && self.token_is_immediately_after(&identifier)
313        {
314            let args = self.parse_call_arguments();
315            return Statement::Expression(Expression::FunctionCall {
316                name: identifier.literal,
317                args,
318            });
319        }
320        if self.current_token.kind == TokenKind::LeftSquareBracket {
321            self.next_token_in_regex_context();
322            let index = self.parse_array_index_expression();
323            if self.current_token.kind != TokenKind::RightSquareBracket {
324                todo!()
325            }
326            self.next_token();
327            if self.current_token.kind == TokenKind::Assign {
328                self.next_token();
329                let value = self.parse_expression();
330                return Statement::ArrayAssignment {
331                    identifier: identifier.literal,
332                    index,
333                    value,
334                };
335            }
336            if self.current_token.kind == TokenKind::AddAssign {
337                self.next_token();
338                let value = self.parse_expression();
339                return Statement::ArrayAddAssignment {
340                    identifier: identifier.literal,
341                    index,
342                    value,
343                };
344            }
345            if self.current_token.kind == TokenKind::Increment {
346                self.next_token();
347                return Statement::ArrayPostIncrement {
348                    identifier: identifier.literal,
349                    index,
350                };
351            }
352            if self.current_token.kind == TokenKind::Decrement {
353                self.next_token();
354                return Statement::ArrayPostDecrement {
355                    identifier: identifier.literal,
356                    index,
357                };
358            }
359            todo!()
360        }
361        if self.current_token.kind == TokenKind::Assign {
362            self.next_token();
363            if self.current_token.kind == TokenKind::Split {
364                return self.parse_split_assignment_statement(identifier.literal);
365            }
366            let value = self.parse_expression();
367            Statement::Assignment {
368                identifier: identifier.literal,
369                value,
370            }
371        } else if self.current_token.kind == TokenKind::Increment {
372            self.next_token();
373            Statement::PostIncrement {
374                identifier: identifier.literal,
375            }
376        } else if self.current_token.kind == TokenKind::Decrement {
377            self.next_token();
378            Statement::PostDecrement {
379                identifier: identifier.literal,
380            }
381        } else if self.current_token.kind == TokenKind::AddAssign {
382            self.next_token();
383            let value = self.parse_expression();
384            Statement::AddAssignment {
385                identifier: identifier.literal,
386                value,
387            }
388        } else if matches!(
389            self.current_token.kind,
390            TokenKind::SubtractAssign
391                | TokenKind::MultiplyAssign
392                | TokenKind::DivideAssign
393                | TokenKind::ModuloAssign
394                | TokenKind::PowerAssign
395        ) {
396            let assign_token = self.current_token.clone();
397            self.next_token();
398            let right_value = self.parse_expression();
399            Statement::Assignment {
400                identifier: identifier.literal,
401                value: Expression::Infix {
402                    left: Box::new(Expression::Identifier(identifier.literal)),
403                    operator: compound_assign_operator(&assign_token),
404                    right: Box::new(right_value),
405                },
406            }
407        } else {
408            todo!()
409        }
410    }
411
412    fn parse_delete_statement(&mut self) -> Statement<'a> {
413        self.next_token();
414        if self.current_token.kind != TokenKind::Identifier {
415            todo!()
416        }
417        let identifier = self.current_token.literal;
418        self.next_token();
419        if self.current_token.kind != TokenKind::LeftSquareBracket {
420            return Statement::Delete {
421                identifier,
422                index: None,
423            };
424        }
425
426        self.next_token_in_regex_context();
427        let index = self.parse_array_index_expression();
428        if self.current_token.kind != TokenKind::RightSquareBracket {
429            todo!()
430        }
431        self.next_token();
432        Statement::Delete {
433            identifier,
434            index: Some(index),
435        }
436    }
437
438    fn parse_break_statement(&mut self) -> Statement<'a> {
439        self.next_token();
440        Statement::Break
441    }
442
443    fn parse_continue_statement(&mut self) -> Statement<'a> {
444        self.next_token();
445        Statement::Continue
446    }
447
448    fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
449        self.next_token();
450        if self.current_token.kind != TokenKind::Identifier {
451            todo!()
452        }
453        let identifier = self.current_token.literal;
454        self.next_token();
455        Statement::PreIncrement { identifier }
456    }
457
458    fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
459        self.next_token();
460        if self.current_token.kind != TokenKind::Identifier {
461            todo!()
462        }
463        let identifier = self.current_token.literal;
464        self.next_token();
465        Statement::PreDecrement { identifier }
466    }
467
468    fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
469        self.next_token();
470        if self.current_token.kind != TokenKind::LeftParen {
471            todo!()
472        }
473        self.next_token_in_regex_context();
474        let string = self.parse_expression();
475        if self.current_token.kind != TokenKind::Comma {
476            todo!()
477        }
478        self.next_token();
479        if self.current_token.kind != TokenKind::Identifier {
480            todo!()
481        }
482        let array = self.current_token.literal;
483        self.next_token();
484        let separator = if self.current_token.kind == TokenKind::Comma {
485            self.next_token_in_regex_context();
486            Some(self.parse_expression())
487        } else {
488            None
489        };
490        if self.current_token.kind != TokenKind::RightParen {
491            todo!()
492        }
493        self.next_token();
494        Statement::SplitAssignment {
495            identifier,
496            string,
497            array,
498            separator,
499        }
500    }
501
502    fn parse_split_statement(&mut self) -> Statement<'a> {
503        self.next_token();
504        if self.current_token.kind != TokenKind::LeftParen {
505            todo!()
506        }
507        self.next_token_in_regex_context();
508        let string = self.parse_expression();
509        if self.current_token.kind != TokenKind::Comma {
510            todo!()
511        }
512        self.next_token();
513        if self.current_token.kind != TokenKind::Identifier {
514            todo!()
515        }
516        let array = self.current_token.literal;
517        self.next_token();
518        let separator = if self.current_token.kind == TokenKind::Comma {
519            self.next_token_in_regex_context();
520            Some(self.parse_expression())
521        } else {
522            None
523        };
524        if self.current_token.kind != TokenKind::RightParen {
525            todo!()
526        }
527        self.next_token();
528        Statement::Split {
529            string,
530            array,
531            separator,
532        }
533    }
534
535    fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
536        self.next_token();
537        let field = self.parse_primary_expression();
538        let assign_token = self.current_token.clone();
539        self.next_token();
540        let right_value = self.parse_expression();
541
542        let value = if assign_token.kind == TokenKind::Assign {
543            right_value
544        } else {
545            let operator = compound_assign_operator(&assign_token);
546            Expression::Infix {
547                left: Box::new(Expression::Field(Box::new(field.clone()))),
548                operator,
549                right: Box::new(right_value),
550            }
551        };
552        Statement::FieldAssignment { field, value }
553    }
554
555    fn parse_if_statement(&mut self) -> Statement<'a> {
556        self.next_token();
557        if self.current_token.kind != TokenKind::LeftParen {
558            todo!()
559        }
560        self.next_token_in_regex_context();
561        let condition = self.parse_condition_in_parens();
562        if self.current_token.kind != TokenKind::RightParen {
563            todo!()
564        }
565        self.next_token();
566        let then_statements = self.parse_control_statement_body();
567
568        while self.current_token.kind == TokenKind::NewLine
569            || self.current_token.kind == TokenKind::Semicolon
570        {
571            self.next_token();
572        }
573
574        if self.current_token.kind == TokenKind::Else {
575            self.next_token();
576            let else_statements = self.parse_control_statement_body();
577            return Statement::IfElse {
578                condition,
579                then_statements,
580                else_statements,
581            };
582        }
583
584        Statement::If {
585            condition,
586            then_statements,
587        }
588    }
589
590    fn parse_exit_statement(&mut self) -> Statement<'a> {
591        self.next_token();
592        let status = if self.is_statement_terminator() {
593            None
594        } else {
595            Some(self.parse_expression())
596        };
597        Statement::Exit(status)
598    }
599
600    fn parse_return_statement(&mut self) -> Statement<'a> {
601        self.next_token();
602        let value = if self.is_statement_terminator() {
603            None
604        } else {
605            Some(self.parse_expression())
606        };
607        Statement::Return(value)
608    }
609
610    fn parse_next_statement(&mut self) -> Statement<'a> {
611        self.next_token();
612        Statement::Next
613    }
614
615    fn parse_statement_block(&mut self) -> Vec<Statement<'a>> {
616        self.next_token(); // consume '{'
617        let mut statements = Vec::new();
618        while self.current_token.kind != TokenKind::RightCurlyBrace
619            && self.current_token.kind != TokenKind::Eof
620        {
621            while self.current_token.kind == TokenKind::NewLine
622                || self.current_token.kind == TokenKind::Semicolon
623            {
624                self.next_token();
625            }
626
627            if self.current_token.kind == TokenKind::RightCurlyBrace
628                || self.current_token.kind == TokenKind::Eof
629            {
630                break;
631            }
632            statements.push(self.parse_statement());
633        }
634        if self.current_token.kind == TokenKind::RightCurlyBrace {
635            self.next_token();
636        }
637        statements
638    }
639
640    fn parse_control_statement_body(&mut self) -> Vec<Statement<'a>> {
641        while self.current_token.kind == TokenKind::NewLine {
642            self.next_token();
643        }
644
645        if self.current_token.kind == TokenKind::LeftCurlyBrace {
646            return self.parse_statement_block();
647        }
648
649        if self.current_token.kind == TokenKind::Semicolon {
650            self.next_token();
651            return vec![Statement::Empty];
652        }
653
654        vec![self.parse_statement()]
655    }
656
657    fn parse_while_statement(&mut self) -> Statement<'a> {
658        self.next_token();
659        if self.current_token.kind != TokenKind::LeftParen {
660            todo!()
661        }
662        self.next_token_in_regex_context();
663        let condition = self.parse_condition_in_parens();
664        if self.current_token.kind != TokenKind::RightParen {
665            todo!()
666        }
667        self.next_token();
668        let statements = self.parse_control_statement_body();
669        Statement::While {
670            condition,
671            statements,
672        }
673    }
674
675    fn parse_do_statement(&mut self) -> Statement<'a> {
676        self.next_token();
677        let statements = self.parse_control_statement_body();
678
679        while self.current_token.kind == TokenKind::NewLine
680            || self.current_token.kind == TokenKind::Semicolon
681        {
682            self.next_token();
683        }
684
685        if self.current_token.kind != TokenKind::While {
686            todo!()
687        }
688        self.next_token();
689        if self.current_token.kind != TokenKind::LeftParen {
690            todo!()
691        }
692        self.next_token_in_regex_context();
693        let condition = self.parse_condition_in_parens();
694        if self.current_token.kind != TokenKind::RightParen {
695            todo!()
696        }
697        self.next_token();
698        Statement::DoWhile {
699            condition,
700            statements,
701        }
702    }
703
704    fn parse_for_statement(&mut self) -> Statement<'a> {
705        self.next_token();
706        if self.current_token.kind != TokenKind::LeftParen {
707            todo!()
708        }
709        self.next_token();
710        while self.current_token.kind == TokenKind::NewLine {
711            self.next_token();
712        }
713
714        let init = if self.current_token.kind == TokenKind::Semicolon {
715            Statement::Empty
716        } else if self.current_token.kind == TokenKind::Identifier {
717            let variable = self.current_token.clone();
718            self.next_token();
719            if self.current_token.kind == TokenKind::In {
720                self.next_token();
721                if self.current_token.kind != TokenKind::Identifier {
722                    todo!()
723                }
724                let array = self.current_token.literal;
725                self.next_token();
726                if self.current_token.kind != TokenKind::RightParen {
727                    todo!()
728                }
729                self.next_token();
730                let statements = self.parse_control_statement_body();
731                return Statement::ForIn {
732                    variable: variable.literal,
733                    array,
734                    statements,
735                };
736            }
737            self.parse_assignment_statement_with_identifier(variable)
738        } else {
739            self.parse_statement()
740        };
741        while self.current_token.kind == TokenKind::NewLine {
742            self.next_token();
743        }
744        if self.current_token.kind != TokenKind::Semicolon {
745            todo!()
746        }
747        self.next_token_in_regex_context();
748        while self.current_token.kind == TokenKind::NewLine {
749            self.next_token_in_regex_context();
750        }
751
752        let condition = if self.current_token.kind == TokenKind::Semicolon {
753            Expression::Number(1.0)
754        } else {
755            self.parse_expression()
756        };
757        while self.current_token.kind == TokenKind::NewLine {
758            self.next_token();
759        }
760        if self.current_token.kind != TokenKind::Semicolon {
761            todo!()
762        }
763        self.next_token_in_regex_context();
764        while self.current_token.kind == TokenKind::NewLine {
765            self.next_token_in_regex_context();
766        }
767
768        let update = if self.current_token.kind == TokenKind::RightParen {
769            Statement::Empty
770        } else {
771            self.parse_statement()
772        };
773        while self.current_token.kind == TokenKind::NewLine {
774            self.next_token();
775        }
776        if self.current_token.kind != TokenKind::RightParen {
777            todo!()
778        }
779        self.next_token();
780        let statements = self.parse_control_statement_body();
781
782        Statement::For {
783            init: Box::new(init),
784            condition,
785            update: Box::new(update),
786            statements,
787        }
788    }
789
790    fn parse_print_function(&mut self) -> Statement<'a> {
791        let mut expressions = Vec::new();
792        let mut expect_more = false;
793        self.next_token();
794
795        loop {
796            if self.current_token.kind == TokenKind::RightCurlyBrace
797                || self.current_token.kind == TokenKind::RightParen
798                || self.current_token.kind == TokenKind::Eof
799                || self.current_token.kind == TokenKind::GreaterThan
800                || self.current_token.kind == TokenKind::Append
801                || self.current_token.kind == TokenKind::Pipe
802            {
803                break;
804            }
805
806            if self.current_token.kind == TokenKind::NewLine
807                || self.current_token.kind == TokenKind::Semicolon
808            {
809                if expect_more {
810                    self.next_token();
811                    continue;
812                }
813                break;
814            }
815
816            if self.current_token.kind == TokenKind::Comma {
817                self.next_token();
818                expect_more = true;
819                continue;
820            }
821
822            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
823            let expression = self.parse_expression();
824            expressions.push(expression);
825            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
826                while self.current_token.kind == TokenKind::Comma {
827                    self.next_token();
828                    expressions.push(self.parse_expression());
829                }
830                if self.current_token.kind != TokenKind::RightParen {
831                    todo!()
832                }
833                self.next_token();
834            }
835            expect_more = false;
836        }
837        if self.current_token.kind == TokenKind::RightParen {
838            self.next_token();
839        }
840
841        if self.current_token.kind == TokenKind::GreaterThan
842            || self.current_token.kind == TokenKind::Append
843        {
844            let append = self.current_token.kind == TokenKind::Append;
845            self.next_token();
846            let target = self.parse_expression();
847            return Statement::PrintRedirect {
848                expressions,
849                target,
850                append,
851            };
852        }
853        if self.current_token.kind == TokenKind::Pipe {
854            self.next_token();
855            let target = self.parse_expression();
856            return Statement::PrintPipe {
857                expressions,
858                target,
859            };
860        }
861
862        Statement::Print(expressions)
863    }
864
865    fn parse_printf_function(&mut self) -> Statement<'a> {
866        self.next_token();
867        let expressions = if self.current_token.kind == TokenKind::LeftParen {
868            self.next_token_in_regex_context();
869            let mut expressions = Vec::new();
870            while self.current_token.kind != TokenKind::RightParen
871                && self.current_token.kind != TokenKind::Eof
872            {
873                if self.current_token.kind == TokenKind::Comma {
874                    self.next_token();
875                    continue;
876                }
877                expressions.push(self.parse_expression());
878            }
879            if self.current_token.kind == TokenKind::RightParen {
880                self.next_token();
881            }
882            expressions
883        } else {
884            self.parse_expression_list_until_action_end_from_current()
885        };
886
887        Statement::Printf(expressions)
888    }
889
890    fn parse_gsub_function(&mut self) -> Statement<'a> {
891        self.next_token();
892        if self.current_token.kind != TokenKind::LeftParen {
893            todo!()
894        }
895
896        self.next_token_in_regex_context();
897        let pattern = self.parse_expression();
898
899        if self.current_token.kind != TokenKind::Comma {
900            todo!()
901        }
902        self.next_token();
903        let replacement = self.parse_expression();
904
905        let target = if self.current_token.kind == TokenKind::Comma {
906            self.next_token();
907            Some(self.parse_expression())
908        } else {
909            None
910        };
911
912        if self.current_token.kind != TokenKind::RightParen {
913            todo!()
914        }
915        self.next_token();
916
917        Statement::Gsub {
918            pattern,
919            replacement,
920            target,
921        }
922    }
923
924    fn parse_sub_function(&mut self) -> Statement<'a> {
925        self.next_token();
926        if self.current_token.kind != TokenKind::LeftParen {
927            todo!()
928        }
929
930        self.next_token_in_regex_context();
931        let pattern = self.parse_expression();
932
933        if self.current_token.kind != TokenKind::Comma {
934            todo!()
935        }
936        self.next_token();
937        let replacement = self.parse_expression();
938
939        if self.current_token.kind == TokenKind::Comma {
940            todo!()
941        }
942
943        if self.current_token.kind != TokenKind::RightParen {
944            todo!()
945        }
946        self.next_token();
947
948        Statement::Sub {
949            pattern,
950            replacement,
951        }
952    }
953
954    fn parse_system_function(&mut self) -> Statement<'a> {
955        self.next_token();
956        if self.current_token.kind != TokenKind::LeftParen {
957            todo!()
958        }
959        self.next_token();
960        let command = self.parse_expression();
961        if self.current_token.kind != TokenKind::RightParen {
962            todo!()
963        }
964        self.next_token();
965        Statement::System(command)
966    }
967
968    fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
969        let mut expressions = Vec::new();
970        let mut expect_more = false;
971
972        loop {
973            if self.current_token.kind == TokenKind::RightCurlyBrace
974                || self.current_token.kind == TokenKind::RightParen
975                || self.current_token.kind == TokenKind::Eof
976            {
977                break;
978            }
979
980            if self.current_token.kind == TokenKind::NewLine
981                || self.current_token.kind == TokenKind::Semicolon
982            {
983                if expect_more {
984                    self.next_token();
985                    continue;
986                }
987                break;
988            }
989
990            if self.current_token.kind == TokenKind::Comma {
991                self.next_token();
992                expect_more = true;
993                continue;
994            }
995
996            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
997            let expression = self.parse_expression();
998            expressions.push(expression);
999            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1000                while self.current_token.kind == TokenKind::Comma {
1001                    self.next_token();
1002                    expressions.push(self.parse_expression());
1003                }
1004                if self.current_token.kind != TokenKind::RightParen {
1005                    todo!()
1006                }
1007                self.next_token();
1008            }
1009            expect_more = false;
1010        }
1011
1012        if self.current_token.kind == TokenKind::RightParen {
1013            self.next_token();
1014        }
1015
1016        expressions
1017    }
1018
1019    fn parse_expression(&mut self) -> Expression<'a> {
1020        self.parse_expression_with_min_precedence(0)
1021    }
1022
1023    fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1024        let left = self.parse_primary_expression();
1025        self.parse_expression_suffix(left, min_precedence)
1026    }
1027
1028    fn parse_expression_suffix(
1029        &mut self,
1030        mut left: Expression<'a>,
1031        min_precedence: u8,
1032    ) -> Expression<'a> {
1033        const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1034        const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1035
1036        loop {
1037            if self.current_token.kind == TokenKind::QuestionMark {
1038                if min_precedence > 0 {
1039                    break;
1040                }
1041                self.next_token_in_regex_context();
1042                let then_expr = self.parse_expression_with_min_precedence(0);
1043                if self.current_token.kind != TokenKind::Colon {
1044                    todo!()
1045                }
1046                self.next_token_in_regex_context();
1047                let else_expr = self.parse_expression_with_min_precedence(0);
1048                left = Expression::Ternary {
1049                    condition: Box::new(left),
1050                    then_expr: Box::new(then_expr),
1051                    else_expr: Box::new(else_expr),
1052                };
1053                continue;
1054            }
1055
1056            if infix_operator_precedence(&self.current_token.kind).is_none()
1057                && is_expression_start(&self.current_token.kind)
1058            {
1059                if CONCAT_LEFT_PRECEDENCE < min_precedence {
1060                    break;
1061                }
1062
1063                let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1064                left = Expression::Concatenation {
1065                    left: Box::new(left),
1066                    right: Box::new(right),
1067                };
1068                continue;
1069            }
1070
1071            let (left_precedence, right_precedence) =
1072                match infix_operator_precedence(&self.current_token.kind) {
1073                    Some(value) => value,
1074                    None => break,
1075                };
1076
1077            if left_precedence < min_precedence {
1078                break;
1079            }
1080
1081            let operator = self.current_token.clone();
1082            if matches!(
1083                operator.kind,
1084                TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1085            ) {
1086                self.next_token_in_regex_context();
1087            } else {
1088                self.next_token();
1089            }
1090            let right = self.parse_expression_with_min_precedence(right_precedence);
1091
1092            left = Expression::Infix {
1093                left: Box::new(left),
1094                operator,
1095                right: Box::new(right),
1096            };
1097        }
1098
1099        left
1100    }
1101
1102    fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1103        let mut condition = self.parse_expression();
1104        if self.current_token.kind == TokenKind::Comma {
1105            while self.current_token.kind == TokenKind::Comma {
1106                let operator = self.current_token.clone();
1107                self.next_token_in_regex_context();
1108                let right = self.parse_expression();
1109                condition = Expression::Infix {
1110                    left: Box::new(condition),
1111                    operator,
1112                    right: Box::new(right),
1113                };
1114            }
1115            if self.current_token.kind != TokenKind::RightParen {
1116                todo!()
1117            }
1118            self.next_token();
1119            condition = self.parse_expression_suffix(condition, 0);
1120        }
1121        condition
1122    }
1123
1124    fn parse_primary_expression(&mut self) -> Expression<'a> {
1125        if self.current_token.kind == TokenKind::Minus {
1126            let operator = self.current_token.clone();
1127            self.next_token();
1128            let right = self.parse_primary_expression();
1129            return Expression::Infix {
1130                left: Box::new(Expression::Number(0.0)),
1131                operator,
1132                right: Box::new(right),
1133            };
1134        }
1135        if self.current_token.kind == TokenKind::Plus {
1136            self.next_token();
1137            return self.parse_primary_expression();
1138        }
1139        if self.current_token.kind == TokenKind::ExclamationMark {
1140            self.next_token_in_regex_context();
1141            let expression = self.parse_primary_expression();
1142            return Expression::Not(Box::new(expression));
1143        }
1144        if self.current_token.kind == TokenKind::Increment {
1145            self.next_token();
1146            let expression = self.parse_primary_expression();
1147            return Expression::PreIncrement(Box::new(expression));
1148        }
1149        if self.current_token.kind == TokenKind::Decrement {
1150            self.next_token();
1151            let expression = self.parse_primary_expression();
1152            return Expression::PreDecrement(Box::new(expression));
1153        }
1154
1155        let mut expression = self.parse_primary_atom();
1156        if self.current_token.kind == TokenKind::Increment {
1157            self.next_token();
1158            expression = Expression::PostIncrement(Box::new(expression));
1159        } else if self.current_token.kind == TokenKind::Decrement {
1160            self.next_token();
1161            expression = Expression::PostDecrement(Box::new(expression));
1162        }
1163        expression
1164    }
1165
1166    fn parse_primary_atom(&mut self) -> Expression<'a> {
1167        match self.current_token.kind {
1168            TokenKind::String => {
1169                let expression = Expression::String(self.current_token.literal);
1170                self.next_token();
1171                expression
1172            }
1173            TokenKind::Regex => {
1174                let expression = Expression::Regex(self.current_token.literal);
1175                self.next_token();
1176                expression
1177            }
1178            TokenKind::Number => {
1179                let expression = self
1180                    .parse_number_expression()
1181                    .unwrap_or_else(|| panic!("failed to parse numeric literal: {}", self.current_token.literal));
1182                self.next_token();
1183                expression
1184            }
1185            TokenKind::DollarSign => {
1186                self.next_token();
1187                let expression = self.parse_primary_atom();
1188                Expression::Field(Box::new(expression))
1189            }
1190            TokenKind::LeftParen => {
1191                self.next_token();
1192                let expression = self.parse_expression();
1193                if self.current_token.kind == TokenKind::RightParen {
1194                    self.next_token();
1195                }
1196                expression
1197            }
1198            TokenKind::Identifier => {
1199                let identifier = self.current_token.clone();
1200                self.next_token();
1201                if self.current_token.kind == TokenKind::LeftParen
1202                    && self.token_is_immediately_after(&identifier)
1203                {
1204                    let args = self.parse_call_arguments();
1205                    return Expression::FunctionCall {
1206                        name: identifier.literal,
1207                        args,
1208                    };
1209                }
1210                if self.current_token.kind == TokenKind::LeftSquareBracket {
1211                    self.next_token_in_regex_context();
1212                    let index = self.parse_array_index_expression();
1213                    if self.current_token.kind != TokenKind::RightSquareBracket {
1214                        todo!()
1215                    }
1216                    self.next_token();
1217                    Expression::ArrayAccess {
1218                        identifier: identifier.literal,
1219                        index: Box::new(index),
1220                    }
1221                } else {
1222                    Expression::Identifier(identifier.literal)
1223                }
1224            }
1225            TokenKind::Length => {
1226                self.next_token();
1227                if self.current_token.kind == TokenKind::LeftParen {
1228                    self.next_token();
1229                    if self.current_token.kind == TokenKind::RightParen {
1230                        self.next_token();
1231                        Expression::Length(None)
1232                    } else {
1233                        let expression = self.parse_expression();
1234                        if self.current_token.kind != TokenKind::RightParen {
1235                            todo!()
1236                        }
1237                        self.next_token();
1238                        Expression::Length(Some(Box::new(expression)))
1239                    }
1240                } else {
1241                    Expression::Length(None)
1242                }
1243            }
1244            TokenKind::Substr => {
1245                self.next_token();
1246                if self.current_token.kind != TokenKind::LeftParen {
1247                    todo!()
1248                }
1249                self.next_token();
1250                let string = self.parse_expression();
1251                if self.current_token.kind != TokenKind::Comma {
1252                    todo!()
1253                }
1254                self.next_token();
1255                let start = self.parse_expression();
1256                let mut length = None;
1257                if self.current_token.kind == TokenKind::Comma {
1258                    self.next_token();
1259                    length = Some(Box::new(self.parse_expression()));
1260                }
1261                if self.current_token.kind != TokenKind::RightParen {
1262                    todo!()
1263                }
1264                self.next_token();
1265                Expression::Substr {
1266                    string: Box::new(string),
1267                    start: Box::new(start),
1268                    length,
1269                }
1270            }
1271            TokenKind::Rand => {
1272                self.next_token();
1273                if self.current_token.kind == TokenKind::LeftParen {
1274                    self.next_token();
1275                    if self.current_token.kind != TokenKind::RightParen {
1276                        todo!()
1277                    }
1278                    self.next_token();
1279                }
1280                Expression::Rand
1281            }
1282            TokenKind::Close
1283            | TokenKind::Cos
1284            | TokenKind::Exp
1285            | TokenKind::Index
1286            | TokenKind::Int
1287            | TokenKind::Log
1288            | TokenKind::Match
1289            | TokenKind::Sin
1290            | TokenKind::Sprintf
1291            | TokenKind::Split
1292            | TokenKind::Sqrt
1293            | TokenKind::Srand => {
1294                let name = self.current_token.literal;
1295                self.next_token();
1296                if self.current_token.kind == TokenKind::LeftParen {
1297                    let args = self.parse_call_arguments();
1298                    return Expression::FunctionCall { name, args };
1299                }
1300                Expression::Number(0.0)
1301            }
1302            _ => {
1303                panic!(
1304                    "parse_primary_expression not yet implemented, found token: {:?}",
1305                    self.current_token
1306                )
1307            }
1308        }
1309    }
1310
1311    pub fn parse_program(&mut self) -> Program<'_> {
1312        let mut program = Program::new();
1313
1314        while !self.is_eof() {
1315            match self.parse_next_rule() {
1316                Some(Rule::Begin(action)) => program.add_begin_block(action),
1317                Some(Rule::End(action)) => program.add_end_block(action),
1318                Some(rule) => program.add_rule(rule),
1319                None => {}
1320            }
1321            self.next_token_in_regex_context();
1322        }
1323
1324        for definition in self.function_definitions.drain(..) {
1325            program.add_function_definition(definition);
1326        }
1327
1328        program
1329    }
1330
1331    fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1332        if self.current_token.kind != TokenKind::LeftParen {
1333            return vec![];
1334        }
1335        self.next_token_in_regex_context();
1336        let mut args = Vec::new();
1337        while self.current_token.kind != TokenKind::RightParen
1338            && self.current_token.kind != TokenKind::Eof
1339        {
1340            if self.current_token.kind == TokenKind::Comma {
1341                self.next_token();
1342                continue;
1343            }
1344            args.push(self.parse_expression());
1345        }
1346        if self.current_token.kind == TokenKind::RightParen {
1347            self.next_token();
1348        }
1349        args
1350    }
1351}
1352
1353fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1354    match kind {
1355        TokenKind::Assign
1356        | TokenKind::AddAssign
1357        | TokenKind::SubtractAssign
1358        | TokenKind::MultiplyAssign
1359        | TokenKind::DivideAssign
1360        | TokenKind::ModuloAssign
1361        | TokenKind::PowerAssign => Some((0, 0)),
1362        TokenKind::Or => Some((1, 2)),
1363        TokenKind::And => Some((3, 4)),
1364        TokenKind::Equal
1365        | TokenKind::NotEqual
1366        | TokenKind::GreaterThan
1367        | TokenKind::GreaterThanOrEqual
1368        | TokenKind::In
1369        | TokenKind::LessThan
1370        | TokenKind::LessThanOrEqual
1371        | TokenKind::Tilde
1372        | TokenKind::NoMatch => Some((5, 6)),
1373        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1374        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1375        TokenKind::Caret => Some((13, 12)),
1376        _ => None,
1377    }
1378}
1379
1380fn is_expression_start(kind: &TokenKind) -> bool {
1381    matches!(
1382        kind,
1383        TokenKind::String
1384            | TokenKind::Regex
1385            | TokenKind::Number
1386            | TokenKind::DollarSign
1387            | TokenKind::LeftParen
1388            | TokenKind::Identifier
1389            | TokenKind::Cos
1390            | TokenKind::Exp
1391            | TokenKind::Index
1392            | TokenKind::Int
1393            | TokenKind::Length
1394            | TokenKind::Log
1395            | TokenKind::Match
1396            | TokenKind::Rand
1397            | TokenKind::Sin
1398            | TokenKind::Sprintf
1399            | TokenKind::Split
1400            | TokenKind::Sqrt
1401            | TokenKind::Srand
1402            | TokenKind::Substr
1403            | TokenKind::Increment
1404            | TokenKind::Decrement
1405    )
1406}
1407
1408fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1409    let (kind, literal) = match token.kind {
1410        TokenKind::AddAssign => (TokenKind::Plus, "+"),
1411        TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1412        TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1413        TokenKind::DivideAssign => (TokenKind::Division, "/"),
1414        TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1415        TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1416        _ => todo!(),
1417    };
1418
1419    Token::new(kind, literal, token.span.start)
1420}
1421
1422#[cfg(test)]
1423mod tests {
1424    use super::*;
1425
1426    #[test]
1427    fn create_parser() {
1428        let mut parser = Parser::new(Lexer::new("42 == 42"));
1429
1430        assert_eq!(parser.current_token.literal, "42");
1431        parser.next_token();
1432        assert_eq!(parser.current_token.literal, "==");
1433    }
1434
1435    #[test]
1436    fn parse_empty_program() {
1437        let mut parser = Parser::new(Lexer::new(""));
1438
1439        let program = parser.parse_program();
1440
1441        assert_eq!(program.len(), 0);
1442    }
1443
1444    #[test]
1445    fn parse_action_without_pattern() {
1446        let mut parser = Parser::new(Lexer::new("{ print }"));
1447
1448        let program = parser.parse_program();
1449
1450        assert_eq!(program.len(), 1);
1451        assert_eq!("{ print }", program.to_string());
1452    }
1453
1454    #[test]
1455    fn parse_action_with_leading_newlines() {
1456        let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1457
1458        let program = parser.parse_program();
1459
1460        assert_eq!(program.len(), 1);
1461        assert_eq!("{ print }", program.to_string());
1462    }
1463
1464    #[test]
1465    fn parse_begin_block() {
1466        let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1467
1468        let program = parser.parse_program();
1469
1470        assert_eq!(program.len(), 1);
1471        assert_eq!("BEGIN { print }", program.to_string());
1472    }
1473
1474    #[test]
1475    fn parse_end_block() {
1476        let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1477
1478        let program = parser.parse_program();
1479
1480        assert_eq!(program.len(), 1);
1481        assert_eq!("END { print 42 }", program.to_string());
1482    }
1483
1484    #[test]
1485    fn parse_regex_pattern_action() {
1486        let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1487
1488        let program = parser.parse_program();
1489
1490        assert_eq!(program.len(), 1);
1491        assert_eq!("/foo/ { print }", program.to_string());
1492    }
1493
1494    #[test]
1495    fn parse_print_infix_expression() {
1496        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1497
1498        let program = parser.parse_program();
1499        let mut begin_blocks = program.begin_blocks_iter();
1500        let Action { statements } = begin_blocks.next().expect("expected begin block");
1501
1502        let exprs = match &statements[0] {
1503            Statement::Print(expressions) => expressions,
1504            _ => panic!("expected print statement"),
1505        };
1506
1507        match &exprs[0] {
1508            Expression::Infix {
1509                left,
1510                operator,
1511                right,
1512            } => {
1513                assert!(matches!(**left, Expression::Number(1.0)));
1514                assert_eq!(operator.kind, TokenKind::Plus);
1515                assert!(matches!(**right, Expression::Number(2.0)));
1516            }
1517            _ => panic!("expected infix expression"),
1518        }
1519    }
1520
1521    #[test]
1522    fn parse_print_parenthesized_expression() {
1523        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1524
1525        let program = parser.parse_program();
1526        let mut begin_blocks = program.begin_blocks_iter();
1527        let Action { statements } = begin_blocks.next().expect("expected begin block");
1528
1529        let exprs = match &statements[0] {
1530            Statement::Print(expressions) => expressions,
1531            _ => panic!("expected print statement"),
1532        };
1533
1534        match &exprs[0] {
1535            Expression::Infix {
1536                left,
1537                operator,
1538                right,
1539            } => {
1540                assert_eq!(operator.kind, TokenKind::Asterisk);
1541                assert!(matches!(**right, Expression::Number(3.0)));
1542                assert!(matches!(**left, Expression::Infix { .. }));
1543            }
1544            _ => panic!("expected infix expression"),
1545        }
1546    }
1547
1548    #[test]
1549    fn parse_print_multiplication_has_higher_precedence_than_addition() {
1550        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1551
1552        let program = parser.parse_program();
1553        let mut begin_blocks = program.begin_blocks_iter();
1554        let Action { statements } = begin_blocks.next().expect("expected begin block");
1555
1556        let exprs = match &statements[0] {
1557            Statement::Print(expressions) => expressions,
1558            _ => panic!("expected print statement"),
1559        };
1560
1561        match &exprs[0] {
1562            Expression::Infix {
1563                left,
1564                operator,
1565                right,
1566            } => {
1567                assert_eq!(operator.kind, TokenKind::Plus);
1568                assert!(matches!(**left, Expression::Number(1.0)));
1569                match &**right {
1570                    Expression::Infix {
1571                        operator: right_op, ..
1572                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1573                    _ => panic!("expected nested infix expression"),
1574                }
1575            }
1576            _ => panic!("expected infix expression"),
1577        }
1578    }
1579
1580    #[test]
1581    fn parse_print_power_is_right_associative() {
1582        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1583
1584        let program = parser.parse_program();
1585        let mut begin_blocks = program.begin_blocks_iter();
1586        let Action { statements } = begin_blocks.next().expect("expected begin block");
1587
1588        let exprs = match &statements[0] {
1589            Statement::Print(expressions) => expressions,
1590            _ => panic!("expected print statement"),
1591        };
1592
1593        match &exprs[0] {
1594            Expression::Infix {
1595                left,
1596                operator,
1597                right,
1598            } => {
1599                assert_eq!(operator.kind, TokenKind::Caret);
1600                assert!(matches!(**left, Expression::Number(2.0)));
1601                match &**right {
1602                    Expression::Infix {
1603                        operator: right_op, ..
1604                    } => assert_eq!(right_op.kind, TokenKind::Caret),
1605                    _ => panic!("expected nested infix expression"),
1606                }
1607            }
1608            _ => panic!("expected infix expression"),
1609        }
1610    }
1611
1612    #[test]
1613    fn parse_print_minus_is_left_associative() {
1614        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1615
1616        let program = parser.parse_program();
1617        let mut begin_blocks = program.begin_blocks_iter();
1618        let Action { statements } = begin_blocks.next().expect("expected begin block");
1619
1620        let exprs = match &statements[0] {
1621            Statement::Print(expressions) => expressions,
1622            _ => panic!("expected print statement"),
1623        };
1624
1625        match &exprs[0] {
1626            Expression::Infix {
1627                left,
1628                operator,
1629                right,
1630            } => {
1631                assert_eq!(operator.kind, TokenKind::Minus);
1632                match &**left {
1633                    Expression::Infix {
1634                        operator: left_op, ..
1635                    } => assert_eq!(left_op.kind, TokenKind::Minus),
1636                    _ => panic!("expected nested infix expression"),
1637                }
1638                assert!(matches!(**right, Expression::Number(1.0)));
1639            }
1640            _ => panic!("expected infix expression"),
1641        }
1642    }
1643
1644    #[test]
1645    fn parse_print_concatenation() {
1646        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
1647
1648        let program = parser.parse_program();
1649        let mut begin_blocks = program.begin_blocks_iter();
1650        let Action { statements } = begin_blocks.next().expect("expected begin block");
1651
1652        let exprs = match &statements[0] {
1653            Statement::Print(expressions) => expressions,
1654            _ => panic!("expected print statement"),
1655        };
1656
1657        assert_eq!(exprs.len(), 1);
1658        match &exprs[0] {
1659            Expression::Concatenation { left, right } => {
1660                assert!(matches!(**left, Expression::String("Value:")));
1661                assert!(matches!(**right, Expression::Number(42.0)));
1662            }
1663            _ => panic!("expected concatenation expression"),
1664        }
1665    }
1666
1667    #[test]
1668    fn parse_continue_statement() {
1669        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
1670
1671        let program = parser.parse_program();
1672        let mut rules = program.rules_iter();
1673        let rule = rules.next().expect("expected rule");
1674
1675        let statements = match rule {
1676            Rule::Action(Action { statements }) => statements,
1677            _ => panic!("expected action rule"),
1678        };
1679
1680        assert!(matches!(statements[0], Statement::Continue));
1681    }
1682
1683    #[test]
1684    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
1685        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
1686
1687        let program = parser.parse_program();
1688        let mut rules = program.rules_iter();
1689        let rule = rules.next().expect("expected rule");
1690
1691        let statements = match rule {
1692            Rule::Action(Action { statements }) => statements,
1693            _ => panic!("expected action rule"),
1694        };
1695
1696        let exprs = match &statements[1] {
1697            Statement::Print(expressions) => expressions,
1698            _ => panic!("expected print statement"),
1699        };
1700
1701        assert_eq!(exprs.len(), 1);
1702        match &exprs[0] {
1703            Expression::Concatenation { left, right } => {
1704                assert!(matches!(**left, Expression::Identifier("x")));
1705                assert!(matches!(**right, Expression::PreIncrement(_)));
1706            }
1707            _ => panic!("expected concatenation expression"),
1708        }
1709    }
1710
1711    #[test]
1712    fn parse_print_field_expression() {
1713        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
1714
1715        let program = parser.parse_program();
1716        let mut rules = program.rules_iter();
1717        let rule = rules.next().expect("expected rule");
1718
1719        let statements = match rule {
1720            Rule::Action(Action { statements }) => statements,
1721            _ => panic!("expected action rule"),
1722        };
1723
1724        let exprs = match &statements[0] {
1725            Statement::Print(expressions) => expressions,
1726            _ => panic!("expected print statement"),
1727        };
1728
1729        match &exprs[0] {
1730            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
1731            _ => panic!("expected field expression"),
1732        }
1733    }
1734
1735    #[test]
1736    fn parse_print_with_commas() {
1737        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
1738
1739        let program = parser.parse_program();
1740
1741        assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
1742    }
1743
1744    #[test]
1745    fn parse_number_of_fields_identifier() {
1746        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
1747
1748        let program = parser.parse_program();
1749
1750        assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
1751    }
1752
1753    #[test]
1754    fn parse_printf_with_format_and_arguments() {
1755        let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
1756
1757        let program = parser.parse_program();
1758
1759        assert_eq!(
1760            r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
1761            program.to_string()
1762        );
1763    }
1764
1765    #[test]
1766    fn parse_add_assignment_and_pre_increment() {
1767        let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
1768
1769        let program = parser.parse_program();
1770
1771        assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
1772    }
1773
1774    #[test]
1775    fn parse_regex_match_pattern_action() {
1776        let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
1777
1778        let program = parser.parse_program();
1779
1780        assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
1781    }
1782
1783    #[test]
1784    fn parse_not_pattern_action() {
1785        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
1786
1787        let program = parser.parse_program();
1788        let mut rules = program.rules_iter();
1789        let rule = rules.next().expect("expected rule");
1790
1791        match rule {
1792            Rule::PatternAction {
1793                pattern: Some(Expression::Not(inner)),
1794                action: Some(Action { statements }),
1795            } => {
1796                assert!(matches!(**inner, Expression::Infix { .. }));
1797                assert!(matches!(statements[0], Statement::Print(_)));
1798            }
1799            _ => panic!("expected negated pattern action"),
1800        }
1801    }
1802
1803    #[test]
1804    fn parse_print_with_line_continuation_after_comma() {
1805        let mut parser = Parser::new(Lexer::new(
1806            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
1807        ));
1808
1809        let program = parser.parse_program();
1810
1811        assert_eq!(
1812            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
1813            program.to_string()
1814        );
1815    }
1816
1817    #[test]
1818    fn parse_gsub_statement() {
1819        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
1820
1821        let program = parser.parse_program();
1822
1823        assert_eq!(
1824            r#"{ gsub(/USA/, "United States"); print }"#,
1825            program.to_string()
1826        );
1827    }
1828
1829    #[test]
1830    fn parse_gsub_statement_with_target() {
1831        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
1832
1833        let program = parser.parse_program();
1834
1835        assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
1836    }
1837
1838    #[test]
1839    fn parse_system_statement() {
1840        let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
1841
1842        let program = parser.parse_program();
1843
1844        assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
1845    }
1846
1847    #[test]
1848    fn parse_print_length_builtin_expression() {
1849        let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
1850
1851        let program = parser.parse_program();
1852
1853        assert_eq!(r#"{ print length, $0 }"#, program.to_string());
1854    }
1855
1856    #[test]
1857    fn parse_length_expression_as_rule_pattern() {
1858        let mut parser = Parser::new(Lexer::new(
1859            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1860        ));
1861
1862        let program = parser.parse_program();
1863
1864        assert_eq!(
1865            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1866            program.to_string()
1867        );
1868    }
1869
1870    #[test]
1871    fn parse_field_assignment_with_substr() {
1872        let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
1873
1874        let program = parser.parse_program();
1875
1876        assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
1877    }
1878
1879    #[test]
1880    fn parse_assignment_with_concatenation_and_substr() {
1881        let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
1882
1883        let program = parser.parse_program();
1884
1885        assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
1886    }
1887
1888    #[test]
1889    fn parse_field_divide_assignment() {
1890        let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
1891
1892        let program = parser.parse_program();
1893
1894        assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
1895    }
1896
1897    #[test]
1898    fn parse_chained_assignment() {
1899        let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
1900
1901        let program = parser.parse_program();
1902
1903        assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
1904    }
1905
1906    #[test]
1907    fn parse_if_statement_with_block() {
1908        let mut parser = Parser::new(Lexer::new(
1909            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1910        ));
1911
1912        let program = parser.parse_program();
1913
1914        assert_eq!(
1915            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
1916            program.to_string()
1917        );
1918    }
1919
1920    #[test]
1921    fn parse_while_with_post_increment() {
1922        let mut parser = Parser::new(Lexer::new(
1923            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1924        ));
1925
1926        let program = parser.parse_program();
1927
1928        assert_eq!(
1929            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
1930            program.to_string()
1931        );
1932    }
1933
1934    #[test]
1935    fn parse_while_with_single_body_statement() {
1936        let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
1937
1938        let program = parser.parse_program();
1939
1940        assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
1941    }
1942
1943    #[test]
1944    fn parse_do_while_with_post_increment() {
1945        let mut parser = Parser::new(Lexer::new(
1946            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1947        ));
1948
1949        let program = parser.parse_program();
1950
1951        assert_eq!(
1952            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
1953            program.to_string()
1954        );
1955    }
1956
1957    #[test]
1958    fn parse_for_with_empty_body_statement() {
1959        let mut parser = Parser::new(Lexer::new(
1960            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
1961        ));
1962
1963        let program = parser.parse_program();
1964
1965        assert_eq!(
1966            r#"{ for (i = 1; i <= NF; s += $i++) {  }; print s }"#,
1967            program.to_string()
1968        );
1969    }
1970
1971    #[test]
1972    fn parse_post_decrement_statement() {
1973        let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
1974
1975        let program = parser.parse_program();
1976
1977        assert_eq!(r#"{ k--; n-- }"#, program.to_string());
1978    }
1979
1980    #[test]
1981    fn parse_rand_expression() {
1982        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
1983
1984        let program = parser.parse_program();
1985
1986        assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
1987    }
1988
1989    #[test]
1990    fn parse_math_builtin_expressions() {
1991        let mut parser = Parser::new(Lexer::new(
1992            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1993        ));
1994
1995        let program = parser.parse_program();
1996
1997        assert_eq!(
1998            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
1999            program.to_string()
2000        );
2001    }
2002
2003    #[test]
2004    fn parse_index_builtin_expression() {
2005        let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
2006
2007        let program = parser.parse_program();
2008
2009        assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
2010    }
2011
2012    #[test]
2013    fn parse_match_builtin_expression() {
2014        let mut parser = Parser::new(Lexer::new(r#"{ print match($NF, $1), RSTART, RLENGTH }"#));
2015
2016        let program = parser.parse_program();
2017
2018        assert_eq!(
2019            r#"{ print match($NF, $1), RSTART, RLENGTH }"#,
2020            program.to_string()
2021        );
2022    }
2023
2024    #[test]
2025    fn parse_in_membership_expression() {
2026        let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
2027
2028        let program = parser.parse_program();
2029
2030        assert_eq!(r#"{ print 1 in x }"#, program.to_string());
2031    }
2032
2033    #[test]
2034    fn parse_parenthesized_composite_membership_expression() {
2035        let mut parser = Parser::new(Lexer::new(r#"{ if (($0, $1) in x) print "yes" }"#));
2036
2037        let program = parser.parse_program();
2038
2039        assert_eq!(
2040            r#"{ if ($0, $1 in x) { print "yes" } }"#,
2041            program.to_string()
2042        );
2043    }
2044
2045    #[test]
2046    fn parse_for_loop_with_single_body_statement() {
2047        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
2048
2049        let program = parser.parse_program();
2050
2051        assert_eq!(
2052            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
2053            program.to_string()
2054        );
2055    }
2056
2057    #[test]
2058    fn parse_if_with_single_statement_body() {
2059        let mut parser = Parser::new(Lexer::new(
2060            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
2061        ));
2062
2063        let program = parser.parse_program();
2064
2065        assert_eq!(
2066            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
2067            program.to_string()
2068        );
2069    }
2070
2071    #[test]
2072    fn parse_exit_statement() {
2073        let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
2074
2075        let program = parser.parse_program();
2076
2077        assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
2078    }
2079
2080    #[test]
2081    fn parse_exit_statement_with_status() {
2082        let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
2083
2084        let program = parser.parse_program();
2085
2086        assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
2087    }
2088
2089    #[test]
2090    fn parse_user_defined_function_call_statement() {
2091        let mut parser = Parser::new(Lexer::new(
2092            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
2093        ));
2094
2095        let program = parser.parse_program();
2096
2097        let definition = program
2098            .function_definition("myabort")
2099            .expect("expected function definition");
2100        assert_eq!(definition.parameters, vec!["n"]);
2101        assert_eq!(definition.statements.len(), 1);
2102    }
2103
2104    #[test]
2105    fn parse_delete_array_element_statement() {
2106        let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2107
2108        let program = parser.parse_program();
2109
2110        assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2111    }
2112
2113    #[test]
2114    fn parse_array_add_assignment_and_access() {
2115        let mut parser = Parser::new(Lexer::new(
2116            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2117        ));
2118
2119        let program = parser.parse_program();
2120
2121        assert_eq!(
2122            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2123            program.to_string()
2124        );
2125    }
2126
2127    #[test]
2128    fn parse_for_in_loop() {
2129        let mut parser = Parser::new(Lexer::new(
2130            r#"END { for (name in area) print name ":" area[name] }"#,
2131        ));
2132
2133        let program = parser.parse_program();
2134
2135        assert_eq!(
2136            r#"END { for (name in area) { print name ":" area[name] } }"#,
2137            program.to_string()
2138        );
2139    }
2140
2141    #[test]
2142    fn parse_print_redirection() {
2143        let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2144
2145        let program = parser.parse_program();
2146
2147        assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2148    }
2149
2150    #[test]
2151    fn parse_print_pipe() {
2152        let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2153
2154        let program = parser.parse_program();
2155
2156        assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2157    }
2158
2159    #[test]
2160    fn parse_hexadecimal_number() {
2161        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print 0xAA }"#));
2162
2163        let program = parser.parse_program();
2164
2165        assert_eq!(r#"BEGIN { print 0xAA }"#, program.to_string());
2166    }
2167}