Skip to main content

rawk_core/
parser.rs

1pub use crate::parse_error::{ParseError, ParseErrorKind};
2use crate::{
3    Lexer, Program,
4    ast::{Action, Expression, FunctionDefinition, Rule, Statement},
5    token::{Token, TokenKind},
6};
7
8#[derive(Debug)]
9pub struct Parser<'a> {
10    lexer: Lexer<'a>,
11    current_token: Token<'a>,
12    function_definitions: Vec<FunctionDefinition<'a>>,
13}
14
15impl<'a> Parser<'a> {
16    pub fn new(mut lexer: Lexer<'a>) -> Self {
17        let current_token = lexer.next_token_regex_aware();
18        Parser {
19            lexer,
20            current_token,
21            function_definitions: Vec::new(),
22        }
23    }
24
25    fn next_token(&mut self) {
26        self.current_token = self.lexer.next_token();
27    }
28
29    fn next_token_in_regex_context(&mut self) {
30        self.current_token = self.lexer.next_token_regex_aware();
31    }
32
33    fn skip_newlines(&mut self) {
34        while self.current_token.kind == TokenKind::NewLine {
35            self.next_token();
36        }
37    }
38
39    fn skip_newlines_in_regex_context(&mut self) {
40        while self.current_token.kind == TokenKind::NewLine {
41            self.next_token_in_regex_context();
42        }
43    }
44
45    fn skip_terminators(&mut self) {
46        while matches!(
47            self.current_token.kind,
48            TokenKind::NewLine | TokenKind::Semicolon
49        ) {
50            self.next_token();
51        }
52    }
53
54    fn is_eof(&self) -> bool {
55        self.current_token.kind == TokenKind::Eof
56    }
57
58    fn is_statement_terminator(&self) -> bool {
59        matches!(
60            self.current_token.kind,
61            TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
62        )
63    }
64
65    fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
66        self.current_token.span.start == previous.span.start + previous.literal.len()
67    }
68
69    fn parse_number_expression(&self) -> Option<Expression<'a>> {
70        let literal = self.current_token.literal;
71        if let Some(hex_digits) = literal
72            .strip_prefix("0x")
73            .or_else(|| literal.strip_prefix("0X"))
74        {
75            let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
76            return Some(Expression::HexNumber { literal, value });
77        }
78
79        literal.parse::<f64>().ok().map(Expression::Number)
80    }
81
82    fn parse_array_index_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
83        let mut index = self.parse_expression()?;
84        while self.current_token.kind == TokenKind::Comma {
85            let operator = self.current_token.clone();
86            self.next_token_in_regex_context();
87            let right = self.parse_expression()?;
88            index = Expression::Infix {
89                left: Box::new(index),
90                operator,
91                right: Box::new(right),
92            };
93        }
94        Ok(index)
95    }
96
97    fn parse_error(&self, kind: ParseErrorKind) -> ParseError<'a> {
98        ParseError {
99            kind,
100            token: self.current_token.clone(),
101        }
102    }
103
104    fn expected_rule(&self) -> ParseError<'a> {
105        self.parse_error(ParseErrorKind::ExpectedRule)
106    }
107
108    fn expected_statement(&self) -> ParseError<'a> {
109        self.parse_error(ParseErrorKind::ExpectedStatement)
110    }
111
112    fn expected_identifier(&self) -> ParseError<'a> {
113        self.parse_error(ParseErrorKind::ExpectedIdentifier)
114    }
115
116    fn unsupported_statement(&self) -> ParseError<'a> {
117        self.parse_error(ParseErrorKind::UnsupportedStatement)
118    }
119
120    fn unsupported_sub_target(&self) -> ParseError<'a> {
121        self.parse_error(ParseErrorKind::UnsupportedSubTarget)
122    }
123
124    fn expected_left_paren(&self) -> ParseError<'a> {
125        self.parse_error(ParseErrorKind::ExpectedLeftParen)
126    }
127
128    fn expected_left_brace(&self) -> ParseError<'a> {
129        self.parse_error(ParseErrorKind::ExpectedLeftBrace)
130    }
131
132    fn expected_right_square_bracket(&self) -> ParseError<'a> {
133        self.parse_error(ParseErrorKind::ExpectedRightSquareBracket)
134    }
135
136    fn expected_comma(&self) -> ParseError<'a> {
137        self.parse_error(ParseErrorKind::ExpectedComma)
138    }
139
140    fn expected_colon(&self) -> ParseError<'a> {
141        self.parse_error(ParseErrorKind::ExpectedColon)
142    }
143
144    fn expected_semicolon(&self) -> ParseError<'a> {
145        self.parse_error(ParseErrorKind::ExpectedSemicolon)
146    }
147
148    fn expected_while(&self) -> ParseError<'a> {
149        self.parse_error(ParseErrorKind::ExpectedWhile)
150    }
151
152    fn expected_right_brace(&self) -> ParseError<'a> {
153        self.parse_error(ParseErrorKind::ExpectedRightBrace)
154    }
155
156    fn expected_right_paren(&self) -> ParseError<'a> {
157        self.parse_error(ParseErrorKind::ExpectedRightParen)
158    }
159
160    fn missing_printf_format_string(&self) -> ParseError<'a> {
161        self.parse_error(ParseErrorKind::MissingPrintfFormatString)
162    }
163
164    fn invalid_numeric_literal(&self) -> ParseError<'a> {
165        self.parse_error(ParseErrorKind::InvalidNumericLiteral)
166    }
167
168    fn split_print_parenthesized_list(expression: Expression<'a>) -> Option<Vec<Expression<'a>>> {
169        fn flatten<'a>(expression: Expression<'a>, expressions: &mut Vec<Expression<'a>>) -> bool {
170            match expression {
171                Expression::Infix {
172                    left,
173                    operator,
174                    right,
175                } if operator.kind == TokenKind::Comma => {
176                    flatten(*left, expressions) && flatten(*right, expressions)
177                }
178                other => {
179                    expressions.push(other);
180                    true
181                }
182            }
183        }
184
185        let mut expressions = Vec::new();
186        if flatten(expression, &mut expressions) && expressions.len() > 1 {
187            Some(expressions)
188        } else {
189            None
190        }
191    }
192
193    fn parse_next_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
194        match &self.current_token.kind {
195            TokenKind::Begin => {
196                self.next_token();
197                if self.current_token.kind != TokenKind::LeftCurlyBrace {
198                    return Err(self.expected_left_brace());
199                }
200                let action = self.parse_action()?;
201                Ok(Some(Rule::Begin(action)))
202            }
203            TokenKind::NewLine => {
204                self.next_token_in_regex_context();
205                self.parse_next_rule()
206            }
207            TokenKind::Eof => Ok(None),
208            TokenKind::LeftCurlyBrace => {
209                self.parse_action().map(|action| Some(Rule::Action(action)))
210            }
211            TokenKind::Function => {
212                self.parse_function_definition()?;
213                Ok(None)
214            }
215            TokenKind::End => {
216                self.next_token();
217                if self.current_token.kind != TokenKind::LeftCurlyBrace {
218                    return Err(self.expected_left_brace());
219                }
220                let action = self.parse_action()?;
221                Ok(Some(Rule::End(action)))
222            }
223            TokenKind::Regex
224            | TokenKind::String
225            | TokenKind::Number
226            | TokenKind::DollarSign
227            | TokenKind::LeftParen
228            | TokenKind::Identifier
229            | TokenKind::Cos
230            | TokenKind::Exp
231            | TokenKind::Index
232            | TokenKind::Int
233            | TokenKind::Length
234            | TokenKind::Log
235            | TokenKind::Match
236            | TokenKind::Rand
237            | TokenKind::Sin
238            | TokenKind::Sprintf
239            | TokenKind::Split
240            | TokenKind::Sqrt
241            | TokenKind::Srand
242            | TokenKind::Substr
243            | TokenKind::ExclamationMark
244            | TokenKind::Increment
245            | TokenKind::Decrement => self.parse_pattern_rule(),
246            _ => Err(self.expected_rule()),
247        }
248    }
249
250    fn parse_pattern_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
251        let mut pattern = self.parse_expression()?;
252        if self.current_token.kind == TokenKind::Comma {
253            let operator = self.current_token.clone();
254            self.next_token_in_regex_context();
255            let right = self.parse_expression()?;
256            pattern = Expression::Infix {
257                left: Box::new(pattern),
258                operator,
259                right: Box::new(right),
260            };
261        }
262        let pattern = Some(pattern);
263
264        if self.current_token.kind == TokenKind::LeftCurlyBrace {
265            let action = self.parse_action()?;
266            Ok(Some(Rule::PatternAction {
267                pattern,
268                action: Some(action),
269            }))
270        } else {
271            Ok(Some(Rule::PatternAction {
272                pattern,
273                action: None,
274            }))
275        }
276    }
277
278    fn parse_action(&mut self) -> Result<Action<'a>, ParseError<'a>> {
279        self.next_token(); // consume '{'
280
281        let mut statements = Vec::new();
282        while self.current_token.kind != TokenKind::RightCurlyBrace
283            && self.current_token.kind != TokenKind::Eof
284        {
285            self.skip_terminators();
286
287            if self.current_token.kind == TokenKind::RightCurlyBrace
288                || self.current_token.kind == TokenKind::Eof
289            {
290                break;
291            }
292
293            statements.push(self.parse_statement()?);
294        }
295
296        if self.current_token.kind != TokenKind::RightCurlyBrace {
297            return Err(self.expected_right_brace());
298        }
299
300        Ok(Action { statements })
301    }
302
303    fn parse_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
304        match self.current_token.kind {
305            TokenKind::Print => self.parse_print_function(),
306            TokenKind::Printf => self.parse_printf_function(),
307            TokenKind::System => self.parse_system_function(),
308            TokenKind::Split => self.parse_split_statement(),
309            TokenKind::Sub => self.parse_sub_function(),
310            TokenKind::Gsub => self.parse_gsub_function(),
311            TokenKind::Break => Ok(self.parse_break_statement()),
312            TokenKind::Continue => Ok(self.parse_continue_statement()),
313            TokenKind::Delete => self.parse_delete_statement(),
314            TokenKind::If => self.parse_if_statement(),
315            TokenKind::Do => self.parse_do_statement(),
316            TokenKind::While => self.parse_while_statement(),
317            TokenKind::For => self.parse_for_statement(),
318            TokenKind::Return => self.parse_return_statement(),
319            TokenKind::Next => Ok(self.parse_next_statement()),
320            TokenKind::Exit => self.parse_exit_statement(),
321            TokenKind::Identifier => self.parse_assignment_statement(),
322            TokenKind::DollarSign => self.parse_field_assignment_statement(),
323            TokenKind::Increment => self.parse_pre_increment_statement(),
324            TokenKind::Decrement => self.parse_pre_decrement_statement(),
325            TokenKind::Number
326            | TokenKind::String
327            | TokenKind::Regex
328            | TokenKind::LeftParen
329            | TokenKind::Close
330            | TokenKind::Cos
331            | TokenKind::Exp
332            | TokenKind::Index
333            | TokenKind::Int
334            | TokenKind::Length
335            | TokenKind::Log
336            | TokenKind::Match
337            | TokenKind::Rand
338            | TokenKind::Sin
339            | TokenKind::Sprintf
340            | TokenKind::Sqrt
341            | TokenKind::Srand
342            | TokenKind::Substr
343            | TokenKind::ToLower
344            | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression()?)),
345            _ => Err(self.expected_statement()),
346        }
347    }
348
349    fn parse_function_definition(&mut self) -> Result<(), ParseError<'a>> {
350        self.next_token();
351        if self.current_token.kind != TokenKind::Identifier {
352            return Err(self.expected_identifier());
353        }
354        let name = self.current_token.literal;
355        self.next_token();
356        if self.current_token.kind != TokenKind::LeftParen {
357            return Err(self.expected_left_paren());
358        }
359        self.next_token();
360
361        let mut parameters = Vec::new();
362        while self.current_token.kind != TokenKind::RightParen {
363            if self.current_token.kind != TokenKind::Identifier {
364                return Err(self.expected_identifier());
365            }
366            parameters.push(self.current_token.literal);
367            self.next_token();
368            if self.current_token.kind == TokenKind::Comma {
369                self.next_token();
370            } else if self.current_token.kind != TokenKind::RightParen {
371                return Err(self.expected_right_paren());
372            }
373        }
374
375        self.next_token();
376        self.skip_newlines();
377        if self.current_token.kind != TokenKind::LeftCurlyBrace {
378            return Err(self.expected_left_brace());
379        }
380
381        let mut statements = Vec::new();
382        self.next_token(); // consume '{'
383        while self.current_token.kind != TokenKind::RightCurlyBrace
384            && self.current_token.kind != TokenKind::Eof
385        {
386            self.skip_terminators();
387
388            if self.current_token.kind == TokenKind::RightCurlyBrace
389                || self.current_token.kind == TokenKind::Eof
390            {
391                break;
392            }
393
394            statements.push(self.parse_statement()?);
395        }
396        if self.current_token.kind != TokenKind::RightCurlyBrace {
397            return Err(self.expected_right_brace());
398        }
399        self.function_definitions.push(FunctionDefinition {
400            name,
401            parameters,
402            statements,
403        });
404
405        Ok(())
406    }
407
408    fn parse_simple_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
409        match self.current_token.kind {
410            TokenKind::Identifier => self.parse_assignment_statement(),
411            TokenKind::DollarSign => self.parse_field_assignment_statement(),
412            TokenKind::Increment => self.parse_pre_increment_statement(),
413            TokenKind::Decrement => self.parse_pre_decrement_statement(),
414            TokenKind::Number
415            | TokenKind::String
416            | TokenKind::Regex
417            | TokenKind::LeftParen
418            | TokenKind::Close
419            | TokenKind::Cos
420            | TokenKind::Exp
421            | TokenKind::Index
422            | TokenKind::Int
423            | TokenKind::Length
424            | TokenKind::Log
425            | TokenKind::Match
426            | TokenKind::Rand
427            | TokenKind::Sin
428            | TokenKind::Sprintf
429            | TokenKind::Sqrt
430            | TokenKind::Srand
431            | TokenKind::Substr
432            | TokenKind::ToLower
433            | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression()?)),
434            _ => Err(self.unsupported_statement()),
435        }
436    }
437
438    fn parse_assignment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
439        let identifier = self.current_token.clone();
440        self.next_token();
441        self.parse_assignment_statement_with_identifier(identifier)
442    }
443
444    fn parse_assignment_statement_with_identifier(
445        &mut self,
446        identifier: Token<'a>,
447    ) -> Result<Statement<'a>, ParseError<'a>> {
448        if self.current_token.kind == TokenKind::LeftParen
449            && self.token_is_immediately_after(&identifier)
450        {
451            let args = self.parse_call_arguments()?;
452            return Ok(Statement::Expression(Expression::FunctionCall {
453                name: identifier.literal,
454                args,
455            }));
456        }
457        if self.current_token.kind == TokenKind::LeftSquareBracket {
458            self.next_token_in_regex_context();
459            let index = self.parse_array_index_expression()?;
460            if self.current_token.kind != TokenKind::RightSquareBracket {
461                return Err(self.expected_right_square_bracket());
462            }
463            self.next_token();
464            return match self.current_token.kind {
465                TokenKind::Assign => {
466                    self.next_token_in_regex_context();
467                    let value = self.parse_expression()?;
468                    Ok(Statement::ArrayAssignment {
469                        identifier: identifier.literal,
470                        index,
471                        value,
472                    })
473                }
474                TokenKind::AddAssign => {
475                    self.next_token_in_regex_context();
476                    let value = self.parse_expression()?;
477                    Ok(Statement::ArrayAddAssignment {
478                        identifier: identifier.literal,
479                        index,
480                        value,
481                    })
482                }
483                TokenKind::Increment => {
484                    self.next_token();
485                    Ok(Statement::ArrayPostIncrement {
486                        identifier: identifier.literal,
487                        index,
488                    })
489                }
490                TokenKind::Decrement => {
491                    self.next_token();
492                    Ok(Statement::ArrayPostDecrement {
493                        identifier: identifier.literal,
494                        index,
495                    })
496                }
497                _ => Err(self.unsupported_statement()),
498            };
499        }
500        match self.current_token.kind {
501            TokenKind::Assign => {
502                self.next_token_in_regex_context();
503                if self.current_token.kind == TokenKind::Split {
504                    return self.parse_split_assignment_statement(identifier.literal);
505                }
506                let value = self.parse_expression()?;
507                Ok(Statement::Assignment {
508                    identifier: identifier.literal,
509                    value,
510                })
511            }
512            TokenKind::Increment => {
513                self.next_token();
514                Ok(Statement::PostIncrement {
515                    identifier: identifier.literal,
516                })
517            }
518            TokenKind::Decrement => {
519                self.next_token();
520                Ok(Statement::PostDecrement {
521                    identifier: identifier.literal,
522                })
523            }
524            TokenKind::AddAssign => {
525                self.next_token_in_regex_context();
526                let value = self.parse_expression()?;
527                Ok(Statement::AddAssignment {
528                    identifier: identifier.literal,
529                    value,
530                })
531            }
532            TokenKind::SubtractAssign
533            | TokenKind::MultiplyAssign
534            | TokenKind::DivideAssign
535            | TokenKind::ModuloAssign
536            | TokenKind::PowerAssign => {
537                let assign_token = self.current_token.clone();
538                self.next_token_in_regex_context();
539                let right_value = self.parse_expression()?;
540                Ok(Statement::Assignment {
541                    identifier: identifier.literal,
542                    value: Expression::Infix {
543                        left: Box::new(Expression::Identifier(identifier.literal)),
544                        operator: compound_assign_operator(&assign_token),
545                        right: Box::new(right_value),
546                    },
547                })
548            }
549            _ => Err(self.unsupported_statement()),
550        }
551    }
552
553    fn parse_delete_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
554        self.next_token();
555        if self.current_token.kind != TokenKind::Identifier {
556            return Err(self.expected_identifier());
557        }
558        let identifier = self.current_token.literal;
559        self.next_token();
560        if self.current_token.kind != TokenKind::LeftSquareBracket {
561            return Ok(Statement::Delete {
562                identifier,
563                index: None,
564            });
565        }
566
567        self.next_token_in_regex_context();
568        let index = self.parse_array_index_expression()?;
569        if self.current_token.kind != TokenKind::RightSquareBracket {
570            return Err(self.expected_right_square_bracket());
571        }
572        self.next_token();
573        Ok(Statement::Delete {
574            identifier,
575            index: Some(index),
576        })
577    }
578
579    fn parse_break_statement(&mut self) -> Statement<'a> {
580        self.next_token();
581        Statement::Break
582    }
583
584    fn parse_continue_statement(&mut self) -> Statement<'a> {
585        self.next_token();
586        Statement::Continue
587    }
588
589    fn parse_pre_increment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
590        self.next_token();
591        if self.current_token.kind != TokenKind::Identifier {
592            return Err(self.expected_identifier());
593        }
594        let identifier = self.current_token.literal;
595        self.next_token();
596        Ok(Statement::PreIncrement { identifier })
597    }
598
599    fn parse_pre_decrement_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
600        self.next_token();
601        if self.current_token.kind != TokenKind::Identifier {
602            return Err(self.expected_identifier());
603        }
604        let identifier = self.current_token.literal;
605        self.next_token();
606        Ok(Statement::PreDecrement { identifier })
607    }
608
609    fn parse_split_assignment_statement(
610        &mut self,
611        identifier: &'a str,
612    ) -> Result<Statement<'a>, ParseError<'a>> {
613        self.next_token();
614        if self.current_token.kind != TokenKind::LeftParen {
615            return Err(self.expected_left_paren());
616        }
617        self.next_token_in_regex_context();
618        let string = self.parse_expression()?;
619        if self.current_token.kind != TokenKind::Comma {
620            return Err(self.expected_comma());
621        }
622        self.next_token();
623        if self.current_token.kind != TokenKind::Identifier {
624            return Err(self.expected_identifier());
625        }
626        let array = self.current_token.literal;
627        self.next_token();
628        let separator = if self.current_token.kind == TokenKind::Comma {
629            self.next_token_in_regex_context();
630            Some(self.parse_expression()?)
631        } else {
632            None
633        };
634        if self.current_token.kind != TokenKind::RightParen {
635            return Err(self.expected_right_paren());
636        }
637        self.next_token();
638        Ok(Statement::SplitAssignment {
639            identifier,
640            string,
641            array,
642            separator,
643        })
644    }
645
646    fn parse_split_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
647        self.next_token();
648        if self.current_token.kind != TokenKind::LeftParen {
649            return Err(self.expected_left_paren());
650        }
651        self.next_token_in_regex_context();
652        let string = self.parse_expression()?;
653        if self.current_token.kind != TokenKind::Comma {
654            return Err(self.expected_comma());
655        }
656        self.next_token();
657        if self.current_token.kind != TokenKind::Identifier {
658            return Err(self.expected_identifier());
659        }
660        let array = self.current_token.literal;
661        self.next_token();
662        let separator = if self.current_token.kind == TokenKind::Comma {
663            self.next_token_in_regex_context();
664            Some(self.parse_expression()?)
665        } else {
666            None
667        };
668        if self.current_token.kind != TokenKind::RightParen {
669            return Err(self.expected_right_paren());
670        }
671        self.next_token();
672        Ok(Statement::Split {
673            string,
674            array,
675            separator,
676        })
677    }
678
679    fn parse_field_assignment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
680        self.next_token();
681        let field = self.parse_primary_expression()?;
682        let assign_token = self.current_token.clone();
683        self.next_token_in_regex_context();
684        let right_value = self.parse_expression()?;
685
686        let value = if assign_token.kind == TokenKind::Assign {
687            right_value
688        } else {
689            let operator = compound_assign_operator(&assign_token);
690            Expression::Infix {
691                left: Box::new(Expression::Field(Box::new(field.clone()))),
692                operator,
693                right: Box::new(right_value),
694            }
695        };
696        Ok(Statement::FieldAssignment { field, value })
697    }
698
699    fn parse_if_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
700        self.next_token();
701        if self.current_token.kind != TokenKind::LeftParen {
702            return Err(self.expected_left_paren());
703        }
704        self.next_token_in_regex_context();
705        let condition = self.parse_condition_in_parens()?;
706        if self.current_token.kind != TokenKind::RightParen {
707            return Err(self.expected_right_paren());
708        }
709        self.next_token();
710        let then_statements = self.parse_control_statement_body()?;
711
712        self.skip_terminators();
713
714        if self.current_token.kind == TokenKind::Else {
715            self.next_token();
716            let else_statements = self.parse_control_statement_body()?;
717            return Ok(Statement::IfElse {
718                condition,
719                then_statements,
720                else_statements,
721            });
722        }
723
724        Ok(Statement::If {
725            condition,
726            then_statements,
727        })
728    }
729
730    fn parse_exit_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
731        self.next_token();
732        let status = if self.is_statement_terminator() {
733            None
734        } else {
735            Some(self.parse_expression()?)
736        };
737        Ok(Statement::Exit(status))
738    }
739
740    fn parse_return_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
741        self.next_token();
742        let value = if self.is_statement_terminator() {
743            None
744        } else {
745            Some(self.parse_expression()?)
746        };
747        Ok(Statement::Return(value))
748    }
749
750    fn parse_next_statement(&mut self) -> Statement<'a> {
751        self.next_token();
752        Statement::Next
753    }
754
755    fn parse_statement_block(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
756        self.next_token(); // consume '{'
757        let mut statements = Vec::new();
758        while self.current_token.kind != TokenKind::RightCurlyBrace
759            && self.current_token.kind != TokenKind::Eof
760        {
761            self.skip_terminators();
762
763            if self.current_token.kind == TokenKind::RightCurlyBrace
764                || self.current_token.kind == TokenKind::Eof
765            {
766                break;
767            }
768            statements.push(self.parse_statement()?);
769        }
770        if self.current_token.kind != TokenKind::RightCurlyBrace {
771            return Err(self.expected_right_brace());
772        }
773        self.next_token();
774        Ok(statements)
775    }
776
777    fn parse_control_statement_body(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
778        self.skip_newlines();
779
780        if self.current_token.kind == TokenKind::LeftCurlyBrace {
781            return self.parse_statement_block();
782        }
783
784        if self.current_token.kind == TokenKind::Semicolon {
785            self.next_token();
786            return Ok(vec![Statement::Empty]);
787        }
788
789        Ok(vec![self.parse_statement()?])
790    }
791
792    fn parse_while_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
793        self.next_token();
794        if self.current_token.kind != TokenKind::LeftParen {
795            return Err(self.expected_left_paren());
796        }
797        self.next_token_in_regex_context();
798        let condition = self.parse_condition_in_parens()?;
799        if self.current_token.kind != TokenKind::RightParen {
800            return Err(self.expected_right_paren());
801        }
802        self.next_token();
803        let statements = self.parse_control_statement_body()?;
804        Ok(Statement::While {
805            condition,
806            statements,
807        })
808    }
809
810    fn parse_do_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
811        self.next_token();
812        let statements = self.parse_control_statement_body()?;
813
814        self.skip_terminators();
815
816        if self.current_token.kind != TokenKind::While {
817            return Err(self.expected_while());
818        }
819        self.next_token();
820        if self.current_token.kind != TokenKind::LeftParen {
821            return Err(self.expected_left_paren());
822        }
823        self.next_token_in_regex_context();
824        let condition = self.parse_condition_in_parens()?;
825        if self.current_token.kind != TokenKind::RightParen {
826            return Err(self.expected_right_paren());
827        }
828        self.next_token();
829        Ok(Statement::DoWhile {
830            condition,
831            statements,
832        })
833    }
834
835    fn parse_for_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
836        self.next_token();
837        if self.current_token.kind != TokenKind::LeftParen {
838            return Err(self.expected_left_paren());
839        }
840        self.next_token();
841        self.skip_newlines();
842
843        let init = if self.current_token.kind == TokenKind::Semicolon {
844            Statement::Empty
845        } else if self.current_token.kind == TokenKind::Identifier {
846            let variable = self.current_token.clone();
847            self.next_token();
848            if self.current_token.kind == TokenKind::In {
849                self.next_token();
850                if self.current_token.kind != TokenKind::Identifier {
851                    return Err(self.expected_identifier());
852                }
853                let array = self.current_token.literal;
854                self.next_token();
855                if self.current_token.kind != TokenKind::RightParen {
856                    return Err(self.expected_right_paren());
857                }
858                self.next_token();
859                let statements = self.parse_control_statement_body()?;
860                return Ok(Statement::ForIn {
861                    variable: variable.literal,
862                    array,
863                    statements,
864                });
865            }
866            self.parse_assignment_statement_with_identifier(variable)?
867        } else {
868            self.parse_simple_statement()?
869        };
870        self.skip_newlines();
871        if self.current_token.kind != TokenKind::Semicolon {
872            return Err(self.expected_semicolon());
873        }
874        self.next_token_in_regex_context();
875        self.skip_newlines_in_regex_context();
876
877        let condition = if self.current_token.kind == TokenKind::Semicolon {
878            Expression::Number(1.0)
879        } else {
880            self.parse_expression()?
881        };
882        self.skip_newlines();
883        if self.current_token.kind != TokenKind::Semicolon {
884            return Err(self.expected_semicolon());
885        }
886        self.next_token_in_regex_context();
887        self.skip_newlines_in_regex_context();
888
889        let update = if self.current_token.kind == TokenKind::RightParen {
890            Statement::Empty
891        } else {
892            self.parse_simple_statement()?
893        };
894        self.skip_newlines();
895        if self.current_token.kind != TokenKind::RightParen {
896            return Err(self.expected_right_paren());
897        }
898        self.next_token();
899        let statements = self.parse_control_statement_body()?;
900
901        Ok(Statement::For {
902            init: Box::new(init),
903            condition,
904            update: Box::new(update),
905            statements,
906        })
907    }
908
909    fn parse_print_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
910        let mut expressions = Vec::new();
911        let mut expect_more = false;
912        self.next_token();
913
914        loop {
915            if self.current_token.kind == TokenKind::RightCurlyBrace
916                || self.current_token.kind == TokenKind::RightParen
917                || self.current_token.kind == TokenKind::Eof
918                || self.current_token.kind == TokenKind::GreaterThan
919                || self.current_token.kind == TokenKind::Append
920                || self.current_token.kind == TokenKind::Pipe
921            {
922                break;
923            }
924
925            if self.current_token.kind == TokenKind::NewLine
926                || self.current_token.kind == TokenKind::Semicolon
927            {
928                if expect_more {
929                    self.next_token();
930                    continue;
931                }
932                break;
933            }
934
935            if self.current_token.kind == TokenKind::Comma {
936                self.next_token();
937                expect_more = true;
938                continue;
939            }
940
941            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
942            let expression = self.parse_expression()?;
943            if started_with_left_paren {
944                if let Some(grouped_expressions) =
945                    Self::split_print_parenthesized_list(expression.clone())
946                {
947                    expressions.extend(grouped_expressions);
948                } else {
949                    expressions.push(expression);
950                }
951            } else {
952                expressions.push(expression);
953            }
954            expect_more = false;
955        }
956        if self.current_token.kind == TokenKind::GreaterThan
957            || self.current_token.kind == TokenKind::Append
958        {
959            let append = self.current_token.kind == TokenKind::Append;
960            self.next_token();
961            let target = self.parse_expression()?;
962            return Ok(Statement::PrintRedirect {
963                expressions,
964                target,
965                append,
966            });
967        }
968        if self.current_token.kind == TokenKind::Pipe {
969            self.next_token();
970            let target = self.parse_expression()?;
971            return Ok(Statement::PrintPipe {
972                expressions,
973                target,
974            });
975        }
976
977        Ok(Statement::Print(expressions))
978    }
979
980    fn parse_printf_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
981        self.next_token();
982        let expressions = if self.current_token.kind == TokenKind::LeftParen {
983            self.next_token_in_regex_context();
984            let mut expressions = Vec::new();
985            while self.current_token.kind != TokenKind::RightParen
986                && self.current_token.kind != TokenKind::Eof
987            {
988                if self.current_token.kind == TokenKind::Comma {
989                    self.next_token();
990                    continue;
991                }
992                expressions.push(self.parse_expression()?);
993            }
994            if self.current_token.kind == TokenKind::RightParen {
995                self.next_token();
996            }
997            expressions
998        } else {
999            self.parse_expression_list_until_action_end_from_current()?
1000        };
1001
1002        if expressions.is_empty() {
1003            return Err(self.missing_printf_format_string());
1004        }
1005
1006        Ok(Statement::Printf(expressions))
1007    }
1008
1009    fn parse_gsub_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1010        self.next_token();
1011        if self.current_token.kind != TokenKind::LeftParen {
1012            return Err(self.expected_left_paren());
1013        }
1014
1015        self.next_token_in_regex_context();
1016        let pattern = self.parse_expression()?;
1017
1018        if self.current_token.kind != TokenKind::Comma {
1019            return Err(self.expected_comma());
1020        }
1021        self.next_token();
1022        let replacement = self.parse_expression()?;
1023
1024        let target = if self.current_token.kind == TokenKind::Comma {
1025            self.next_token();
1026            Some(self.parse_expression()?)
1027        } else {
1028            None
1029        };
1030
1031        if self.current_token.kind != TokenKind::RightParen {
1032            return Err(self.expected_right_paren());
1033        }
1034        self.next_token();
1035
1036        Ok(Statement::Gsub {
1037            pattern,
1038            replacement,
1039            target,
1040        })
1041    }
1042
1043    fn parse_sub_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1044        self.next_token();
1045        if self.current_token.kind != TokenKind::LeftParen {
1046            return Err(self.expected_left_paren());
1047        }
1048
1049        self.next_token_in_regex_context();
1050        let pattern = self.parse_expression()?;
1051
1052        if self.current_token.kind != TokenKind::Comma {
1053            return Err(self.expected_comma());
1054        }
1055        self.next_token();
1056        let replacement = self.parse_expression()?;
1057
1058        if self.current_token.kind == TokenKind::Comma {
1059            return Err(self.unsupported_sub_target());
1060        }
1061
1062        if self.current_token.kind != TokenKind::RightParen {
1063            return Err(self.expected_right_paren());
1064        }
1065        self.next_token();
1066
1067        Ok(Statement::Sub {
1068            pattern,
1069            replacement,
1070        })
1071    }
1072
1073    fn parse_system_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1074        self.next_token();
1075        if self.current_token.kind != TokenKind::LeftParen {
1076            return Err(self.expected_left_paren());
1077        }
1078        self.next_token();
1079        let command = self.parse_expression()?;
1080        if self.current_token.kind != TokenKind::RightParen {
1081            return Err(self.expected_right_paren());
1082        }
1083        self.next_token();
1084        Ok(Statement::System(command))
1085    }
1086
1087    fn parse_expression_list_until_action_end_from_current(
1088        &mut self,
1089    ) -> Result<Vec<Expression<'a>>, ParseError<'a>> {
1090        let mut expressions = Vec::new();
1091        let mut expect_more = false;
1092
1093        loop {
1094            if self.current_token.kind == TokenKind::RightCurlyBrace
1095                || self.current_token.kind == TokenKind::RightParen
1096                || self.current_token.kind == TokenKind::Eof
1097            {
1098                break;
1099            }
1100
1101            if self.current_token.kind == TokenKind::NewLine
1102                || self.current_token.kind == TokenKind::Semicolon
1103            {
1104                if expect_more {
1105                    self.next_token();
1106                    continue;
1107                }
1108                break;
1109            }
1110
1111            if self.current_token.kind == TokenKind::Comma {
1112                self.next_token();
1113                expect_more = true;
1114                continue;
1115            }
1116
1117            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
1118            let expression = self.parse_expression()?;
1119            expressions.push(expression);
1120            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1121                while self.current_token.kind == TokenKind::Comma {
1122                    self.next_token();
1123                    expressions.push(self.parse_expression()?);
1124                }
1125                if self.current_token.kind != TokenKind::RightParen {
1126                    return Err(self.expected_right_paren());
1127                }
1128                self.next_token();
1129            }
1130            expect_more = false;
1131        }
1132
1133        Ok(expressions)
1134    }
1135
1136    fn parse_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1137        self.parse_expression_with_min_precedence(0)
1138    }
1139
1140    fn parse_expression_with_min_precedence(
1141        &mut self,
1142        min_precedence: u8,
1143    ) -> Result<Expression<'a>, ParseError<'a>> {
1144        let left = self.parse_primary_expression()?;
1145        self.parse_expression_suffix(left, min_precedence)
1146    }
1147
1148    fn parse_expression_suffix(
1149        &mut self,
1150        mut left: Expression<'a>,
1151        min_precedence: u8,
1152    ) -> Result<Expression<'a>, ParseError<'a>> {
1153        const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1154        const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1155
1156        loop {
1157            if self.current_token.kind == TokenKind::QuestionMark {
1158                if min_precedence > 0 {
1159                    break;
1160                }
1161                self.next_token_in_regex_context();
1162                let then_expr = self.parse_expression_with_min_precedence(0)?;
1163                if self.current_token.kind != TokenKind::Colon {
1164                    return Err(self.expected_colon());
1165                }
1166                self.next_token_in_regex_context();
1167                let else_expr = self.parse_expression_with_min_precedence(0)?;
1168                left = Expression::Ternary {
1169                    condition: Box::new(left),
1170                    then_expr: Box::new(then_expr),
1171                    else_expr: Box::new(else_expr),
1172                };
1173                continue;
1174            }
1175
1176            if infix_operator_precedence(&self.current_token.kind).is_none()
1177                && is_expression_start(&self.current_token.kind)
1178            {
1179                if CONCAT_LEFT_PRECEDENCE < min_precedence {
1180                    break;
1181                }
1182
1183                let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE)?;
1184                left = Expression::Concatenation {
1185                    left: Box::new(left),
1186                    right: Box::new(right),
1187                };
1188                continue;
1189            }
1190
1191            let (left_precedence, right_precedence) =
1192                match infix_operator_precedence(&self.current_token.kind) {
1193                    Some(value) => value,
1194                    None => break,
1195                };
1196
1197            if left_precedence < min_precedence {
1198                break;
1199            }
1200
1201            let operator = self.current_token.clone();
1202            if matches!(
1203                operator.kind,
1204                TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1205            ) {
1206                self.next_token_in_regex_context();
1207            } else {
1208                self.next_token();
1209            }
1210            let right = self.parse_expression_with_min_precedence(right_precedence)?;
1211
1212            left = Expression::Infix {
1213                left: Box::new(left),
1214                operator,
1215                right: Box::new(right),
1216            };
1217        }
1218
1219        Ok(left)
1220    }
1221
1222    fn parse_condition_in_parens(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1223        let mut condition = self.parse_expression()?;
1224        if self.current_token.kind == TokenKind::Comma {
1225            while self.current_token.kind == TokenKind::Comma {
1226                let operator = self.current_token.clone();
1227                self.next_token_in_regex_context();
1228                let right = self.parse_expression()?;
1229                condition = Expression::Infix {
1230                    left: Box::new(condition),
1231                    operator,
1232                    right: Box::new(right),
1233                };
1234            }
1235            if self.current_token.kind != TokenKind::RightParen {
1236                return Err(self.expected_right_paren());
1237            }
1238            self.next_token();
1239            condition = self.parse_expression_suffix(condition, 0)?;
1240        }
1241        Ok(condition)
1242    }
1243
1244    fn parse_primary_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1245        if self.current_token.kind == TokenKind::Minus {
1246            let operator = self.current_token.clone();
1247            self.next_token();
1248            let right = self.parse_primary_expression()?;
1249            return Ok(Expression::Infix {
1250                left: Box::new(Expression::Number(0.0)),
1251                operator,
1252                right: Box::new(right),
1253            });
1254        }
1255        if self.current_token.kind == TokenKind::Plus {
1256            self.next_token();
1257            return self.parse_primary_expression();
1258        }
1259        if self.current_token.kind == TokenKind::ExclamationMark {
1260            self.next_token_in_regex_context();
1261            let expression = self.parse_primary_expression()?;
1262            return Ok(Expression::Not(Box::new(expression)));
1263        }
1264        if self.current_token.kind == TokenKind::Increment {
1265            self.next_token();
1266            let expression = self.parse_primary_expression()?;
1267            return Ok(Expression::PreIncrement(Box::new(expression)));
1268        }
1269        if self.current_token.kind == TokenKind::Decrement {
1270            self.next_token();
1271            let expression = self.parse_primary_expression()?;
1272            return Ok(Expression::PreDecrement(Box::new(expression)));
1273        }
1274
1275        let mut expression = self.parse_primary_atom()?;
1276        match self.current_token.kind {
1277            TokenKind::Increment => {
1278                self.next_token();
1279                expression = Expression::PostIncrement(Box::new(expression));
1280            }
1281            TokenKind::Decrement => {
1282                self.next_token();
1283                expression = Expression::PostDecrement(Box::new(expression));
1284            }
1285            _ => {}
1286        }
1287        Ok(expression)
1288    }
1289
1290    fn parse_primary_atom(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1291        match self.current_token.kind {
1292            TokenKind::String => {
1293                let expression = Expression::String(self.current_token.literal);
1294                self.next_token();
1295                Ok(expression)
1296            }
1297            TokenKind::Regex => {
1298                let expression = Expression::Regex(self.current_token.literal);
1299                self.next_token();
1300                Ok(expression)
1301            }
1302            TokenKind::Number => {
1303                let expression = self
1304                    .parse_number_expression()
1305                    .ok_or_else(|| self.invalid_numeric_literal())?;
1306                self.next_token();
1307                Ok(expression)
1308            }
1309            TokenKind::DollarSign => {
1310                self.next_token();
1311                let expression = self.parse_primary_atom()?;
1312                Ok(Expression::Field(Box::new(expression)))
1313            }
1314            TokenKind::LeftParen => {
1315                self.next_token_in_regex_context();
1316                let mut expression = self.parse_expression()?;
1317                while self.current_token.kind == TokenKind::Comma {
1318                    let operator = self.current_token.clone();
1319                    self.next_token_in_regex_context();
1320                    let right = self.parse_expression()?;
1321                    expression = Expression::Infix {
1322                        left: Box::new(expression),
1323                        operator,
1324                        right: Box::new(right),
1325                    };
1326                }
1327                if self.current_token.kind != TokenKind::RightParen {
1328                    return Err(self.expected_right_paren());
1329                }
1330                self.next_token();
1331                Ok(expression)
1332            }
1333            TokenKind::Identifier => {
1334                let identifier = self.current_token.clone();
1335                self.next_token();
1336                if self.current_token.kind == TokenKind::LeftParen
1337                    && self.token_is_immediately_after(&identifier)
1338                {
1339                    let args = self.parse_call_arguments()?;
1340                    return Ok(Expression::FunctionCall {
1341                        name: identifier.literal,
1342                        args,
1343                    });
1344                }
1345                if self.current_token.kind == TokenKind::LeftSquareBracket {
1346                    self.next_token_in_regex_context();
1347                    let index = self.parse_array_index_expression()?;
1348                    if self.current_token.kind != TokenKind::RightSquareBracket {
1349                        return Err(self.expected_right_square_bracket());
1350                    }
1351                    self.next_token();
1352                    Ok(Expression::ArrayAccess {
1353                        identifier: identifier.literal,
1354                        index: Box::new(index),
1355                    })
1356                } else {
1357                    Ok(Expression::Identifier(identifier.literal))
1358                }
1359            }
1360            TokenKind::Length => {
1361                self.next_token();
1362                if self.current_token.kind == TokenKind::LeftParen {
1363                    self.next_token();
1364                    if self.current_token.kind == TokenKind::RightParen {
1365                        self.next_token();
1366                        Ok(Expression::Length(None))
1367                    } else {
1368                        let expression = self.parse_expression()?;
1369                        if self.current_token.kind != TokenKind::RightParen {
1370                            return Err(self.expected_right_paren());
1371                        }
1372                        self.next_token();
1373                        Ok(Expression::Length(Some(Box::new(expression))))
1374                    }
1375                } else {
1376                    Ok(Expression::Length(None))
1377                }
1378            }
1379            TokenKind::Substr => {
1380                self.next_token();
1381                if self.current_token.kind != TokenKind::LeftParen {
1382                    return Err(self.expected_left_paren());
1383                }
1384                self.next_token();
1385                let string = self.parse_expression()?;
1386                if self.current_token.kind != TokenKind::Comma {
1387                    return Err(self.expected_comma());
1388                }
1389                self.next_token();
1390                let start = self.parse_expression()?;
1391                let mut length = None;
1392                if self.current_token.kind == TokenKind::Comma {
1393                    self.next_token();
1394                    length = Some(Box::new(self.parse_expression()?));
1395                }
1396                if self.current_token.kind != TokenKind::RightParen {
1397                    return Err(self.expected_right_paren());
1398                }
1399                self.next_token();
1400                Ok(Expression::Substr {
1401                    string: Box::new(string),
1402                    start: Box::new(start),
1403                    length,
1404                })
1405            }
1406            TokenKind::Rand => {
1407                self.next_token();
1408                if self.current_token.kind == TokenKind::LeftParen {
1409                    self.next_token();
1410                    if self.current_token.kind != TokenKind::RightParen {
1411                        return Err(self.expected_right_paren());
1412                    }
1413                    self.next_token();
1414                }
1415                Ok(Expression::Rand)
1416            }
1417            TokenKind::Close
1418            | TokenKind::Cos
1419            | TokenKind::Exp
1420            | TokenKind::Index
1421            | TokenKind::Int
1422            | TokenKind::Log
1423            | TokenKind::Match
1424            | TokenKind::Sin
1425            | TokenKind::Sprintf
1426            | TokenKind::Split
1427            | TokenKind::Sqrt
1428            | TokenKind::Srand => {
1429                let name = self.current_token.literal;
1430                self.next_token();
1431                if self.current_token.kind == TokenKind::LeftParen {
1432                    let args = self.parse_call_arguments()?;
1433                    return Ok(Expression::FunctionCall { name, args });
1434                }
1435                Err(self.expected_left_paren())
1436            }
1437            _ => Err(self.expected_statement()),
1438        }
1439    }
1440
1441    pub fn try_parse_program(&mut self) -> Result<Program<'_>, ParseError<'a>> {
1442        let mut program = Program::new();
1443
1444        while !self.is_eof() {
1445            match self.parse_next_rule()? {
1446                Some(Rule::Begin(action)) => program.add_begin_block(action),
1447                Some(Rule::End(action)) => program.add_end_block(action),
1448                Some(rule) => program.add_rule(rule),
1449                None => {}
1450            }
1451            self.next_token_in_regex_context();
1452        }
1453
1454        for definition in self.function_definitions.drain(..) {
1455            program.add_function_definition(definition);
1456        }
1457
1458        Ok(program)
1459    }
1460
1461    pub fn parse_program(&mut self) -> Program<'_> {
1462        self.try_parse_program()
1463            .unwrap_or_else(|err| panic!("{err}"))
1464    }
1465
1466    fn parse_call_arguments(&mut self) -> Result<Vec<Expression<'a>>, ParseError<'a>> {
1467        if self.current_token.kind != TokenKind::LeftParen {
1468            return Ok(vec![]);
1469        }
1470        self.next_token_in_regex_context();
1471        let mut args = Vec::new();
1472        while self.current_token.kind != TokenKind::RightParen
1473            && self.current_token.kind != TokenKind::Eof
1474        {
1475            if self.current_token.kind == TokenKind::Comma {
1476                self.next_token();
1477                continue;
1478            }
1479            args.push(self.parse_expression()?);
1480        }
1481        if self.current_token.kind == TokenKind::RightParen {
1482            self.next_token();
1483        }
1484        Ok(args)
1485    }
1486}
1487
1488fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1489    match kind {
1490        TokenKind::Assign
1491        | TokenKind::AddAssign
1492        | TokenKind::SubtractAssign
1493        | TokenKind::MultiplyAssign
1494        | TokenKind::DivideAssign
1495        | TokenKind::ModuloAssign
1496        | TokenKind::PowerAssign => Some((0, 0)),
1497        TokenKind::Or => Some((1, 2)),
1498        TokenKind::And => Some((3, 4)),
1499        TokenKind::Equal
1500        | TokenKind::NotEqual
1501        | TokenKind::GreaterThan
1502        | TokenKind::GreaterThanOrEqual
1503        | TokenKind::In
1504        | TokenKind::LessThan
1505        | TokenKind::LessThanOrEqual
1506        | TokenKind::Tilde
1507        | TokenKind::NoMatch => Some((5, 6)),
1508        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1509        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1510        TokenKind::Caret => Some((13, 12)),
1511        _ => None,
1512    }
1513}
1514
1515fn is_expression_start(kind: &TokenKind) -> bool {
1516    matches!(
1517        kind,
1518        TokenKind::String
1519            | TokenKind::Regex
1520            | TokenKind::Number
1521            | TokenKind::DollarSign
1522            | TokenKind::LeftParen
1523            | TokenKind::Identifier
1524            | TokenKind::Cos
1525            | TokenKind::Exp
1526            | TokenKind::Index
1527            | TokenKind::Int
1528            | TokenKind::Length
1529            | TokenKind::Log
1530            | TokenKind::Match
1531            | TokenKind::Rand
1532            | TokenKind::Sin
1533            | TokenKind::Sprintf
1534            | TokenKind::Split
1535            | TokenKind::Sqrt
1536            | TokenKind::Srand
1537            | TokenKind::Substr
1538            | TokenKind::Increment
1539            | TokenKind::Decrement
1540    )
1541}
1542
1543fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1544    let (kind, literal) = match token.kind {
1545        TokenKind::AddAssign => (TokenKind::Plus, "+"),
1546        TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1547        TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1548        TokenKind::DivideAssign => (TokenKind::Division, "/"),
1549        TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1550        TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1551        _ => unreachable!(
1552            "compound_assign_operator called with non-compound token: {:?}",
1553            token.kind
1554        ),
1555    };
1556
1557    Token::new(kind, literal, token.span.start)
1558}
1559
1560#[cfg(test)]
1561mod tests {
1562    use super::*;
1563
1564    #[test]
1565    fn create_parser() {
1566        let mut parser = Parser::new(Lexer::new("42 == 42"));
1567
1568        assert_eq!(parser.current_token.literal, "42");
1569        parser.next_token();
1570        assert_eq!(parser.current_token.literal, "==");
1571    }
1572
1573    #[test]
1574    fn parse_empty_program() {
1575        let mut parser = Parser::new(Lexer::new(""));
1576
1577        let program = parser.parse_program();
1578
1579        assert_eq!(program.len(), 0);
1580    }
1581
1582    #[test]
1583    fn parse_statement_with_unhandled_token_returns_parse_error() {
1584        let mut parser = Parser::new(Lexer::new("BEGIN { else }"));
1585
1586        let err = parser
1587            .try_parse_program()
1588            .expect_err("expected parse error for stray else");
1589
1590        assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1591        assert_eq!(err.token.kind, TokenKind::Else);
1592    }
1593
1594    #[test]
1595    fn parse_begin_without_left_brace_returns_parse_error() {
1596        let mut parser = Parser::new(Lexer::new("BEGIN print }"));
1597
1598        let err = parser
1599            .try_parse_program()
1600            .expect_err("expected parse error for missing left brace");
1601
1602        assert_eq!(err.kind, ParseErrorKind::ExpectedLeftBrace);
1603        assert_eq!(err.token.kind, TokenKind::Print);
1604    }
1605
1606    #[test]
1607    fn parse_delete_without_identifier_returns_parse_error() {
1608        let mut parser = Parser::new(Lexer::new("{ delete 1 }"));
1609
1610        let err = parser
1611            .try_parse_program()
1612            .expect_err("expected parse error for delete without identifier");
1613
1614        assert_eq!(err.kind, ParseErrorKind::ExpectedIdentifier);
1615        assert_eq!(err.token.kind, TokenKind::Number);
1616    }
1617
1618    #[test]
1619    fn parse_if_without_right_paren_returns_parse_error() {
1620        let mut parser = Parser::new(Lexer::new("{ if (x print }"));
1621
1622        let err = parser
1623            .try_parse_program()
1624            .expect_err("expected parse error for missing right paren");
1625
1626        assert_eq!(err.kind, ParseErrorKind::ExpectedRightParen);
1627        assert_eq!(err.token.kind, TokenKind::Print);
1628    }
1629
1630    #[test]
1631    fn parse_if_without_left_paren_returns_parse_error() {
1632        let mut parser = Parser::new(Lexer::new("{ if x) print }"));
1633
1634        let err = parser
1635            .try_parse_program()
1636            .expect_err("expected parse error for missing left paren after if");
1637
1638        assert_eq!(err.kind, ParseErrorKind::ExpectedLeftParen);
1639        assert_eq!(err.token.kind, TokenKind::Identifier);
1640    }
1641
1642    #[test]
1643    fn parse_grouped_expression_without_right_paren_returns_parse_error() {
1644        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2 }"));
1645
1646        let err = parser
1647            .try_parse_program()
1648            .expect_err("expected parse error for missing right paren in grouped expression");
1649
1650        assert_eq!(err.kind, ParseErrorKind::ExpectedRightParen);
1651        assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1652    }
1653
1654    #[test]
1655    fn parse_array_assignment_without_right_square_bracket_returns_parse_error() {
1656        let mut parser = Parser::new(Lexer::new("BEGIN { a[1 = 2 }"));
1657
1658        let err = parser
1659            .try_parse_program()
1660            .expect_err("expected parse error for missing right square bracket");
1661
1662        assert_eq!(err.kind, ParseErrorKind::ExpectedRightSquareBracket);
1663        assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1664    }
1665
1666    #[test]
1667    fn parse_split_without_comma_returns_parse_error() {
1668        let mut parser = Parser::new(Lexer::new("BEGIN { split($0 arr) }"));
1669
1670        let err = parser
1671            .try_parse_program()
1672            .expect_err("expected parse error for missing comma in split");
1673
1674        assert_eq!(err.kind, ParseErrorKind::ExpectedComma);
1675        assert_eq!(err.token.kind, TokenKind::RightParen);
1676    }
1677
1678    #[test]
1679    fn parse_identifier_expression_statement_returns_parse_error() {
1680        let mut parser = Parser::new(Lexer::new("BEGIN { x + 1 }"));
1681
1682        let err = parser
1683            .try_parse_program()
1684            .expect_err("expected parse error for unsupported identifier expression statement");
1685
1686        assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
1687        assert_eq!(err.token.kind, TokenKind::Plus);
1688    }
1689
1690    #[test]
1691    fn parse_array_multiply_assignment_returns_parse_error() {
1692        let mut parser = Parser::new(Lexer::new("BEGIN { a[1] *= 2 }"));
1693
1694        let err = parser
1695            .try_parse_program()
1696            .expect_err("expected parse error for unsupported array compound assignment");
1697
1698        assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
1699        assert_eq!(err.token.kind, TokenKind::MultiplyAssign);
1700    }
1701
1702    #[test]
1703    fn parse_sub_with_target_returns_parse_error() {
1704        let mut parser = Parser::new(Lexer::new(r#"BEGIN { sub(/a/, "b", t) }"#));
1705
1706        let err = parser
1707            .try_parse_program()
1708            .expect_err("expected parse error for unsupported sub target argument");
1709
1710        assert_eq!(err.kind, ParseErrorKind::UnsupportedSubTarget);
1711        assert_eq!(err.token.kind, TokenKind::Comma);
1712    }
1713
1714    #[test]
1715    fn parse_ternary_without_colon_returns_parse_error() {
1716        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 ? 2 }"));
1717
1718        let err = parser
1719            .try_parse_program()
1720            .expect_err("expected parse error for missing colon in ternary");
1721
1722        assert_eq!(err.kind, ParseErrorKind::ExpectedColon);
1723        assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1724    }
1725
1726    #[test]
1727    fn parse_print_with_extra_right_paren_returns_parse_error() {
1728        let mut parser = Parser::new(Lexer::new("BEGIN { print 1) }"));
1729
1730        let err = parser
1731            .try_parse_program()
1732            .expect_err("expected parse error for stray right paren after print expression");
1733
1734        assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1735        assert_eq!(err.token.kind, TokenKind::RightParen);
1736    }
1737
1738    #[test]
1739    fn parse_for_without_first_semicolon_returns_parse_error() {
1740        let mut parser = Parser::new(Lexer::new("BEGIN { for (i = 0 i < 3; i++) print i }"));
1741
1742        let err = parser
1743            .try_parse_program()
1744            .expect_err("expected parse error for missing first semicolon in for");
1745
1746        assert_eq!(err.kind, ParseErrorKind::ExpectedSemicolon);
1747        assert_eq!(err.token.kind, TokenKind::RightParen);
1748    }
1749
1750    #[test]
1751    fn parse_do_without_while_returns_parse_error() {
1752        let mut parser = Parser::new(Lexer::new("BEGIN { do print 1 }"));
1753
1754        let err = parser
1755            .try_parse_program()
1756            .expect_err("expected parse error for missing while after do body");
1757
1758        assert_eq!(err.kind, ParseErrorKind::ExpectedWhile);
1759        assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1760    }
1761
1762    #[test]
1763    #[should_panic(expected = "compound_assign_operator called with non-compound token")]
1764    fn compound_assign_operator_panics_for_non_compound_token() {
1765        let token = Token::new(TokenKind::Assign, "=", 0);
1766        let _ = compound_assign_operator(&token);
1767    }
1768
1769    #[test]
1770    fn parse_printf_expression_list_with_extra_right_paren_returns_parse_error() {
1771        let mut parser = Parser::new(Lexer::new(r#"BEGIN { printf "%s", 1) }"#));
1772
1773        let err = parser
1774            .try_parse_program()
1775            .expect_err("expected parse error for stray right paren after printf arguments");
1776
1777        assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1778        assert_eq!(err.token.kind, TokenKind::RightParen);
1779    }
1780
1781    #[test]
1782    fn parse_action_without_right_brace_returns_parse_error() {
1783        let mut parser = Parser::new(Lexer::new("BEGIN { print 1"));
1784
1785        let err = parser
1786            .try_parse_program()
1787            .expect_err("expected parse error for missing right brace in action");
1788
1789        assert_eq!(err.kind, ParseErrorKind::ExpectedRightBrace);
1790        assert_eq!(err.token.kind, TokenKind::Eof);
1791    }
1792
1793    #[test]
1794    fn parse_nested_block_without_right_brace_returns_parse_error() {
1795        let mut parser = Parser::new(Lexer::new("{ if (1) { print 1 }"));
1796
1797        let err = parser
1798            .try_parse_program()
1799            .expect_err("expected parse error for missing right brace in nested block");
1800
1801        assert_eq!(err.kind, ParseErrorKind::ExpectedRightBrace);
1802        assert_eq!(err.token.kind, TokenKind::Eof);
1803    }
1804
1805    #[test]
1806    fn parse_action_without_pattern() {
1807        let mut parser = Parser::new(Lexer::new("{ print }"));
1808
1809        let program = parser.parse_program();
1810
1811        assert_eq!(program.len(), 1);
1812        assert_eq!("{ print }", program.to_string());
1813    }
1814
1815    #[test]
1816    fn parse_action_with_leading_newlines() {
1817        let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1818
1819        let program = parser.parse_program();
1820
1821        assert_eq!(program.len(), 1);
1822        assert_eq!("{ print }", program.to_string());
1823    }
1824
1825    #[test]
1826    fn parse_begin_block() {
1827        let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1828
1829        let program = parser.parse_program();
1830
1831        assert_eq!(program.len(), 1);
1832        assert_eq!("BEGIN { print }", program.to_string());
1833    }
1834
1835    #[test]
1836    fn parse_end_block() {
1837        let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1838
1839        let program = parser.parse_program();
1840
1841        assert_eq!(program.len(), 1);
1842        assert_eq!("END { print 42 }", program.to_string());
1843    }
1844
1845    #[test]
1846    fn parse_regex_pattern_action() {
1847        let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1848
1849        let program = parser.parse_program();
1850
1851        assert_eq!(program.len(), 1);
1852        assert_eq!("/foo/ { print }", program.to_string());
1853    }
1854
1855    #[test]
1856    fn parse_print_infix_expression() {
1857        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1858
1859        let program = parser.parse_program();
1860        let mut begin_blocks = program.begin_blocks_iter();
1861        let Action { statements } = begin_blocks.next().expect("expected begin block");
1862
1863        let exprs = match &statements[0] {
1864            Statement::Print(expressions) => expressions,
1865            _ => panic!("expected print statement"),
1866        };
1867
1868        match &exprs[0] {
1869            Expression::Infix {
1870                left,
1871                operator,
1872                right,
1873            } => {
1874                assert!(matches!(**left, Expression::Number(1.0)));
1875                assert_eq!(operator.kind, TokenKind::Plus);
1876                assert!(matches!(**right, Expression::Number(2.0)));
1877            }
1878            _ => panic!("expected infix expression"),
1879        }
1880    }
1881
1882    #[test]
1883    fn parse_print_parenthesized_expression() {
1884        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1885
1886        let program = parser.parse_program();
1887        let mut begin_blocks = program.begin_blocks_iter();
1888        let Action { statements } = begin_blocks.next().expect("expected begin block");
1889
1890        let exprs = match &statements[0] {
1891            Statement::Print(expressions) => expressions,
1892            _ => panic!("expected print statement"),
1893        };
1894
1895        match &exprs[0] {
1896            Expression::Infix {
1897                left,
1898                operator,
1899                right,
1900            } => {
1901                assert_eq!(operator.kind, TokenKind::Asterisk);
1902                assert!(matches!(**right, Expression::Number(3.0)));
1903                assert!(matches!(**left, Expression::Infix { .. }));
1904            }
1905            _ => panic!("expected infix expression"),
1906        }
1907    }
1908
1909    #[test]
1910    fn parse_print_multiplication_has_higher_precedence_than_addition() {
1911        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1912
1913        let program = parser.parse_program();
1914        let mut begin_blocks = program.begin_blocks_iter();
1915        let Action { statements } = begin_blocks.next().expect("expected begin block");
1916
1917        let exprs = match &statements[0] {
1918            Statement::Print(expressions) => expressions,
1919            _ => panic!("expected print statement"),
1920        };
1921
1922        match &exprs[0] {
1923            Expression::Infix {
1924                left,
1925                operator,
1926                right,
1927            } => {
1928                assert_eq!(operator.kind, TokenKind::Plus);
1929                assert!(matches!(**left, Expression::Number(1.0)));
1930                match &**right {
1931                    Expression::Infix {
1932                        operator: right_op, ..
1933                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1934                    _ => panic!("expected nested infix expression"),
1935                }
1936            }
1937            _ => panic!("expected infix expression"),
1938        }
1939    }
1940
1941    #[test]
1942    fn parse_print_power_is_right_associative() {
1943        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1944
1945        let program = parser.parse_program();
1946        let mut begin_blocks = program.begin_blocks_iter();
1947        let Action { statements } = begin_blocks.next().expect("expected begin block");
1948
1949        let exprs = match &statements[0] {
1950            Statement::Print(expressions) => expressions,
1951            _ => panic!("expected print statement"),
1952        };
1953
1954        match &exprs[0] {
1955            Expression::Infix {
1956                left,
1957                operator,
1958                right,
1959            } => {
1960                assert_eq!(operator.kind, TokenKind::Caret);
1961                assert!(matches!(**left, Expression::Number(2.0)));
1962                match &**right {
1963                    Expression::Infix {
1964                        operator: right_op, ..
1965                    } => assert_eq!(right_op.kind, TokenKind::Caret),
1966                    _ => panic!("expected nested infix expression"),
1967                }
1968            }
1969            _ => panic!("expected infix expression"),
1970        }
1971    }
1972
1973    #[test]
1974    fn parse_print_minus_is_left_associative() {
1975        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1976
1977        let program = parser.parse_program();
1978        let mut begin_blocks = program.begin_blocks_iter();
1979        let Action { statements } = begin_blocks.next().expect("expected begin block");
1980
1981        let exprs = match &statements[0] {
1982            Statement::Print(expressions) => expressions,
1983            _ => panic!("expected print statement"),
1984        };
1985
1986        match &exprs[0] {
1987            Expression::Infix {
1988                left,
1989                operator,
1990                right,
1991            } => {
1992                assert_eq!(operator.kind, TokenKind::Minus);
1993                match &**left {
1994                    Expression::Infix {
1995                        operator: left_op, ..
1996                    } => assert_eq!(left_op.kind, TokenKind::Minus),
1997                    _ => panic!("expected nested infix expression"),
1998                }
1999                assert!(matches!(**right, Expression::Number(1.0)));
2000            }
2001            _ => panic!("expected infix expression"),
2002        }
2003    }
2004
2005    #[test]
2006    fn parse_print_concatenation() {
2007        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
2008
2009        let program = parser.parse_program();
2010        let mut begin_blocks = program.begin_blocks_iter();
2011        let Action { statements } = begin_blocks.next().expect("expected begin block");
2012
2013        let exprs = match &statements[0] {
2014            Statement::Print(expressions) => expressions,
2015            _ => panic!("expected print statement"),
2016        };
2017
2018        assert_eq!(exprs.len(), 1);
2019        match &exprs[0] {
2020            Expression::Concatenation { left, right } => {
2021                assert!(matches!(**left, Expression::String("Value:")));
2022                assert!(matches!(**right, Expression::Number(42.0)));
2023            }
2024            _ => panic!("expected concatenation expression"),
2025        }
2026    }
2027
2028    #[test]
2029    fn parse_continue_statement() {
2030        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
2031
2032        let program = parser.parse_program();
2033        let mut rules = program.rules_iter();
2034        let rule = rules.next().expect("expected rule");
2035
2036        let statements = match rule {
2037            Rule::Action(Action { statements }) => statements,
2038            _ => panic!("expected action rule"),
2039        };
2040
2041        assert!(matches!(statements[0], Statement::Continue));
2042    }
2043
2044    #[test]
2045    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
2046        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
2047
2048        let program = parser.parse_program();
2049        let mut rules = program.rules_iter();
2050        let rule = rules.next().expect("expected rule");
2051
2052        let statements = match rule {
2053            Rule::Action(Action { statements }) => statements,
2054            _ => panic!("expected action rule"),
2055        };
2056
2057        let exprs = match &statements[1] {
2058            Statement::Print(expressions) => expressions,
2059            _ => panic!("expected print statement"),
2060        };
2061
2062        assert_eq!(exprs.len(), 1);
2063        match &exprs[0] {
2064            Expression::Concatenation { left, right } => {
2065                assert!(matches!(**left, Expression::Identifier("x")));
2066                assert!(matches!(**right, Expression::PreIncrement(_)));
2067            }
2068            _ => panic!("expected concatenation expression"),
2069        }
2070    }
2071
2072    #[test]
2073    fn parse_print_field_expression() {
2074        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
2075
2076        let program = parser.parse_program();
2077        let mut rules = program.rules_iter();
2078        let rule = rules.next().expect("expected rule");
2079
2080        let statements = match rule {
2081            Rule::Action(Action { statements }) => statements,
2082            _ => panic!("expected action rule"),
2083        };
2084
2085        let exprs = match &statements[0] {
2086            Statement::Print(expressions) => expressions,
2087            _ => panic!("expected print statement"),
2088        };
2089
2090        match &exprs[0] {
2091            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
2092            _ => panic!("expected field expression"),
2093        }
2094    }
2095
2096    #[test]
2097    fn parse_print_with_commas() {
2098        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
2099
2100        let program = parser.parse_program();
2101
2102        assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
2103    }
2104
2105    #[test]
2106    fn parse_number_of_fields_identifier() {
2107        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
2108
2109        let program = parser.parse_program();
2110
2111        assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
2112    }
2113
2114    #[test]
2115    fn parse_printf_with_format_and_arguments() {
2116        let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
2117
2118        let program = parser.parse_program();
2119
2120        assert_eq!(
2121            r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
2122            program.to_string()
2123        );
2124    }
2125
2126    #[test]
2127    fn parse_print_ternary_expression() {
2128        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print x ? y : z }"#));
2129
2130        let program = parser.parse_program();
2131        let mut begin_blocks = program.begin_blocks_iter();
2132        let Action { statements } = begin_blocks.next().expect("expected begin block");
2133
2134        let exprs = match &statements[0] {
2135            Statement::Print(expressions) => expressions,
2136            _ => panic!("expected print statement"),
2137        };
2138
2139        assert_eq!(exprs.len(), 1);
2140        match &exprs[0] {
2141            Expression::Ternary {
2142                condition,
2143                then_expr,
2144                else_expr,
2145            } => {
2146                assert!(matches!(**condition, Expression::Identifier("x")));
2147                assert!(matches!(**then_expr, Expression::Identifier("y")));
2148                assert!(matches!(**else_expr, Expression::Identifier("z")));
2149            }
2150            _ => panic!("expected ternary expression"),
2151        }
2152    }
2153
2154    #[test]
2155    fn parse_printf_without_arguments_returns_parse_error() {
2156        let mut parser = Parser::new(Lexer::new(r#"{ printf }"#));
2157
2158        let err = parser
2159            .try_parse_program()
2160            .expect_err("expected parse error for printf without arguments");
2161
2162        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
2163    }
2164
2165    #[test]
2166    fn parse_printf_without_arguments_in_parentheses_returns_parse_error() {
2167        let mut parser = Parser::new(Lexer::new(r#"{ printf() }"#));
2168
2169        let err = parser
2170            .try_parse_program()
2171            .expect_err("expected parse error for empty printf call");
2172
2173        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
2174    }
2175
2176    #[test]
2177    fn parse_add_assignment_and_pre_increment() {
2178        let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
2179
2180        let program = parser.parse_program();
2181
2182        assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
2183    }
2184
2185    #[test]
2186    fn parse_regex_match_pattern_action() {
2187        let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
2188
2189        let program = parser.parse_program();
2190
2191        assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
2192    }
2193
2194    #[test]
2195    fn parse_not_pattern_action() {
2196        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
2197
2198        let program = parser.parse_program();
2199        let mut rules = program.rules_iter();
2200        let rule = rules.next().expect("expected rule");
2201
2202        match rule {
2203            Rule::PatternAction {
2204                pattern: Some(Expression::Not(inner)),
2205                action: Some(Action { statements }),
2206            } => {
2207                assert!(matches!(**inner, Expression::Infix { .. }));
2208                assert!(matches!(statements[0], Statement::Print(_)));
2209            }
2210            _ => panic!("expected negated pattern action"),
2211        }
2212    }
2213
2214    #[test]
2215    fn parse_print_with_line_continuation_after_comma() {
2216        let mut parser = Parser::new(Lexer::new(
2217            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
2218        ));
2219
2220        let program = parser.parse_program();
2221
2222        assert_eq!(
2223            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
2224            program.to_string()
2225        );
2226    }
2227
2228    #[test]
2229    fn parse_gsub_statement() {
2230        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
2231
2232        let program = parser.parse_program();
2233
2234        assert_eq!(
2235            r#"{ gsub(/USA/, "United States"); print }"#,
2236            program.to_string()
2237        );
2238    }
2239
2240    #[test]
2241    fn parse_gsub_statement_with_target() {
2242        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
2243
2244        let program = parser.parse_program();
2245
2246        assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
2247    }
2248
2249    #[test]
2250    fn parse_system_statement() {
2251        let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
2252
2253        let program = parser.parse_program();
2254
2255        assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
2256    }
2257
2258    #[test]
2259    fn parse_print_length_builtin_expression() {
2260        let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
2261
2262        let program = parser.parse_program();
2263
2264        assert_eq!(r#"{ print length, $0 }"#, program.to_string());
2265    }
2266
2267    #[test]
2268    fn parse_length_expression_as_rule_pattern() {
2269        let mut parser = Parser::new(Lexer::new(
2270            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
2271        ));
2272
2273        let program = parser.parse_program();
2274
2275        assert_eq!(
2276            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
2277            program.to_string()
2278        );
2279    }
2280
2281    #[test]
2282    fn parse_field_assignment_with_substr() {
2283        let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
2284
2285        let program = parser.parse_program();
2286
2287        assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
2288    }
2289
2290    #[test]
2291    fn parse_assignment_with_concatenation_and_substr() {
2292        let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
2293
2294        let program = parser.parse_program();
2295
2296        assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
2297    }
2298
2299    #[test]
2300    fn parse_field_divide_assignment() {
2301        let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
2302
2303        let program = parser.parse_program();
2304
2305        assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
2306    }
2307
2308    #[test]
2309    fn parse_chained_assignment() {
2310        let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
2311
2312        let program = parser.parse_program();
2313
2314        assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
2315    }
2316
2317    #[test]
2318    fn parse_if_statement_with_block() {
2319        let mut parser = Parser::new(Lexer::new(
2320            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2321        ));
2322
2323        let program = parser.parse_program();
2324
2325        assert_eq!(
2326            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2327            program.to_string()
2328        );
2329    }
2330
2331    #[test]
2332    fn parse_while_with_post_increment() {
2333        let mut parser = Parser::new(Lexer::new(
2334            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2335        ));
2336
2337        let program = parser.parse_program();
2338
2339        assert_eq!(
2340            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2341            program.to_string()
2342        );
2343    }
2344
2345    #[test]
2346    fn parse_while_with_single_body_statement() {
2347        let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
2348
2349        let program = parser.parse_program();
2350
2351        assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
2352    }
2353
2354    #[test]
2355    fn parse_do_while_with_post_increment() {
2356        let mut parser = Parser::new(Lexer::new(
2357            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2358        ));
2359
2360        let program = parser.parse_program();
2361
2362        assert_eq!(
2363            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2364            program.to_string()
2365        );
2366    }
2367
2368    #[test]
2369    fn parse_for_with_empty_body_statement() {
2370        let mut parser = Parser::new(Lexer::new(
2371            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
2372        ));
2373
2374        let program = parser.parse_program();
2375
2376        assert_eq!(
2377            r#"{ for (i = 1; i <= NF; s += $i++) {  }; print s }"#,
2378            program.to_string()
2379        );
2380    }
2381
2382    #[test]
2383    fn parse_post_decrement_statement() {
2384        let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
2385
2386        let program = parser.parse_program();
2387
2388        assert_eq!(r#"{ k--; n-- }"#, program.to_string());
2389    }
2390
2391    #[test]
2392    fn parse_rand_expression() {
2393        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
2394
2395        let program = parser.parse_program();
2396
2397        assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
2398    }
2399
2400    #[test]
2401    fn parse_math_builtin_expressions() {
2402        let mut parser = Parser::new(Lexer::new(
2403            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2404        ));
2405
2406        let program = parser.parse_program();
2407
2408        assert_eq!(
2409            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2410            program.to_string()
2411        );
2412    }
2413
2414    #[test]
2415    fn parse_index_builtin_expression() {
2416        let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
2417
2418        let program = parser.parse_program();
2419
2420        assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
2421    }
2422
2423    #[test]
2424    fn parse_match_builtin_expression() {
2425        let mut parser = Parser::new(Lexer::new(r#"{ print match($NF, $1), RSTART, RLENGTH }"#));
2426
2427        let program = parser.parse_program();
2428
2429        assert_eq!(
2430            r#"{ print match($NF, $1), RSTART, RLENGTH }"#,
2431            program.to_string()
2432        );
2433    }
2434
2435    #[test]
2436    fn parse_in_membership_expression() {
2437        let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
2438
2439        let program = parser.parse_program();
2440
2441        assert_eq!(r#"{ print 1 in x }"#, program.to_string());
2442    }
2443
2444    #[test]
2445    fn parse_parenthesized_composite_membership_expression() {
2446        let mut parser = Parser::new(Lexer::new(r#"{ if (($0, $1) in x) print "yes" }"#));
2447
2448        let program = parser.parse_program();
2449
2450        assert_eq!(
2451            r#"{ if ($0, $1 in x) { print "yes" } }"#,
2452            program.to_string()
2453        );
2454    }
2455
2456    #[test]
2457    fn parse_for_loop_with_single_body_statement() {
2458        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
2459
2460        let program = parser.parse_program();
2461
2462        assert_eq!(
2463            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
2464            program.to_string()
2465        );
2466    }
2467
2468    #[test]
2469    fn parse_if_with_single_statement_body() {
2470        let mut parser = Parser::new(Lexer::new(
2471            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
2472        ));
2473
2474        let program = parser.parse_program();
2475
2476        assert_eq!(
2477            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
2478            program.to_string()
2479        );
2480    }
2481
2482    #[test]
2483    fn parse_exit_statement() {
2484        let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
2485
2486        let program = parser.parse_program();
2487
2488        assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
2489    }
2490
2491    #[test]
2492    fn parse_exit_statement_with_status() {
2493        let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
2494
2495        let program = parser.parse_program();
2496
2497        assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
2498    }
2499
2500    #[test]
2501    fn parse_user_defined_function_call_statement() {
2502        let mut parser = Parser::new(Lexer::new(
2503            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
2504        ));
2505
2506        let program = parser.parse_program();
2507
2508        let definition = program
2509            .function_definition("myabort")
2510            .expect("expected function definition");
2511        assert_eq!(definition.parameters, vec!["n"]);
2512        assert_eq!(definition.statements.len(), 1);
2513    }
2514
2515    #[test]
2516    fn parse_delete_array_element_statement() {
2517        let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2518
2519        let program = parser.parse_program();
2520
2521        assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2522    }
2523
2524    #[test]
2525    fn parse_array_add_assignment_and_access() {
2526        let mut parser = Parser::new(Lexer::new(
2527            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2528        ));
2529
2530        let program = parser.parse_program();
2531
2532        assert_eq!(
2533            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2534            program.to_string()
2535        );
2536    }
2537
2538    #[test]
2539    fn parse_for_in_loop() {
2540        let mut parser = Parser::new(Lexer::new(
2541            r#"END { for (name in area) print name ":" area[name] }"#,
2542        ));
2543
2544        let program = parser.parse_program();
2545
2546        assert_eq!(
2547            r#"END { for (name in area) { print name ":" area[name] } }"#,
2548            program.to_string()
2549        );
2550    }
2551
2552    #[test]
2553    fn parse_print_redirection() {
2554        let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2555
2556        let program = parser.parse_program();
2557
2558        assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2559    }
2560
2561    #[test]
2562    fn parse_print_pipe() {
2563        let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2564
2565        let program = parser.parse_program();
2566
2567        assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2568    }
2569
2570    #[test]
2571    fn parse_hexadecimal_number() {
2572        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print 0xAA }"#));
2573
2574        let program = parser.parse_program();
2575
2576        assert_eq!(r#"BEGIN { print 0xAA }"#, program.to_string());
2577    }
2578
2579    #[test]
2580    fn parse_field_compound_assignment() {
2581        let mut parser = Parser::new(Lexer::new(r#"{ $1 += 2 }"#));
2582
2583        let program = parser.parse_program();
2584
2585        assert_eq!(r#"{ $1 = $1 + 2 }"#, program.to_string());
2586    }
2587
2588    #[test]
2589    fn parse_builtin_without_parens_returns_parse_error() {
2590        let mut parser = Parser::new(Lexer::new(r#"{ x = cos }"#));
2591
2592        let err = parser
2593            .try_parse_program()
2594            .expect_err("expected parse error for builtin used without parentheses");
2595
2596        assert_eq!(err.kind, ParseErrorKind::ExpectedLeftParen);
2597        assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
2598    }
2599
2600    #[test]
2601    fn parse_nested_function_calls() {
2602        let mut parser = Parser::new(Lexer::new(r#"{ x = substr(substr(s, 1, 2), 1) }"#));
2603
2604        let program = parser.parse_program();
2605
2606        assert_eq!(r#"{ x = substr(substr(s, 1, 2), 1) }"#, program.to_string());
2607    }
2608
2609    #[test]
2610    fn parse_chained_ternary_is_right_associative() {
2611        let mut parser = Parser::new(Lexer::new(r#"{ x = a ? b : c ? d : e }"#));
2612
2613        let program = parser.parse_program();
2614
2615        assert_eq!(r#"{ x = (a) ? b : (c) ? d : e }"#, program.to_string());
2616    }
2617
2618    #[test]
2619    fn parse_for_loop_with_empty_init_condition_update() {
2620        let mut parser = Parser::new(Lexer::new(r#"{ for (;;) break }"#));
2621
2622        let program = parser.parse_program();
2623
2624        assert_eq!(r#"{ for (; 1; ) { break } }"#, program.to_string());
2625    }
2626
2627    #[test]
2628    fn parse_assignment_with_regex_rhs() {
2629        let mut parser = Parser::new(Lexer::new(r#"{ x = /foo/ }"#));
2630
2631        let program = parser.parse_program();
2632
2633        assert_eq!(r#"{ x = /foo/ }"#, program.to_string());
2634    }
2635
2636    #[test]
2637    fn parse_field_assignment_with_regex_rhs() {
2638        let mut parser = Parser::new(Lexer::new(r#"{ $1 = /foo/ }"#));
2639
2640        let program = parser.parse_program();
2641
2642        assert_eq!(r#"{ $1 = /foo/ }"#, program.to_string());
2643    }
2644
2645    #[test]
2646    fn parse_array_assignment_with_regex_rhs() {
2647        let mut parser = Parser::new(Lexer::new(r#"{ a[i] = /foo/ }"#));
2648
2649        let program = parser.parse_program();
2650
2651        assert_eq!(r#"{ a[i] = /foo/ }"#, program.to_string());
2652    }
2653
2654    #[test]
2655    fn parse_for_loop_with_print_as_init_returns_parse_error() {
2656        let mut parser = Parser::new(Lexer::new(r#"{ for (print "hi"; i < 10; i++) print i }"#));
2657
2658        let err = parser
2659            .try_parse_program()
2660            .expect_err("expected parse error for print statement as for-loop initializer");
2661
2662        assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
2663        assert_eq!(err.token.kind, TokenKind::Print);
2664    }
2665
2666    #[test]
2667    fn parse_for_loop_with_print_as_update_returns_parse_error() {
2668        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 0; i < 10; print i) print i }"#));
2669
2670        let err = parser
2671            .try_parse_program()
2672            .expect_err("expected parse error for print statement as for-loop update");
2673
2674        assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
2675        assert_eq!(err.token.kind, TokenKind::Print);
2676    }
2677
2678    #[test]
2679    fn parse_for_loop_with_field_assignment_as_init() {
2680        let mut parser = Parser::new(Lexer::new(r#"{ for ($1 = 0; $1 < 10; $1 += 1) print $1 }"#));
2681
2682        let program = parser.parse_program();
2683
2684        assert_eq!(
2685            r#"{ for ($1 = 0; $1 < 10; $1 = $1 + 1) { print $1 } }"#,
2686            program.to_string()
2687        );
2688    }
2689
2690    #[test]
2691    fn parse_primary_atom_with_invalid_number_literal_returns_parse_error() {
2692        // The lexer always emits valid number literals, so this path is
2693        // unreachable through normal input. We test it directly by injecting
2694        // a synthetic Number token with an invalid literal.
2695        let mut parser = Parser::new(Lexer::new(""));
2696        parser.current_token = Token::new(TokenKind::Number, "0xZZ", 7);
2697
2698        let err = parser
2699            .parse_primary_atom()
2700            .expect_err("expected parse error for invalid numeric literal");
2701
2702        assert_eq!(err.kind, ParseErrorKind::InvalidNumericLiteral);
2703        assert_eq!(err.token.kind, TokenKind::Number);
2704        assert_eq!(err.token.literal, "0xZZ");
2705    }
2706
2707    #[test]
2708    fn parse_unrecognized_token_in_expression_returns_parse_error() {
2709        let mut parser = Parser::new(Lexer::new("{ x = else }"));
2710
2711        let err = parser
2712            .try_parse_program()
2713            .expect_err("expected parse error for unrecognized token in expression");
2714
2715        assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
2716        assert_eq!(err.token.kind, TokenKind::Else);
2717    }
2718}