Skip to main content

rawk_core/
parser.rs

1use crate::{
2    Lexer, Program,
3    ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4    token::{Token, TokenKind},
5};
6
/// The category of failure reported by the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseErrorKind {
    /// The current token did not fit the grammar; `expected` is a short
    /// description of what the parser was looking for instead.
    UnexpectedToken { expected: &'static str },
    /// A `printf` statement was parsed with no arguments at all, so there is
    /// no format string.
    MissingPrintfFormatString,
}
12
/// A parse failure: what went wrong (`kind`) plus the token involved (`token`).
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError<'a> {
    /// Classification of the failure.
    pub kind: ParseErrorKind,
    /// The token the error is reported against; `Display` prints its
    /// `span.start` as the byte position of the error.
    pub token: Token<'a>,
}
18
19impl std::fmt::Display for ParseError<'_> {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        match &self.kind {
22            ParseErrorKind::UnexpectedToken { expected } => write!(
23                f,
24                "unexpected token {:?} ({:?}) at byte {}: expected {}",
25                self.token.kind, self.token.literal, self.token.span.start, expected
26            ),
27            ParseErrorKind::MissingPrintfFormatString => write!(
28                f,
29                "printf requires a format string at byte {}",
30                self.token.span.start
31            ),
32        }
33    }
34}
35
// Marker impl: uses the trait's default methods only, so no underlying
// `source()` error is exposed.
impl std::error::Error for ParseError<'_> {}
37
/// Parser state: pulls tokens from a `Lexer` one at a time and keeps a single
/// token of lookahead in `current_token`.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source.
    lexer: Lexer<'a>,
    /// The token every `parse_*` method inspects to decide what to do next.
    current_token: Token<'a>,
    /// Function definitions collected by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
44
45impl<'a> Parser<'a> {
46    pub fn new(mut lexer: Lexer<'a>) -> Self {
47        let current_token = lexer.next_token_regex_aware();
48        Parser {
49            lexer,
50            current_token,
51            function_definitions: Vec::new(),
52        }
53    }
54
55    fn next_token(&mut self) {
56        self.current_token = self.lexer.next_token();
57    }
58
59    fn next_token_in_regex_context(&mut self) {
60        self.current_token = self.lexer.next_token_regex_aware();
61    }
62
63    fn is_eof(&self) -> bool {
64        self.current_token.kind == TokenKind::Eof
65    }
66
67    fn is_statement_terminator(&self) -> bool {
68        matches!(
69            self.current_token.kind,
70            TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
71        )
72    }
73
74    fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
75        self.current_token.span.start == previous.span.start + previous.literal.len()
76    }
77
78    fn parse_number_expression(&self) -> Option<Expression<'a>> {
79        let literal = self.current_token.literal;
80        if let Some(hex_digits) = literal
81            .strip_prefix("0x")
82            .or_else(|| literal.strip_prefix("0X"))
83        {
84            let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
85            return Some(Expression::HexNumber { literal, value });
86        }
87
88        literal.parse::<f64>().ok().map(Expression::Number)
89    }
90
91    fn parse_array_index_expression(&mut self) -> Expression<'a> {
92        let mut index = self.parse_expression();
93        while self.current_token.kind == TokenKind::Comma {
94            let operator = self.current_token.clone();
95            self.next_token_in_regex_context();
96            let right = self.parse_expression();
97            index = Expression::Infix {
98                left: Box::new(index),
99                operator,
100                right: Box::new(right),
101            };
102        }
103        index
104    }
105
106    fn unexpected_token(&self, expected: &'static str) -> ParseError<'a> {
107        ParseError {
108            kind: ParseErrorKind::UnexpectedToken { expected },
109            token: self.current_token.clone(),
110        }
111    }
112
113    fn missing_printf_format_string(&self) -> ParseError<'a> {
114        ParseError {
115            kind: ParseErrorKind::MissingPrintfFormatString,
116            token: self.current_token.clone(),
117        }
118    }
119
120    fn parse_next_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
121        match &self.current_token.kind {
122            TokenKind::Begin => {
123                self.next_token();
124                let action = self.parse_action()?;
125                Ok(Some(Rule::Begin(action)))
126            }
127            TokenKind::NewLine => {
128                self.next_token_in_regex_context();
129                self.parse_next_rule()
130            }
131            TokenKind::Eof => Ok(None),
132            TokenKind::LeftCurlyBrace => {
133                self.parse_action().map(|action| Some(Rule::Action(action)))
134            }
135            TokenKind::Function => {
136                self.parse_function_definition()?;
137                Ok(None)
138            }
139            TokenKind::End => {
140                self.next_token();
141                let action = self.parse_action()?;
142                Ok(Some(Rule::End(action)))
143            }
144            TokenKind::Regex
145            | TokenKind::String
146            | TokenKind::Number
147            | TokenKind::DollarSign
148            | TokenKind::LeftParen
149            | TokenKind::Identifier
150            | TokenKind::Cos
151            | TokenKind::Exp
152            | TokenKind::Index
153            | TokenKind::Int
154            | TokenKind::Length
155            | TokenKind::Log
156            | TokenKind::Match
157            | TokenKind::Rand
158            | TokenKind::Sin
159            | TokenKind::Sprintf
160            | TokenKind::Split
161            | TokenKind::Sqrt
162            | TokenKind::Srand
163            | TokenKind::Substr
164            | TokenKind::ExclamationMark
165            | TokenKind::Increment
166            | TokenKind::Decrement => self.parse_pattern_rule(),
167            _ => Err(self.unexpected_token("rule")),
168        }
169    }
170
171    fn parse_pattern_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
172        let mut pattern = self.parse_expression();
173        if self.current_token.kind == TokenKind::Comma {
174            let operator = self.current_token.clone();
175            self.next_token_in_regex_context();
176            let right = self.parse_expression();
177            pattern = Expression::Infix {
178                left: Box::new(pattern),
179                operator,
180                right: Box::new(right),
181            };
182        }
183        let pattern = Some(pattern);
184
185        if self.current_token.kind == TokenKind::LeftCurlyBrace {
186            let action = self.parse_action()?;
187            Ok(Some(Rule::PatternAction {
188                pattern,
189                action: Some(action),
190            }))
191        } else {
192            Ok(Some(Rule::PatternAction {
193                pattern,
194                action: None,
195            }))
196        }
197    }
198
199    fn parse_action(&mut self) -> Result<Action<'a>, ParseError<'a>> {
200        self.next_token(); // consume '{'
201
202        let mut statements = Vec::new();
203        while self.current_token.kind != TokenKind::RightCurlyBrace
204            && self.current_token.kind != TokenKind::Eof
205        {
206            while self.current_token.kind == TokenKind::NewLine
207                || self.current_token.kind == TokenKind::Semicolon
208            {
209                self.next_token();
210            }
211
212            if self.current_token.kind == TokenKind::RightCurlyBrace
213                || self.current_token.kind == TokenKind::Eof
214            {
215                break;
216            }
217
218            statements.push(self.parse_statement()?);
219        }
220
221        Ok(Action { statements })
222    }
223
224    fn parse_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
225        match self.current_token.kind {
226            TokenKind::Print => Ok(self.parse_print_function()),
227            TokenKind::Printf => self.parse_printf_function(),
228            TokenKind::System => Ok(self.parse_system_function()),
229            TokenKind::Split => Ok(self.parse_split_statement()),
230            TokenKind::Sub => Ok(self.parse_sub_function()),
231            TokenKind::Gsub => Ok(self.parse_gsub_function()),
232            TokenKind::Break => Ok(self.parse_break_statement()),
233            TokenKind::Continue => Ok(self.parse_continue_statement()),
234            TokenKind::Delete => Ok(self.parse_delete_statement()),
235            TokenKind::If => self.parse_if_statement(),
236            TokenKind::Do => self.parse_do_statement(),
237            TokenKind::While => self.parse_while_statement(),
238            TokenKind::For => self.parse_for_statement(),
239            TokenKind::Return => Ok(self.parse_return_statement()),
240            TokenKind::Next => Ok(self.parse_next_statement()),
241            TokenKind::Exit => Ok(self.parse_exit_statement()),
242            TokenKind::Identifier => Ok(self.parse_assignment_statement()),
243            TokenKind::DollarSign => Ok(self.parse_field_assignment_statement()),
244            TokenKind::Increment => Ok(self.parse_pre_increment_statement()),
245            TokenKind::Decrement => Ok(self.parse_pre_decrement_statement()),
246            TokenKind::Number
247            | TokenKind::String
248            | TokenKind::Regex
249            | TokenKind::LeftParen
250            | TokenKind::Close
251            | TokenKind::Cos
252            | TokenKind::Exp
253            | TokenKind::Index
254            | TokenKind::Int
255            | TokenKind::Length
256            | TokenKind::Log
257            | TokenKind::Match
258            | TokenKind::Rand
259            | TokenKind::Sin
260            | TokenKind::Sprintf
261            | TokenKind::Sqrt
262            | TokenKind::Srand
263            | TokenKind::Substr
264            | TokenKind::ToLower
265            | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression())),
266            _ => Err(self.unexpected_token("statement")),
267        }
268    }
269
270    fn parse_function_definition(&mut self) -> Result<(), ParseError<'a>> {
271        self.next_token();
272        if self.current_token.kind != TokenKind::Identifier {
273            todo!()
274        }
275        let name = self.current_token.literal;
276        self.next_token();
277        if self.current_token.kind != TokenKind::LeftParen {
278            todo!()
279        }
280        self.next_token();
281
282        let mut parameters = Vec::new();
283        while self.current_token.kind != TokenKind::RightParen {
284            if self.current_token.kind != TokenKind::Identifier {
285                todo!()
286            }
287            parameters.push(self.current_token.literal);
288            self.next_token();
289            if self.current_token.kind == TokenKind::Comma {
290                self.next_token();
291            } else if self.current_token.kind != TokenKind::RightParen {
292                todo!()
293            }
294        }
295
296        self.next_token();
297        while self.current_token.kind == TokenKind::NewLine {
298            self.next_token();
299        }
300        if self.current_token.kind != TokenKind::LeftCurlyBrace {
301            todo!()
302        }
303
304        let mut statements = Vec::new();
305        self.next_token(); // consume '{'
306        while self.current_token.kind != TokenKind::RightCurlyBrace
307            && self.current_token.kind != TokenKind::Eof
308        {
309            while self.current_token.kind == TokenKind::NewLine
310                || self.current_token.kind == TokenKind::Semicolon
311            {
312                self.next_token();
313            }
314
315            if self.current_token.kind == TokenKind::RightCurlyBrace
316                || self.current_token.kind == TokenKind::Eof
317            {
318                break;
319            }
320
321            statements.push(self.parse_statement()?);
322        }
323        self.function_definitions.push(FunctionDefinition {
324            name,
325            parameters,
326            statements,
327        });
328
329        Ok(())
330    }
331
332    fn parse_assignment_statement(&mut self) -> Statement<'a> {
333        let identifier = self.current_token.clone();
334        self.next_token();
335        self.parse_assignment_statement_with_identifier(identifier)
336    }
337
338    fn parse_assignment_statement_with_identifier(
339        &mut self,
340        identifier: Token<'a>,
341    ) -> Statement<'a> {
342        if self.current_token.kind == TokenKind::LeftParen
343            && self.token_is_immediately_after(&identifier)
344        {
345            let args = self.parse_call_arguments();
346            return Statement::Expression(Expression::FunctionCall {
347                name: identifier.literal,
348                args,
349            });
350        }
351        if self.current_token.kind == TokenKind::LeftSquareBracket {
352            self.next_token_in_regex_context();
353            let index = self.parse_array_index_expression();
354            if self.current_token.kind != TokenKind::RightSquareBracket {
355                todo!()
356            }
357            self.next_token();
358            if self.current_token.kind == TokenKind::Assign {
359                self.next_token();
360                let value = self.parse_expression();
361                return Statement::ArrayAssignment {
362                    identifier: identifier.literal,
363                    index,
364                    value,
365                };
366            }
367            if self.current_token.kind == TokenKind::AddAssign {
368                self.next_token();
369                let value = self.parse_expression();
370                return Statement::ArrayAddAssignment {
371                    identifier: identifier.literal,
372                    index,
373                    value,
374                };
375            }
376            if self.current_token.kind == TokenKind::Increment {
377                self.next_token();
378                return Statement::ArrayPostIncrement {
379                    identifier: identifier.literal,
380                    index,
381                };
382            }
383            if self.current_token.kind == TokenKind::Decrement {
384                self.next_token();
385                return Statement::ArrayPostDecrement {
386                    identifier: identifier.literal,
387                    index,
388                };
389            }
390            todo!()
391        }
392        if self.current_token.kind == TokenKind::Assign {
393            self.next_token();
394            if self.current_token.kind == TokenKind::Split {
395                return self.parse_split_assignment_statement(identifier.literal);
396            }
397            let value = self.parse_expression();
398            Statement::Assignment {
399                identifier: identifier.literal,
400                value,
401            }
402        } else if self.current_token.kind == TokenKind::Increment {
403            self.next_token();
404            Statement::PostIncrement {
405                identifier: identifier.literal,
406            }
407        } else if self.current_token.kind == TokenKind::Decrement {
408            self.next_token();
409            Statement::PostDecrement {
410                identifier: identifier.literal,
411            }
412        } else if self.current_token.kind == TokenKind::AddAssign {
413            self.next_token();
414            let value = self.parse_expression();
415            Statement::AddAssignment {
416                identifier: identifier.literal,
417                value,
418            }
419        } else if matches!(
420            self.current_token.kind,
421            TokenKind::SubtractAssign
422                | TokenKind::MultiplyAssign
423                | TokenKind::DivideAssign
424                | TokenKind::ModuloAssign
425                | TokenKind::PowerAssign
426        ) {
427            let assign_token = self.current_token.clone();
428            self.next_token();
429            let right_value = self.parse_expression();
430            Statement::Assignment {
431                identifier: identifier.literal,
432                value: Expression::Infix {
433                    left: Box::new(Expression::Identifier(identifier.literal)),
434                    operator: compound_assign_operator(&assign_token),
435                    right: Box::new(right_value),
436                },
437            }
438        } else {
439            todo!()
440        }
441    }
442
443    fn parse_delete_statement(&mut self) -> Statement<'a> {
444        self.next_token();
445        if self.current_token.kind != TokenKind::Identifier {
446            todo!()
447        }
448        let identifier = self.current_token.literal;
449        self.next_token();
450        if self.current_token.kind != TokenKind::LeftSquareBracket {
451            return Statement::Delete {
452                identifier,
453                index: None,
454            };
455        }
456
457        self.next_token_in_regex_context();
458        let index = self.parse_array_index_expression();
459        if self.current_token.kind != TokenKind::RightSquareBracket {
460            todo!()
461        }
462        self.next_token();
463        Statement::Delete {
464            identifier,
465            index: Some(index),
466        }
467    }
468
469    fn parse_break_statement(&mut self) -> Statement<'a> {
470        self.next_token();
471        Statement::Break
472    }
473
474    fn parse_continue_statement(&mut self) -> Statement<'a> {
475        self.next_token();
476        Statement::Continue
477    }
478
479    fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
480        self.next_token();
481        if self.current_token.kind != TokenKind::Identifier {
482            todo!()
483        }
484        let identifier = self.current_token.literal;
485        self.next_token();
486        Statement::PreIncrement { identifier }
487    }
488
489    fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
490        self.next_token();
491        if self.current_token.kind != TokenKind::Identifier {
492            todo!()
493        }
494        let identifier = self.current_token.literal;
495        self.next_token();
496        Statement::PreDecrement { identifier }
497    }
498
499    fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
500        self.next_token();
501        if self.current_token.kind != TokenKind::LeftParen {
502            todo!()
503        }
504        self.next_token_in_regex_context();
505        let string = self.parse_expression();
506        if self.current_token.kind != TokenKind::Comma {
507            todo!()
508        }
509        self.next_token();
510        if self.current_token.kind != TokenKind::Identifier {
511            todo!()
512        }
513        let array = self.current_token.literal;
514        self.next_token();
515        let separator = if self.current_token.kind == TokenKind::Comma {
516            self.next_token_in_regex_context();
517            Some(self.parse_expression())
518        } else {
519            None
520        };
521        if self.current_token.kind != TokenKind::RightParen {
522            todo!()
523        }
524        self.next_token();
525        Statement::SplitAssignment {
526            identifier,
527            string,
528            array,
529            separator,
530        }
531    }
532
533    fn parse_split_statement(&mut self) -> Statement<'a> {
534        self.next_token();
535        if self.current_token.kind != TokenKind::LeftParen {
536            todo!()
537        }
538        self.next_token_in_regex_context();
539        let string = self.parse_expression();
540        if self.current_token.kind != TokenKind::Comma {
541            todo!()
542        }
543        self.next_token();
544        if self.current_token.kind != TokenKind::Identifier {
545            todo!()
546        }
547        let array = self.current_token.literal;
548        self.next_token();
549        let separator = if self.current_token.kind == TokenKind::Comma {
550            self.next_token_in_regex_context();
551            Some(self.parse_expression())
552        } else {
553            None
554        };
555        if self.current_token.kind != TokenKind::RightParen {
556            todo!()
557        }
558        self.next_token();
559        Statement::Split {
560            string,
561            array,
562            separator,
563        }
564    }
565
566    fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
567        self.next_token();
568        let field = self.parse_primary_expression();
569        let assign_token = self.current_token.clone();
570        self.next_token();
571        let right_value = self.parse_expression();
572
573        let value = if assign_token.kind == TokenKind::Assign {
574            right_value
575        } else {
576            let operator = compound_assign_operator(&assign_token);
577            Expression::Infix {
578                left: Box::new(Expression::Field(Box::new(field.clone()))),
579                operator,
580                right: Box::new(right_value),
581            }
582        };
583        Statement::FieldAssignment { field, value }
584    }
585
586    fn parse_if_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
587        self.next_token();
588        if self.current_token.kind != TokenKind::LeftParen {
589            todo!()
590        }
591        self.next_token_in_regex_context();
592        let condition = self.parse_condition_in_parens();
593        if self.current_token.kind != TokenKind::RightParen {
594            todo!()
595        }
596        self.next_token();
597        let then_statements = self.parse_control_statement_body()?;
598
599        while self.current_token.kind == TokenKind::NewLine
600            || self.current_token.kind == TokenKind::Semicolon
601        {
602            self.next_token();
603        }
604
605        if self.current_token.kind == TokenKind::Else {
606            self.next_token();
607            let else_statements = self.parse_control_statement_body()?;
608            return Ok(Statement::IfElse {
609                condition,
610                then_statements,
611                else_statements,
612            });
613        }
614
615        Ok(Statement::If {
616            condition,
617            then_statements,
618        })
619    }
620
621    fn parse_exit_statement(&mut self) -> Statement<'a> {
622        self.next_token();
623        let status = if self.is_statement_terminator() {
624            None
625        } else {
626            Some(self.parse_expression())
627        };
628        Statement::Exit(status)
629    }
630
631    fn parse_return_statement(&mut self) -> Statement<'a> {
632        self.next_token();
633        let value = if self.is_statement_terminator() {
634            None
635        } else {
636            Some(self.parse_expression())
637        };
638        Statement::Return(value)
639    }
640
641    fn parse_next_statement(&mut self) -> Statement<'a> {
642        self.next_token();
643        Statement::Next
644    }
645
646    fn parse_statement_block(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
647        self.next_token(); // consume '{'
648        let mut statements = Vec::new();
649        while self.current_token.kind != TokenKind::RightCurlyBrace
650            && self.current_token.kind != TokenKind::Eof
651        {
652            while self.current_token.kind == TokenKind::NewLine
653                || self.current_token.kind == TokenKind::Semicolon
654            {
655                self.next_token();
656            }
657
658            if self.current_token.kind == TokenKind::RightCurlyBrace
659                || self.current_token.kind == TokenKind::Eof
660            {
661                break;
662            }
663            statements.push(self.parse_statement()?);
664        }
665        if self.current_token.kind == TokenKind::RightCurlyBrace {
666            self.next_token();
667        }
668        Ok(statements)
669    }
670
671    fn parse_control_statement_body(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
672        while self.current_token.kind == TokenKind::NewLine {
673            self.next_token();
674        }
675
676        if self.current_token.kind == TokenKind::LeftCurlyBrace {
677            return self.parse_statement_block();
678        }
679
680        if self.current_token.kind == TokenKind::Semicolon {
681            self.next_token();
682            return Ok(vec![Statement::Empty]);
683        }
684
685        Ok(vec![self.parse_statement()?])
686    }
687
688    fn parse_while_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
689        self.next_token();
690        if self.current_token.kind != TokenKind::LeftParen {
691            todo!()
692        }
693        self.next_token_in_regex_context();
694        let condition = self.parse_condition_in_parens();
695        if self.current_token.kind != TokenKind::RightParen {
696            todo!()
697        }
698        self.next_token();
699        let statements = self.parse_control_statement_body()?;
700        Ok(Statement::While {
701            condition,
702            statements,
703        })
704    }
705
706    fn parse_do_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
707        self.next_token();
708        let statements = self.parse_control_statement_body()?;
709
710        while self.current_token.kind == TokenKind::NewLine
711            || self.current_token.kind == TokenKind::Semicolon
712        {
713            self.next_token();
714        }
715
716        if self.current_token.kind != TokenKind::While {
717            todo!()
718        }
719        self.next_token();
720        if self.current_token.kind != TokenKind::LeftParen {
721            todo!()
722        }
723        self.next_token_in_regex_context();
724        let condition = self.parse_condition_in_parens();
725        if self.current_token.kind != TokenKind::RightParen {
726            todo!()
727        }
728        self.next_token();
729        Ok(Statement::DoWhile {
730            condition,
731            statements,
732        })
733    }
734
735    fn parse_for_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
736        self.next_token();
737        if self.current_token.kind != TokenKind::LeftParen {
738            todo!()
739        }
740        self.next_token();
741        while self.current_token.kind == TokenKind::NewLine {
742            self.next_token();
743        }
744
745        let init = if self.current_token.kind == TokenKind::Semicolon {
746            Statement::Empty
747        } else if self.current_token.kind == TokenKind::Identifier {
748            let variable = self.current_token.clone();
749            self.next_token();
750            if self.current_token.kind == TokenKind::In {
751                self.next_token();
752                if self.current_token.kind != TokenKind::Identifier {
753                    todo!()
754                }
755                let array = self.current_token.literal;
756                self.next_token();
757                if self.current_token.kind != TokenKind::RightParen {
758                    todo!()
759                }
760                self.next_token();
761                let statements = self.parse_control_statement_body()?;
762                return Ok(Statement::ForIn {
763                    variable: variable.literal,
764                    array,
765                    statements,
766                });
767            }
768            self.parse_assignment_statement_with_identifier(variable)
769        } else {
770            self.parse_statement()?
771        };
772        while self.current_token.kind == TokenKind::NewLine {
773            self.next_token();
774        }
775        if self.current_token.kind != TokenKind::Semicolon {
776            todo!()
777        }
778        self.next_token_in_regex_context();
779        while self.current_token.kind == TokenKind::NewLine {
780            self.next_token_in_regex_context();
781        }
782
783        let condition = if self.current_token.kind == TokenKind::Semicolon {
784            Expression::Number(1.0)
785        } else {
786            self.parse_expression()
787        };
788        while self.current_token.kind == TokenKind::NewLine {
789            self.next_token();
790        }
791        if self.current_token.kind != TokenKind::Semicolon {
792            todo!()
793        }
794        self.next_token_in_regex_context();
795        while self.current_token.kind == TokenKind::NewLine {
796            self.next_token_in_regex_context();
797        }
798
799        let update = if self.current_token.kind == TokenKind::RightParen {
800            Statement::Empty
801        } else {
802            self.parse_statement()?
803        };
804        while self.current_token.kind == TokenKind::NewLine {
805            self.next_token();
806        }
807        if self.current_token.kind != TokenKind::RightParen {
808            todo!()
809        }
810        self.next_token();
811        let statements = self.parse_control_statement_body()?;
812
813        Ok(Statement::For {
814            init: Box::new(init),
815            condition,
816            update: Box::new(update),
817            statements,
818        })
819    }
820
821    fn parse_print_function(&mut self) -> Statement<'a> {
822        let mut expressions = Vec::new();
823        let mut expect_more = false;
824        self.next_token();
825
826        loop {
827            if self.current_token.kind == TokenKind::RightCurlyBrace
828                || self.current_token.kind == TokenKind::RightParen
829                || self.current_token.kind == TokenKind::Eof
830                || self.current_token.kind == TokenKind::GreaterThan
831                || self.current_token.kind == TokenKind::Append
832                || self.current_token.kind == TokenKind::Pipe
833            {
834                break;
835            }
836
837            if self.current_token.kind == TokenKind::NewLine
838                || self.current_token.kind == TokenKind::Semicolon
839            {
840                if expect_more {
841                    self.next_token();
842                    continue;
843                }
844                break;
845            }
846
847            if self.current_token.kind == TokenKind::Comma {
848                self.next_token();
849                expect_more = true;
850                continue;
851            }
852
853            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
854            let expression = self.parse_expression();
855            expressions.push(expression);
856            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
857                while self.current_token.kind == TokenKind::Comma {
858                    self.next_token();
859                    expressions.push(self.parse_expression());
860                }
861                if self.current_token.kind != TokenKind::RightParen {
862                    todo!()
863                }
864                self.next_token();
865            }
866            expect_more = false;
867        }
868        if self.current_token.kind == TokenKind::RightParen {
869            self.next_token();
870        }
871
872        if self.current_token.kind == TokenKind::GreaterThan
873            || self.current_token.kind == TokenKind::Append
874        {
875            let append = self.current_token.kind == TokenKind::Append;
876            self.next_token();
877            let target = self.parse_expression();
878            return Statement::PrintRedirect {
879                expressions,
880                target,
881                append,
882            };
883        }
884        if self.current_token.kind == TokenKind::Pipe {
885            self.next_token();
886            let target = self.parse_expression();
887            return Statement::PrintPipe {
888                expressions,
889                target,
890            };
891        }
892
893        Statement::Print(expressions)
894    }
895
896    fn parse_printf_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
897        self.next_token();
898        let expressions = if self.current_token.kind == TokenKind::LeftParen {
899            self.next_token_in_regex_context();
900            let mut expressions = Vec::new();
901            while self.current_token.kind != TokenKind::RightParen
902                && self.current_token.kind != TokenKind::Eof
903            {
904                if self.current_token.kind == TokenKind::Comma {
905                    self.next_token();
906                    continue;
907                }
908                expressions.push(self.parse_expression());
909            }
910            if self.current_token.kind == TokenKind::RightParen {
911                self.next_token();
912            }
913            expressions
914        } else {
915            self.parse_expression_list_until_action_end_from_current()
916        };
917
918        if expressions.is_empty() {
919            return Err(self.missing_printf_format_string());
920        }
921
922        Ok(Statement::Printf(expressions))
923    }
924
925    fn parse_gsub_function(&mut self) -> Statement<'a> {
926        self.next_token();
927        if self.current_token.kind != TokenKind::LeftParen {
928            todo!()
929        }
930
931        self.next_token_in_regex_context();
932        let pattern = self.parse_expression();
933
934        if self.current_token.kind != TokenKind::Comma {
935            todo!()
936        }
937        self.next_token();
938        let replacement = self.parse_expression();
939
940        let target = if self.current_token.kind == TokenKind::Comma {
941            self.next_token();
942            Some(self.parse_expression())
943        } else {
944            None
945        };
946
947        if self.current_token.kind != TokenKind::RightParen {
948            todo!()
949        }
950        self.next_token();
951
952        Statement::Gsub {
953            pattern,
954            replacement,
955            target,
956        }
957    }
958
959    fn parse_sub_function(&mut self) -> Statement<'a> {
960        self.next_token();
961        if self.current_token.kind != TokenKind::LeftParen {
962            todo!()
963        }
964
965        self.next_token_in_regex_context();
966        let pattern = self.parse_expression();
967
968        if self.current_token.kind != TokenKind::Comma {
969            todo!()
970        }
971        self.next_token();
972        let replacement = self.parse_expression();
973
974        if self.current_token.kind == TokenKind::Comma {
975            todo!()
976        }
977
978        if self.current_token.kind != TokenKind::RightParen {
979            todo!()
980        }
981        self.next_token();
982
983        Statement::Sub {
984            pattern,
985            replacement,
986        }
987    }
988
989    fn parse_system_function(&mut self) -> Statement<'a> {
990        self.next_token();
991        if self.current_token.kind != TokenKind::LeftParen {
992            todo!()
993        }
994        self.next_token();
995        let command = self.parse_expression();
996        if self.current_token.kind != TokenKind::RightParen {
997            todo!()
998        }
999        self.next_token();
1000        Statement::System(command)
1001    }
1002
1003    fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
1004        let mut expressions = Vec::new();
1005        let mut expect_more = false;
1006
1007        loop {
1008            if self.current_token.kind == TokenKind::RightCurlyBrace
1009                || self.current_token.kind == TokenKind::RightParen
1010                || self.current_token.kind == TokenKind::Eof
1011            {
1012                break;
1013            }
1014
1015            if self.current_token.kind == TokenKind::NewLine
1016                || self.current_token.kind == TokenKind::Semicolon
1017            {
1018                if expect_more {
1019                    self.next_token();
1020                    continue;
1021                }
1022                break;
1023            }
1024
1025            if self.current_token.kind == TokenKind::Comma {
1026                self.next_token();
1027                expect_more = true;
1028                continue;
1029            }
1030
1031            let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
1032            let expression = self.parse_expression();
1033            expressions.push(expression);
1034            if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1035                while self.current_token.kind == TokenKind::Comma {
1036                    self.next_token();
1037                    expressions.push(self.parse_expression());
1038                }
1039                if self.current_token.kind != TokenKind::RightParen {
1040                    todo!()
1041                }
1042                self.next_token();
1043            }
1044            expect_more = false;
1045        }
1046
1047        if self.current_token.kind == TokenKind::RightParen {
1048            self.next_token();
1049        }
1050
1051        expressions
1052    }
1053
1054    fn parse_expression(&mut self) -> Expression<'a> {
1055        self.parse_expression_with_min_precedence(0)
1056    }
1057
1058    fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1059        let left = self.parse_primary_expression();
1060        self.parse_expression_suffix(left, min_precedence)
1061    }
1062
1063    fn parse_expression_suffix(
1064        &mut self,
1065        mut left: Expression<'a>,
1066        min_precedence: u8,
1067    ) -> Expression<'a> {
1068        const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1069        const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1070
1071        loop {
1072            if self.current_token.kind == TokenKind::QuestionMark {
1073                if min_precedence > 0 {
1074                    break;
1075                }
1076                self.next_token_in_regex_context();
1077                let then_expr = self.parse_expression_with_min_precedence(0);
1078                if self.current_token.kind != TokenKind::Colon {
1079                    todo!()
1080                }
1081                self.next_token_in_regex_context();
1082                let else_expr = self.parse_expression_with_min_precedence(0);
1083                left = Expression::Ternary {
1084                    condition: Box::new(left),
1085                    then_expr: Box::new(then_expr),
1086                    else_expr: Box::new(else_expr),
1087                };
1088                continue;
1089            }
1090
1091            if infix_operator_precedence(&self.current_token.kind).is_none()
1092                && is_expression_start(&self.current_token.kind)
1093            {
1094                if CONCAT_LEFT_PRECEDENCE < min_precedence {
1095                    break;
1096                }
1097
1098                let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
1099                left = Expression::Concatenation {
1100                    left: Box::new(left),
1101                    right: Box::new(right),
1102                };
1103                continue;
1104            }
1105
1106            let (left_precedence, right_precedence) =
1107                match infix_operator_precedence(&self.current_token.kind) {
1108                    Some(value) => value,
1109                    None => break,
1110                };
1111
1112            if left_precedence < min_precedence {
1113                break;
1114            }
1115
1116            let operator = self.current_token.clone();
1117            if matches!(
1118                operator.kind,
1119                TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1120            ) {
1121                self.next_token_in_regex_context();
1122            } else {
1123                self.next_token();
1124            }
1125            let right = self.parse_expression_with_min_precedence(right_precedence);
1126
1127            left = Expression::Infix {
1128                left: Box::new(left),
1129                operator,
1130                right: Box::new(right),
1131            };
1132        }
1133
1134        left
1135    }
1136
1137    fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1138        let mut condition = self.parse_expression();
1139        if self.current_token.kind == TokenKind::Comma {
1140            while self.current_token.kind == TokenKind::Comma {
1141                let operator = self.current_token.clone();
1142                self.next_token_in_regex_context();
1143                let right = self.parse_expression();
1144                condition = Expression::Infix {
1145                    left: Box::new(condition),
1146                    operator,
1147                    right: Box::new(right),
1148                };
1149            }
1150            if self.current_token.kind != TokenKind::RightParen {
1151                todo!()
1152            }
1153            self.next_token();
1154            condition = self.parse_expression_suffix(condition, 0);
1155        }
1156        condition
1157    }
1158
1159    fn parse_primary_expression(&mut self) -> Expression<'a> {
1160        if self.current_token.kind == TokenKind::Minus {
1161            let operator = self.current_token.clone();
1162            self.next_token();
1163            let right = self.parse_primary_expression();
1164            return Expression::Infix {
1165                left: Box::new(Expression::Number(0.0)),
1166                operator,
1167                right: Box::new(right),
1168            };
1169        }
1170        if self.current_token.kind == TokenKind::Plus {
1171            self.next_token();
1172            return self.parse_primary_expression();
1173        }
1174        if self.current_token.kind == TokenKind::ExclamationMark {
1175            self.next_token_in_regex_context();
1176            let expression = self.parse_primary_expression();
1177            return Expression::Not(Box::new(expression));
1178        }
1179        if self.current_token.kind == TokenKind::Increment {
1180            self.next_token();
1181            let expression = self.parse_primary_expression();
1182            return Expression::PreIncrement(Box::new(expression));
1183        }
1184        if self.current_token.kind == TokenKind::Decrement {
1185            self.next_token();
1186            let expression = self.parse_primary_expression();
1187            return Expression::PreDecrement(Box::new(expression));
1188        }
1189
1190        let mut expression = self.parse_primary_atom();
1191        if self.current_token.kind == TokenKind::Increment {
1192            self.next_token();
1193            expression = Expression::PostIncrement(Box::new(expression));
1194        } else if self.current_token.kind == TokenKind::Decrement {
1195            self.next_token();
1196            expression = Expression::PostDecrement(Box::new(expression));
1197        }
1198        expression
1199    }
1200
1201    fn parse_primary_atom(&mut self) -> Expression<'a> {
1202        match self.current_token.kind {
1203            TokenKind::String => {
1204                let expression = Expression::String(self.current_token.literal);
1205                self.next_token();
1206                expression
1207            }
1208            TokenKind::Regex => {
1209                let expression = Expression::Regex(self.current_token.literal);
1210                self.next_token();
1211                expression
1212            }
1213            TokenKind::Number => {
1214                let expression = self.parse_number_expression().unwrap_or_else(|| {
1215                    panic!(
1216                        "failed to parse numeric literal: {}",
1217                        self.current_token.literal
1218                    )
1219                });
1220                self.next_token();
1221                expression
1222            }
1223            TokenKind::DollarSign => {
1224                self.next_token();
1225                let expression = self.parse_primary_atom();
1226                Expression::Field(Box::new(expression))
1227            }
1228            TokenKind::LeftParen => {
1229                self.next_token();
1230                let expression = self.parse_expression();
1231                if self.current_token.kind == TokenKind::RightParen {
1232                    self.next_token();
1233                }
1234                expression
1235            }
1236            TokenKind::Identifier => {
1237                let identifier = self.current_token.clone();
1238                self.next_token();
1239                if self.current_token.kind == TokenKind::LeftParen
1240                    && self.token_is_immediately_after(&identifier)
1241                {
1242                    let args = self.parse_call_arguments();
1243                    return Expression::FunctionCall {
1244                        name: identifier.literal,
1245                        args,
1246                    };
1247                }
1248                if self.current_token.kind == TokenKind::LeftSquareBracket {
1249                    self.next_token_in_regex_context();
1250                    let index = self.parse_array_index_expression();
1251                    if self.current_token.kind != TokenKind::RightSquareBracket {
1252                        todo!()
1253                    }
1254                    self.next_token();
1255                    Expression::ArrayAccess {
1256                        identifier: identifier.literal,
1257                        index: Box::new(index),
1258                    }
1259                } else {
1260                    Expression::Identifier(identifier.literal)
1261                }
1262            }
1263            TokenKind::Length => {
1264                self.next_token();
1265                if self.current_token.kind == TokenKind::LeftParen {
1266                    self.next_token();
1267                    if self.current_token.kind == TokenKind::RightParen {
1268                        self.next_token();
1269                        Expression::Length(None)
1270                    } else {
1271                        let expression = self.parse_expression();
1272                        if self.current_token.kind != TokenKind::RightParen {
1273                            todo!()
1274                        }
1275                        self.next_token();
1276                        Expression::Length(Some(Box::new(expression)))
1277                    }
1278                } else {
1279                    Expression::Length(None)
1280                }
1281            }
1282            TokenKind::Substr => {
1283                self.next_token();
1284                if self.current_token.kind != TokenKind::LeftParen {
1285                    todo!()
1286                }
1287                self.next_token();
1288                let string = self.parse_expression();
1289                if self.current_token.kind != TokenKind::Comma {
1290                    todo!()
1291                }
1292                self.next_token();
1293                let start = self.parse_expression();
1294                let mut length = None;
1295                if self.current_token.kind == TokenKind::Comma {
1296                    self.next_token();
1297                    length = Some(Box::new(self.parse_expression()));
1298                }
1299                if self.current_token.kind != TokenKind::RightParen {
1300                    todo!()
1301                }
1302                self.next_token();
1303                Expression::Substr {
1304                    string: Box::new(string),
1305                    start: Box::new(start),
1306                    length,
1307                }
1308            }
1309            TokenKind::Rand => {
1310                self.next_token();
1311                if self.current_token.kind == TokenKind::LeftParen {
1312                    self.next_token();
1313                    if self.current_token.kind != TokenKind::RightParen {
1314                        todo!()
1315                    }
1316                    self.next_token();
1317                }
1318                Expression::Rand
1319            }
1320            TokenKind::Close
1321            | TokenKind::Cos
1322            | TokenKind::Exp
1323            | TokenKind::Index
1324            | TokenKind::Int
1325            | TokenKind::Log
1326            | TokenKind::Match
1327            | TokenKind::Sin
1328            | TokenKind::Sprintf
1329            | TokenKind::Split
1330            | TokenKind::Sqrt
1331            | TokenKind::Srand => {
1332                let name = self.current_token.literal;
1333                self.next_token();
1334                if self.current_token.kind == TokenKind::LeftParen {
1335                    let args = self.parse_call_arguments();
1336                    return Expression::FunctionCall { name, args };
1337                }
1338                Expression::Number(0.0)
1339            }
1340            _ => {
1341                panic!(
1342                    "parse_primary_expression not yet implemented, found token: {:?}",
1343                    self.current_token
1344                )
1345            }
1346        }
1347    }
1348
1349    pub fn try_parse_program(&mut self) -> Result<Program<'_>, ParseError<'a>> {
1350        let mut program = Program::new();
1351
1352        while !self.is_eof() {
1353            match self.parse_next_rule()? {
1354                Some(Rule::Begin(action)) => program.add_begin_block(action),
1355                Some(Rule::End(action)) => program.add_end_block(action),
1356                Some(rule) => program.add_rule(rule),
1357                None => {}
1358            }
1359            self.next_token_in_regex_context();
1360        }
1361
1362        for definition in self.function_definitions.drain(..) {
1363            program.add_function_definition(definition);
1364        }
1365
1366        Ok(program)
1367    }
1368
1369    pub fn parse_program(&mut self) -> Program<'_> {
1370        self.try_parse_program()
1371            .unwrap_or_else(|err| panic!("{err}"))
1372    }
1373
1374    fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1375        if self.current_token.kind != TokenKind::LeftParen {
1376            return vec![];
1377        }
1378        self.next_token_in_regex_context();
1379        let mut args = Vec::new();
1380        while self.current_token.kind != TokenKind::RightParen
1381            && self.current_token.kind != TokenKind::Eof
1382        {
1383            if self.current_token.kind == TokenKind::Comma {
1384                self.next_token();
1385                continue;
1386            }
1387            args.push(self.parse_expression());
1388        }
1389        if self.current_token.kind == TokenKind::RightParen {
1390            self.next_token();
1391        }
1392        args
1393    }
1394}
1395
1396fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1397    match kind {
1398        TokenKind::Assign
1399        | TokenKind::AddAssign
1400        | TokenKind::SubtractAssign
1401        | TokenKind::MultiplyAssign
1402        | TokenKind::DivideAssign
1403        | TokenKind::ModuloAssign
1404        | TokenKind::PowerAssign => Some((0, 0)),
1405        TokenKind::Or => Some((1, 2)),
1406        TokenKind::And => Some((3, 4)),
1407        TokenKind::Equal
1408        | TokenKind::NotEqual
1409        | TokenKind::GreaterThan
1410        | TokenKind::GreaterThanOrEqual
1411        | TokenKind::In
1412        | TokenKind::LessThan
1413        | TokenKind::LessThanOrEqual
1414        | TokenKind::Tilde
1415        | TokenKind::NoMatch => Some((5, 6)),
1416        TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1417        TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1418        TokenKind::Caret => Some((13, 12)),
1419        _ => None,
1420    }
1421}
1422
1423fn is_expression_start(kind: &TokenKind) -> bool {
1424    matches!(
1425        kind,
1426        TokenKind::String
1427            | TokenKind::Regex
1428            | TokenKind::Number
1429            | TokenKind::DollarSign
1430            | TokenKind::LeftParen
1431            | TokenKind::Identifier
1432            | TokenKind::Cos
1433            | TokenKind::Exp
1434            | TokenKind::Index
1435            | TokenKind::Int
1436            | TokenKind::Length
1437            | TokenKind::Log
1438            | TokenKind::Match
1439            | TokenKind::Rand
1440            | TokenKind::Sin
1441            | TokenKind::Sprintf
1442            | TokenKind::Split
1443            | TokenKind::Sqrt
1444            | TokenKind::Srand
1445            | TokenKind::Substr
1446            | TokenKind::Increment
1447            | TokenKind::Decrement
1448    )
1449}
1450
1451fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1452    let (kind, literal) = match token.kind {
1453        TokenKind::AddAssign => (TokenKind::Plus, "+"),
1454        TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1455        TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1456        TokenKind::DivideAssign => (TokenKind::Division, "/"),
1457        TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1458        TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1459        _ => todo!(),
1460    };
1461
1462    Token::new(kind, literal, token.span.start)
1463}
1464
1465#[cfg(test)]
1466mod tests {
1467    use super::*;
1468
1469    #[test]
1470    fn create_parser() {
1471        let mut parser = Parser::new(Lexer::new("42 == 42"));
1472
1473        assert_eq!(parser.current_token.literal, "42");
1474        parser.next_token();
1475        assert_eq!(parser.current_token.literal, "==");
1476    }
1477
1478    #[test]
1479    fn parse_empty_program() {
1480        let mut parser = Parser::new(Lexer::new(""));
1481
1482        let program = parser.parse_program();
1483
1484        assert_eq!(program.len(), 0);
1485    }
1486
1487    #[test]
1488    fn parse_statement_with_unhandled_token_returns_parse_error() {
1489        let mut parser = Parser::new(Lexer::new("BEGIN { else }"));
1490
1491        let err = parser
1492            .try_parse_program()
1493            .expect_err("expected parse error for stray else");
1494
1495        assert_eq!(
1496            err.kind,
1497            ParseErrorKind::UnexpectedToken {
1498                expected: "statement"
1499            }
1500        );
1501        assert_eq!(err.token.kind, TokenKind::Else);
1502    }
1503
1504    #[test]
1505    fn parse_action_without_pattern() {
1506        let mut parser = Parser::new(Lexer::new("{ print }"));
1507
1508        let program = parser.parse_program();
1509
1510        assert_eq!(program.len(), 1);
1511        assert_eq!("{ print }", program.to_string());
1512    }
1513
1514    #[test]
1515    fn parse_action_with_leading_newlines() {
1516        let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1517
1518        let program = parser.parse_program();
1519
1520        assert_eq!(program.len(), 1);
1521        assert_eq!("{ print }", program.to_string());
1522    }
1523
1524    #[test]
1525    fn parse_begin_block() {
1526        let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1527
1528        let program = parser.parse_program();
1529
1530        assert_eq!(program.len(), 1);
1531        assert_eq!("BEGIN { print }", program.to_string());
1532    }
1533
1534    #[test]
1535    fn parse_end_block() {
1536        let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1537
1538        let program = parser.parse_program();
1539
1540        assert_eq!(program.len(), 1);
1541        assert_eq!("END { print 42 }", program.to_string());
1542    }
1543
1544    #[test]
1545    fn parse_regex_pattern_action() {
1546        let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1547
1548        let program = parser.parse_program();
1549
1550        assert_eq!(program.len(), 1);
1551        assert_eq!("/foo/ { print }", program.to_string());
1552    }
1553
1554    #[test]
1555    fn parse_print_infix_expression() {
1556        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1557
1558        let program = parser.parse_program();
1559        let mut begin_blocks = program.begin_blocks_iter();
1560        let Action { statements } = begin_blocks.next().expect("expected begin block");
1561
1562        let exprs = match &statements[0] {
1563            Statement::Print(expressions) => expressions,
1564            _ => panic!("expected print statement"),
1565        };
1566
1567        match &exprs[0] {
1568            Expression::Infix {
1569                left,
1570                operator,
1571                right,
1572            } => {
1573                assert!(matches!(**left, Expression::Number(1.0)));
1574                assert_eq!(operator.kind, TokenKind::Plus);
1575                assert!(matches!(**right, Expression::Number(2.0)));
1576            }
1577            _ => panic!("expected infix expression"),
1578        }
1579    }
1580
1581    #[test]
1582    fn parse_print_parenthesized_expression() {
1583        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1584
1585        let program = parser.parse_program();
1586        let mut begin_blocks = program.begin_blocks_iter();
1587        let Action { statements } = begin_blocks.next().expect("expected begin block");
1588
1589        let exprs = match &statements[0] {
1590            Statement::Print(expressions) => expressions,
1591            _ => panic!("expected print statement"),
1592        };
1593
1594        match &exprs[0] {
1595            Expression::Infix {
1596                left,
1597                operator,
1598                right,
1599            } => {
1600                assert_eq!(operator.kind, TokenKind::Asterisk);
1601                assert!(matches!(**right, Expression::Number(3.0)));
1602                assert!(matches!(**left, Expression::Infix { .. }));
1603            }
1604            _ => panic!("expected infix expression"),
1605        }
1606    }
1607
1608    #[test]
1609    fn parse_print_multiplication_has_higher_precedence_than_addition() {
1610        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1611
1612        let program = parser.parse_program();
1613        let mut begin_blocks = program.begin_blocks_iter();
1614        let Action { statements } = begin_blocks.next().expect("expected begin block");
1615
1616        let exprs = match &statements[0] {
1617            Statement::Print(expressions) => expressions,
1618            _ => panic!("expected print statement"),
1619        };
1620
1621        match &exprs[0] {
1622            Expression::Infix {
1623                left,
1624                operator,
1625                right,
1626            } => {
1627                assert_eq!(operator.kind, TokenKind::Plus);
1628                assert!(matches!(**left, Expression::Number(1.0)));
1629                match &**right {
1630                    Expression::Infix {
1631                        operator: right_op, ..
1632                    } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1633                    _ => panic!("expected nested infix expression"),
1634                }
1635            }
1636            _ => panic!("expected infix expression"),
1637        }
1638    }
1639
1640    #[test]
1641    fn parse_print_power_is_right_associative() {
1642        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1643
1644        let program = parser.parse_program();
1645        let mut begin_blocks = program.begin_blocks_iter();
1646        let Action { statements } = begin_blocks.next().expect("expected begin block");
1647
1648        let exprs = match &statements[0] {
1649            Statement::Print(expressions) => expressions,
1650            _ => panic!("expected print statement"),
1651        };
1652
1653        match &exprs[0] {
1654            Expression::Infix {
1655                left,
1656                operator,
1657                right,
1658            } => {
1659                assert_eq!(operator.kind, TokenKind::Caret);
1660                assert!(matches!(**left, Expression::Number(2.0)));
1661                match &**right {
1662                    Expression::Infix {
1663                        operator: right_op, ..
1664                    } => assert_eq!(right_op.kind, TokenKind::Caret),
1665                    _ => panic!("expected nested infix expression"),
1666                }
1667            }
1668            _ => panic!("expected infix expression"),
1669        }
1670    }
1671
1672    #[test]
1673    fn parse_print_minus_is_left_associative() {
1674        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1675
1676        let program = parser.parse_program();
1677        let mut begin_blocks = program.begin_blocks_iter();
1678        let Action { statements } = begin_blocks.next().expect("expected begin block");
1679
1680        let exprs = match &statements[0] {
1681            Statement::Print(expressions) => expressions,
1682            _ => panic!("expected print statement"),
1683        };
1684
1685        match &exprs[0] {
1686            Expression::Infix {
1687                left,
1688                operator,
1689                right,
1690            } => {
1691                assert_eq!(operator.kind, TokenKind::Minus);
1692                match &**left {
1693                    Expression::Infix {
1694                        operator: left_op, ..
1695                    } => assert_eq!(left_op.kind, TokenKind::Minus),
1696                    _ => panic!("expected nested infix expression"),
1697                }
1698                assert!(matches!(**right, Expression::Number(1.0)));
1699            }
1700            _ => panic!("expected infix expression"),
1701        }
1702    }
1703
1704    #[test]
1705    fn parse_print_concatenation() {
1706        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
1707
1708        let program = parser.parse_program();
1709        let mut begin_blocks = program.begin_blocks_iter();
1710        let Action { statements } = begin_blocks.next().expect("expected begin block");
1711
1712        let exprs = match &statements[0] {
1713            Statement::Print(expressions) => expressions,
1714            _ => panic!("expected print statement"),
1715        };
1716
1717        assert_eq!(exprs.len(), 1);
1718        match &exprs[0] {
1719            Expression::Concatenation { left, right } => {
1720                assert!(matches!(**left, Expression::String("Value:")));
1721                assert!(matches!(**right, Expression::Number(42.0)));
1722            }
1723            _ => panic!("expected concatenation expression"),
1724        }
1725    }
1726
1727    #[test]
1728    fn parse_continue_statement() {
1729        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
1730
1731        let program = parser.parse_program();
1732        let mut rules = program.rules_iter();
1733        let rule = rules.next().expect("expected rule");
1734
1735        let statements = match rule {
1736            Rule::Action(Action { statements }) => statements,
1737            _ => panic!("expected action rule"),
1738        };
1739
1740        assert!(matches!(statements[0], Statement::Continue));
1741    }
1742
1743    #[test]
1744    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
1745        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
1746
1747        let program = parser.parse_program();
1748        let mut rules = program.rules_iter();
1749        let rule = rules.next().expect("expected rule");
1750
1751        let statements = match rule {
1752            Rule::Action(Action { statements }) => statements,
1753            _ => panic!("expected action rule"),
1754        };
1755
1756        let exprs = match &statements[1] {
1757            Statement::Print(expressions) => expressions,
1758            _ => panic!("expected print statement"),
1759        };
1760
1761        assert_eq!(exprs.len(), 1);
1762        match &exprs[0] {
1763            Expression::Concatenation { left, right } => {
1764                assert!(matches!(**left, Expression::Identifier("x")));
1765                assert!(matches!(**right, Expression::PreIncrement(_)));
1766            }
1767            _ => panic!("expected concatenation expression"),
1768        }
1769    }
1770
1771    #[test]
1772    fn parse_print_field_expression() {
1773        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
1774
1775        let program = parser.parse_program();
1776        let mut rules = program.rules_iter();
1777        let rule = rules.next().expect("expected rule");
1778
1779        let statements = match rule {
1780            Rule::Action(Action { statements }) => statements,
1781            _ => panic!("expected action rule"),
1782        };
1783
1784        let exprs = match &statements[0] {
1785            Statement::Print(expressions) => expressions,
1786            _ => panic!("expected print statement"),
1787        };
1788
1789        match &exprs[0] {
1790            Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
1791            _ => panic!("expected field expression"),
1792        }
1793    }
1794
1795    #[test]
1796    fn parse_print_with_commas() {
1797        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
1798
1799        let program = parser.parse_program();
1800
1801        assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
1802    }
1803
1804    #[test]
1805    fn parse_number_of_fields_identifier() {
1806        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
1807
1808        let program = parser.parse_program();
1809
1810        assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
1811    }
1812
1813    #[test]
1814    fn parse_printf_with_format_and_arguments() {
1815        let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
1816
1817        let program = parser.parse_program();
1818
1819        assert_eq!(
1820            r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
1821            program.to_string()
1822        );
1823    }
1824
1825    #[test]
1826    fn parse_print_ternary_expression() {
1827        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print x ? y : z }"#));
1828
1829        let program = parser.parse_program();
1830        let mut begin_blocks = program.begin_blocks_iter();
1831        let Action { statements } = begin_blocks.next().expect("expected begin block");
1832
1833        let exprs = match &statements[0] {
1834            Statement::Print(expressions) => expressions,
1835            _ => panic!("expected print statement"),
1836        };
1837
1838        assert_eq!(exprs.len(), 1);
1839        match &exprs[0] {
1840            Expression::Ternary {
1841                condition,
1842                then_expr,
1843                else_expr,
1844            } => {
1845                assert!(matches!(**condition, Expression::Identifier("x")));
1846                assert!(matches!(**then_expr, Expression::Identifier("y")));
1847                assert!(matches!(**else_expr, Expression::Identifier("z")));
1848            }
1849            _ => panic!("expected ternary expression"),
1850        }
1851    }
1852
1853    #[test]
1854    fn parse_printf_without_arguments_returns_parse_error() {
1855        let mut parser = Parser::new(Lexer::new(r#"{ printf }"#));
1856
1857        let err = parser
1858            .try_parse_program()
1859            .expect_err("expected parse error for printf without arguments");
1860
1861        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
1862    }
1863
1864    #[test]
1865    fn parse_printf_without_arguments_in_parentheses_returns_parse_error() {
1866        let mut parser = Parser::new(Lexer::new(r#"{ printf() }"#));
1867
1868        let err = parser
1869            .try_parse_program()
1870            .expect_err("expected parse error for empty printf call");
1871
1872        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
1873    }
1874
1875    #[test]
1876    fn parse_add_assignment_and_pre_increment() {
1877        let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
1878
1879        let program = parser.parse_program();
1880
1881        assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
1882    }
1883
1884    #[test]
1885    fn parse_regex_match_pattern_action() {
1886        let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
1887
1888        let program = parser.parse_program();
1889
1890        assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
1891    }
1892
1893    #[test]
1894    fn parse_not_pattern_action() {
1895        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
1896
1897        let program = parser.parse_program();
1898        let mut rules = program.rules_iter();
1899        let rule = rules.next().expect("expected rule");
1900
1901        match rule {
1902            Rule::PatternAction {
1903                pattern: Some(Expression::Not(inner)),
1904                action: Some(Action { statements }),
1905            } => {
1906                assert!(matches!(**inner, Expression::Infix { .. }));
1907                assert!(matches!(statements[0], Statement::Print(_)));
1908            }
1909            _ => panic!("expected negated pattern action"),
1910        }
1911    }
1912
1913    #[test]
1914    fn parse_print_with_line_continuation_after_comma() {
1915        let mut parser = Parser::new(Lexer::new(
1916            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
1917        ));
1918
1919        let program = parser.parse_program();
1920
1921        assert_eq!(
1922            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
1923            program.to_string()
1924        );
1925    }
1926
1927    #[test]
1928    fn parse_gsub_statement() {
1929        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
1930
1931        let program = parser.parse_program();
1932
1933        assert_eq!(
1934            r#"{ gsub(/USA/, "United States"); print }"#,
1935            program.to_string()
1936        );
1937    }
1938
1939    #[test]
1940    fn parse_gsub_statement_with_target() {
1941        let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
1942
1943        let program = parser.parse_program();
1944
1945        assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
1946    }
1947
1948    #[test]
1949    fn parse_system_statement() {
1950        let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
1951
1952        let program = parser.parse_program();
1953
1954        assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
1955    }
1956
1957    #[test]
1958    fn parse_print_length_builtin_expression() {
1959        let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
1960
1961        let program = parser.parse_program();
1962
1963        assert_eq!(r#"{ print length, $0 }"#, program.to_string());
1964    }
1965
1966    #[test]
1967    fn parse_length_expression_as_rule_pattern() {
1968        let mut parser = Parser::new(Lexer::new(
1969            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1970        ));
1971
1972        let program = parser.parse_program();
1973
1974        assert_eq!(
1975            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
1976            program.to_string()
1977        );
1978    }
1979
1980    #[test]
1981    fn parse_field_assignment_with_substr() {
1982        let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
1983
1984        let program = parser.parse_program();
1985
1986        assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
1987    }
1988
1989    #[test]
1990    fn parse_assignment_with_concatenation_and_substr() {
1991        let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
1992
1993        let program = parser.parse_program();
1994
1995        assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
1996    }
1997
1998    #[test]
1999    fn parse_field_divide_assignment() {
2000        let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
2001
2002        let program = parser.parse_program();
2003
2004        assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
2005    }
2006
2007    #[test]
2008    fn parse_chained_assignment() {
2009        let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
2010
2011        let program = parser.parse_program();
2012
2013        assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
2014    }
2015
2016    #[test]
2017    fn parse_if_statement_with_block() {
2018        let mut parser = Parser::new(Lexer::new(
2019            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2020        ));
2021
2022        let program = parser.parse_program();
2023
2024        assert_eq!(
2025            r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2026            program.to_string()
2027        );
2028    }
2029
2030    #[test]
2031    fn parse_while_with_post_increment() {
2032        let mut parser = Parser::new(Lexer::new(
2033            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2034        ));
2035
2036        let program = parser.parse_program();
2037
2038        assert_eq!(
2039            r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2040            program.to_string()
2041        );
2042    }
2043
2044    #[test]
2045    fn parse_while_with_single_body_statement() {
2046        let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
2047
2048        let program = parser.parse_program();
2049
2050        assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
2051    }
2052
2053    #[test]
2054    fn parse_do_while_with_post_increment() {
2055        let mut parser = Parser::new(Lexer::new(
2056            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2057        ));
2058
2059        let program = parser.parse_program();
2060
2061        assert_eq!(
2062            r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2063            program.to_string()
2064        );
2065    }
2066
2067    #[test]
2068    fn parse_for_with_empty_body_statement() {
2069        let mut parser = Parser::new(Lexer::new(
2070            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
2071        ));
2072
2073        let program = parser.parse_program();
2074
2075        assert_eq!(
2076            r#"{ for (i = 1; i <= NF; s += $i++) {  }; print s }"#,
2077            program.to_string()
2078        );
2079    }
2080
2081    #[test]
2082    fn parse_post_decrement_statement() {
2083        let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
2084
2085        let program = parser.parse_program();
2086
2087        assert_eq!(r#"{ k--; n-- }"#, program.to_string());
2088    }
2089
2090    #[test]
2091    fn parse_rand_expression() {
2092        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
2093
2094        let program = parser.parse_program();
2095
2096        assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
2097    }
2098
2099    #[test]
2100    fn parse_math_builtin_expressions() {
2101        let mut parser = Parser::new(Lexer::new(
2102            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2103        ));
2104
2105        let program = parser.parse_program();
2106
2107        assert_eq!(
2108            r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2109            program.to_string()
2110        );
2111    }
2112
2113    #[test]
2114    fn parse_index_builtin_expression() {
2115        let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
2116
2117        let program = parser.parse_program();
2118
2119        assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
2120    }
2121
2122    #[test]
2123    fn parse_match_builtin_expression() {
2124        let mut parser = Parser::new(Lexer::new(r#"{ print match($NF, $1), RSTART, RLENGTH }"#));
2125
2126        let program = parser.parse_program();
2127
2128        assert_eq!(
2129            r#"{ print match($NF, $1), RSTART, RLENGTH }"#,
2130            program.to_string()
2131        );
2132    }
2133
2134    #[test]
2135    fn parse_in_membership_expression() {
2136        let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
2137
2138        let program = parser.parse_program();
2139
2140        assert_eq!(r#"{ print 1 in x }"#, program.to_string());
2141    }
2142
2143    #[test]
2144    fn parse_parenthesized_composite_membership_expression() {
2145        let mut parser = Parser::new(Lexer::new(r#"{ if (($0, $1) in x) print "yes" }"#));
2146
2147        let program = parser.parse_program();
2148
2149        assert_eq!(
2150            r#"{ if ($0, $1 in x) { print "yes" } }"#,
2151            program.to_string()
2152        );
2153    }
2154
2155    #[test]
2156    fn parse_for_loop_with_single_body_statement() {
2157        let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
2158
2159        let program = parser.parse_program();
2160
2161        assert_eq!(
2162            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
2163            program.to_string()
2164        );
2165    }
2166
2167    #[test]
2168    fn parse_if_with_single_statement_body() {
2169        let mut parser = Parser::new(Lexer::new(
2170            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
2171        ));
2172
2173        let program = parser.parse_program();
2174
2175        assert_eq!(
2176            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
2177            program.to_string()
2178        );
2179    }
2180
2181    #[test]
2182    fn parse_exit_statement() {
2183        let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
2184
2185        let program = parser.parse_program();
2186
2187        assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
2188    }
2189
2190    #[test]
2191    fn parse_exit_statement_with_status() {
2192        let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
2193
2194        let program = parser.parse_program();
2195
2196        assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
2197    }
2198
2199    #[test]
2200    fn parse_user_defined_function_call_statement() {
2201        let mut parser = Parser::new(Lexer::new(
2202            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
2203        ));
2204
2205        let program = parser.parse_program();
2206
2207        let definition = program
2208            .function_definition("myabort")
2209            .expect("expected function definition");
2210        assert_eq!(definition.parameters, vec!["n"]);
2211        assert_eq!(definition.statements.len(), 1);
2212    }
2213
2214    #[test]
2215    fn parse_delete_array_element_statement() {
2216        let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2217
2218        let program = parser.parse_program();
2219
2220        assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2221    }
2222
2223    #[test]
2224    fn parse_array_add_assignment_and_access() {
2225        let mut parser = Parser::new(Lexer::new(
2226            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2227        ));
2228
2229        let program = parser.parse_program();
2230
2231        assert_eq!(
2232            r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2233            program.to_string()
2234        );
2235    }
2236
2237    #[test]
2238    fn parse_for_in_loop() {
2239        let mut parser = Parser::new(Lexer::new(
2240            r#"END { for (name in area) print name ":" area[name] }"#,
2241        ));
2242
2243        let program = parser.parse_program();
2244
2245        assert_eq!(
2246            r#"END { for (name in area) { print name ":" area[name] } }"#,
2247            program.to_string()
2248        );
2249    }
2250
2251    #[test]
2252    fn parse_print_redirection() {
2253        let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2254
2255        let program = parser.parse_program();
2256
2257        assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2258    }
2259
2260    #[test]
2261    fn parse_print_pipe() {
2262        let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2263
2264        let program = parser.parse_program();
2265
2266        assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2267    }
2268
2269    #[test]
2270    fn parse_hexadecimal_number() {
2271        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print 0xAA }"#));
2272
2273        let program = parser.parse_program();
2274
2275        assert_eq!(r#"BEGIN { print 0xAA }"#, program.to_string());
2276    }
2277}