1pub use crate::parse_error::{ParseError, ParseErrorKind};
2use crate::{
3 Lexer, Program,
4 ast::{Action, Expression, FunctionDefinition, Rule, Statement},
5 token::{Token, TokenKind},
6};
7
8#[derive(Debug)]
9pub struct Parser<'a> {
10 lexer: Lexer<'a>,
11 current_token: Token<'a>,
12 function_definitions: Vec<FunctionDefinition<'a>>,
13}
14
15impl<'a> Parser<'a> {
16 pub fn new(mut lexer: Lexer<'a>) -> Self {
17 let current_token = lexer.next_token_regex_aware();
18 Parser {
19 lexer,
20 current_token,
21 function_definitions: Vec::new(),
22 }
23 }
24
25 fn next_token(&mut self) {
26 self.current_token = self.lexer.next_token();
27 }
28
29 fn next_token_in_regex_context(&mut self) {
30 self.current_token = self.lexer.next_token_regex_aware();
31 }
32
33 fn skip_newlines(&mut self) {
34 while self.current_token.kind == TokenKind::NewLine {
35 self.next_token();
36 }
37 }
38
39 fn skip_newlines_in_regex_context(&mut self) {
40 while self.current_token.kind == TokenKind::NewLine {
41 self.next_token_in_regex_context();
42 }
43 }
44
45 fn skip_terminators(&mut self) {
46 while matches!(
47 self.current_token.kind,
48 TokenKind::NewLine | TokenKind::Semicolon
49 ) {
50 self.next_token();
51 }
52 }
53
54 fn is_eof(&self) -> bool {
55 self.current_token.kind == TokenKind::Eof
56 }
57
58 fn is_statement_terminator(&self) -> bool {
59 matches!(
60 self.current_token.kind,
61 TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
62 )
63 }
64
65 fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
66 self.current_token.span.start == previous.span.start + previous.literal.len()
67 }
68
69 fn parse_number_expression(&self) -> Option<Expression<'a>> {
70 let literal = self.current_token.literal;
71 if let Some(hex_digits) = literal
72 .strip_prefix("0x")
73 .or_else(|| literal.strip_prefix("0X"))
74 {
75 let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
76 return Some(Expression::HexNumber { literal, value });
77 }
78
79 literal.parse::<f64>().ok().map(Expression::Number)
80 }
81
82 fn parse_array_index_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
83 let mut index = self.parse_expression()?;
84 while self.current_token.kind == TokenKind::Comma {
85 let operator = self.current_token.clone();
86 self.next_token_in_regex_context();
87 let right = self.parse_expression()?;
88 index = Expression::Infix {
89 left: Box::new(index),
90 operator,
91 right: Box::new(right),
92 };
93 }
94 Ok(index)
95 }
96
97 fn parse_error(&self, kind: ParseErrorKind) -> ParseError<'a> {
98 ParseError {
99 kind,
100 token: self.current_token.clone(),
101 }
102 }
103
104 fn expected_rule(&self) -> ParseError<'a> {
105 self.parse_error(ParseErrorKind::ExpectedRule)
106 }
107
108 fn expected_statement(&self) -> ParseError<'a> {
109 self.parse_error(ParseErrorKind::ExpectedStatement)
110 }
111
112 fn expected_identifier(&self) -> ParseError<'a> {
113 self.parse_error(ParseErrorKind::ExpectedIdentifier)
114 }
115
116 fn unsupported_statement(&self) -> ParseError<'a> {
117 self.parse_error(ParseErrorKind::UnsupportedStatement)
118 }
119
120 fn unsupported_sub_target(&self) -> ParseError<'a> {
121 self.parse_error(ParseErrorKind::UnsupportedSubTarget)
122 }
123
124 fn expected_left_paren(&self) -> ParseError<'a> {
125 self.parse_error(ParseErrorKind::ExpectedLeftParen)
126 }
127
128 fn expected_left_brace(&self) -> ParseError<'a> {
129 self.parse_error(ParseErrorKind::ExpectedLeftBrace)
130 }
131
132 fn expected_right_square_bracket(&self) -> ParseError<'a> {
133 self.parse_error(ParseErrorKind::ExpectedRightSquareBracket)
134 }
135
136 fn expected_comma(&self) -> ParseError<'a> {
137 self.parse_error(ParseErrorKind::ExpectedComma)
138 }
139
140 fn expected_colon(&self) -> ParseError<'a> {
141 self.parse_error(ParseErrorKind::ExpectedColon)
142 }
143
144 fn expected_semicolon(&self) -> ParseError<'a> {
145 self.parse_error(ParseErrorKind::ExpectedSemicolon)
146 }
147
148 fn expected_while(&self) -> ParseError<'a> {
149 self.parse_error(ParseErrorKind::ExpectedWhile)
150 }
151
152 fn expected_right_brace(&self) -> ParseError<'a> {
153 self.parse_error(ParseErrorKind::ExpectedRightBrace)
154 }
155
156 fn expected_right_paren(&self) -> ParseError<'a> {
157 self.parse_error(ParseErrorKind::ExpectedRightParen)
158 }
159
160 fn missing_printf_format_string(&self) -> ParseError<'a> {
161 self.parse_error(ParseErrorKind::MissingPrintfFormatString)
162 }
163
164 fn invalid_numeric_literal(&self) -> ParseError<'a> {
165 self.parse_error(ParseErrorKind::InvalidNumericLiteral)
166 }
167
168 fn split_print_parenthesized_list(expression: Expression<'a>) -> Option<Vec<Expression<'a>>> {
169 fn flatten<'a>(expression: Expression<'a>, expressions: &mut Vec<Expression<'a>>) -> bool {
170 match expression {
171 Expression::Infix {
172 left,
173 operator,
174 right,
175 } if operator.kind == TokenKind::Comma => {
176 flatten(*left, expressions) && flatten(*right, expressions)
177 }
178 other => {
179 expressions.push(other);
180 true
181 }
182 }
183 }
184
185 let mut expressions = Vec::new();
186 if flatten(expression, &mut expressions) && expressions.len() > 1 {
187 Some(expressions)
188 } else {
189 None
190 }
191 }
192
193 fn parse_next_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
194 match &self.current_token.kind {
195 TokenKind::Begin => {
196 self.next_token();
197 if self.current_token.kind != TokenKind::LeftCurlyBrace {
198 return Err(self.expected_left_brace());
199 }
200 let action = self.parse_action()?;
201 Ok(Some(Rule::Begin(action)))
202 }
203 TokenKind::NewLine => {
204 self.next_token_in_regex_context();
205 self.parse_next_rule()
206 }
207 TokenKind::Eof => Ok(None),
208 TokenKind::LeftCurlyBrace => {
209 self.parse_action().map(|action| Some(Rule::Action(action)))
210 }
211 TokenKind::Function => {
212 self.parse_function_definition()?;
213 Ok(None)
214 }
215 TokenKind::End => {
216 self.next_token();
217 if self.current_token.kind != TokenKind::LeftCurlyBrace {
218 return Err(self.expected_left_brace());
219 }
220 let action = self.parse_action()?;
221 Ok(Some(Rule::End(action)))
222 }
223 TokenKind::Regex
224 | TokenKind::String
225 | TokenKind::Number
226 | TokenKind::DollarSign
227 | TokenKind::LeftParen
228 | TokenKind::Identifier
229 | TokenKind::Cos
230 | TokenKind::Exp
231 | TokenKind::Index
232 | TokenKind::Int
233 | TokenKind::Length
234 | TokenKind::Log
235 | TokenKind::Match
236 | TokenKind::Rand
237 | TokenKind::Sin
238 | TokenKind::Sprintf
239 | TokenKind::Split
240 | TokenKind::Sqrt
241 | TokenKind::Srand
242 | TokenKind::Substr
243 | TokenKind::ExclamationMark
244 | TokenKind::Increment
245 | TokenKind::Decrement => self.parse_pattern_rule(),
246 _ => Err(self.expected_rule()),
247 }
248 }
249
250 fn parse_pattern_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
251 let mut pattern = self.parse_expression()?;
252 if self.current_token.kind == TokenKind::Comma {
253 let operator = self.current_token.clone();
254 self.next_token_in_regex_context();
255 let right = self.parse_expression()?;
256 pattern = Expression::Infix {
257 left: Box::new(pattern),
258 operator,
259 right: Box::new(right),
260 };
261 }
262 let pattern = Some(pattern);
263
264 if self.current_token.kind == TokenKind::LeftCurlyBrace {
265 let action = self.parse_action()?;
266 Ok(Some(Rule::PatternAction {
267 pattern,
268 action: Some(action),
269 }))
270 } else {
271 Ok(Some(Rule::PatternAction {
272 pattern,
273 action: None,
274 }))
275 }
276 }
277
278 fn parse_action(&mut self) -> Result<Action<'a>, ParseError<'a>> {
279 self.next_token(); let mut statements = Vec::new();
282 while self.current_token.kind != TokenKind::RightCurlyBrace
283 && self.current_token.kind != TokenKind::Eof
284 {
285 self.skip_terminators();
286
287 if self.current_token.kind == TokenKind::RightCurlyBrace
288 || self.current_token.kind == TokenKind::Eof
289 {
290 break;
291 }
292
293 statements.push(self.parse_statement()?);
294 }
295
296 if self.current_token.kind != TokenKind::RightCurlyBrace {
297 return Err(self.expected_right_brace());
298 }
299
300 Ok(Action { statements })
301 }
302
303 fn parse_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
304 match self.current_token.kind {
305 TokenKind::Print => self.parse_print_function(),
306 TokenKind::Printf => self.parse_printf_function(),
307 TokenKind::System => self.parse_system_function(),
308 TokenKind::Split => self.parse_split_statement(),
309 TokenKind::Sub => self.parse_sub_function(),
310 TokenKind::Gsub => self.parse_gsub_function(),
311 TokenKind::Break => Ok(self.parse_break_statement()),
312 TokenKind::Continue => Ok(self.parse_continue_statement()),
313 TokenKind::Delete => self.parse_delete_statement(),
314 TokenKind::If => self.parse_if_statement(),
315 TokenKind::Do => self.parse_do_statement(),
316 TokenKind::While => self.parse_while_statement(),
317 TokenKind::For => self.parse_for_statement(),
318 TokenKind::Return => self.parse_return_statement(),
319 TokenKind::Next => Ok(self.parse_next_statement()),
320 TokenKind::Exit => self.parse_exit_statement(),
321 TokenKind::Identifier => self.parse_assignment_statement(),
322 TokenKind::DollarSign => self.parse_field_assignment_statement(),
323 TokenKind::Increment => self.parse_pre_increment_statement(),
324 TokenKind::Decrement => self.parse_pre_decrement_statement(),
325 TokenKind::Number
326 | TokenKind::String
327 | TokenKind::Regex
328 | TokenKind::LeftParen
329 | TokenKind::Close
330 | TokenKind::Cos
331 | TokenKind::Exp
332 | TokenKind::Index
333 | TokenKind::Int
334 | TokenKind::Length
335 | TokenKind::Log
336 | TokenKind::Match
337 | TokenKind::Rand
338 | TokenKind::Sin
339 | TokenKind::Sprintf
340 | TokenKind::Sqrt
341 | TokenKind::Srand
342 | TokenKind::Substr
343 | TokenKind::ToLower
344 | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression()?)),
345 _ => Err(self.expected_statement()),
346 }
347 }
348
349 fn parse_function_definition(&mut self) -> Result<(), ParseError<'a>> {
350 self.next_token();
351 if self.current_token.kind != TokenKind::Identifier {
352 return Err(self.expected_identifier());
353 }
354 let name = self.current_token.literal;
355 self.next_token();
356 if self.current_token.kind != TokenKind::LeftParen {
357 return Err(self.expected_left_paren());
358 }
359 self.next_token();
360
361 let mut parameters = Vec::new();
362 while self.current_token.kind != TokenKind::RightParen {
363 if self.current_token.kind != TokenKind::Identifier {
364 return Err(self.expected_identifier());
365 }
366 parameters.push(self.current_token.literal);
367 self.next_token();
368 if self.current_token.kind == TokenKind::Comma {
369 self.next_token();
370 } else if self.current_token.kind != TokenKind::RightParen {
371 return Err(self.expected_right_paren());
372 }
373 }
374
375 self.next_token();
376 self.skip_newlines();
377 if self.current_token.kind != TokenKind::LeftCurlyBrace {
378 return Err(self.expected_left_brace());
379 }
380
381 let mut statements = Vec::new();
382 self.next_token(); while self.current_token.kind != TokenKind::RightCurlyBrace
384 && self.current_token.kind != TokenKind::Eof
385 {
386 self.skip_terminators();
387
388 if self.current_token.kind == TokenKind::RightCurlyBrace
389 || self.current_token.kind == TokenKind::Eof
390 {
391 break;
392 }
393
394 statements.push(self.parse_statement()?);
395 }
396 if self.current_token.kind != TokenKind::RightCurlyBrace {
397 return Err(self.expected_right_brace());
398 }
399 self.function_definitions.push(FunctionDefinition {
400 name,
401 parameters,
402 statements,
403 });
404
405 Ok(())
406 }
407
408 fn parse_simple_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
409 match self.current_token.kind {
410 TokenKind::Identifier => self.parse_assignment_statement(),
411 TokenKind::DollarSign => self.parse_field_assignment_statement(),
412 TokenKind::Increment => self.parse_pre_increment_statement(),
413 TokenKind::Decrement => self.parse_pre_decrement_statement(),
414 TokenKind::Number
415 | TokenKind::String
416 | TokenKind::Regex
417 | TokenKind::LeftParen
418 | TokenKind::Close
419 | TokenKind::Cos
420 | TokenKind::Exp
421 | TokenKind::Index
422 | TokenKind::Int
423 | TokenKind::Length
424 | TokenKind::Log
425 | TokenKind::Match
426 | TokenKind::Rand
427 | TokenKind::Sin
428 | TokenKind::Sprintf
429 | TokenKind::Sqrt
430 | TokenKind::Srand
431 | TokenKind::Substr
432 | TokenKind::ToLower
433 | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression()?)),
434 _ => Err(self.unsupported_statement()),
435 }
436 }
437
438 fn parse_assignment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
439 let identifier = self.current_token.clone();
440 self.next_token();
441 self.parse_assignment_statement_with_identifier(identifier)
442 }
443
444 fn parse_assignment_statement_with_identifier(
445 &mut self,
446 identifier: Token<'a>,
447 ) -> Result<Statement<'a>, ParseError<'a>> {
448 if self.current_token.kind == TokenKind::LeftParen
449 && self.token_is_immediately_after(&identifier)
450 {
451 let args = self.parse_call_arguments()?;
452 return Ok(Statement::Expression(Expression::FunctionCall {
453 name: identifier.literal,
454 args,
455 }));
456 }
457 if self.current_token.kind == TokenKind::LeftSquareBracket {
458 self.next_token_in_regex_context();
459 let index = self.parse_array_index_expression()?;
460 if self.current_token.kind != TokenKind::RightSquareBracket {
461 return Err(self.expected_right_square_bracket());
462 }
463 self.next_token();
464 return match self.current_token.kind {
465 TokenKind::Assign => {
466 self.next_token_in_regex_context();
467 let value = self.parse_expression()?;
468 Ok(Statement::ArrayAssignment {
469 identifier: identifier.literal,
470 index,
471 value,
472 })
473 }
474 TokenKind::AddAssign => {
475 self.next_token_in_regex_context();
476 let value = self.parse_expression()?;
477 Ok(Statement::ArrayAddAssignment {
478 identifier: identifier.literal,
479 index,
480 value,
481 })
482 }
483 TokenKind::Increment => {
484 self.next_token();
485 Ok(Statement::ArrayPostIncrement {
486 identifier: identifier.literal,
487 index,
488 })
489 }
490 TokenKind::Decrement => {
491 self.next_token();
492 Ok(Statement::ArrayPostDecrement {
493 identifier: identifier.literal,
494 index,
495 })
496 }
497 _ => Err(self.unsupported_statement()),
498 };
499 }
500 match self.current_token.kind {
501 TokenKind::Assign => {
502 self.next_token_in_regex_context();
503 if self.current_token.kind == TokenKind::Split {
504 return self.parse_split_assignment_statement(identifier.literal);
505 }
506 let value = self.parse_expression()?;
507 Ok(Statement::Assignment {
508 identifier: identifier.literal,
509 value,
510 })
511 }
512 TokenKind::Increment => {
513 self.next_token();
514 Ok(Statement::PostIncrement {
515 identifier: identifier.literal,
516 })
517 }
518 TokenKind::Decrement => {
519 self.next_token();
520 Ok(Statement::PostDecrement {
521 identifier: identifier.literal,
522 })
523 }
524 TokenKind::AddAssign => {
525 self.next_token_in_regex_context();
526 let value = self.parse_expression()?;
527 Ok(Statement::AddAssignment {
528 identifier: identifier.literal,
529 value,
530 })
531 }
532 TokenKind::SubtractAssign
533 | TokenKind::MultiplyAssign
534 | TokenKind::DivideAssign
535 | TokenKind::ModuloAssign
536 | TokenKind::PowerAssign => {
537 let assign_token = self.current_token.clone();
538 self.next_token_in_regex_context();
539 let right_value = self.parse_expression()?;
540 Ok(Statement::Assignment {
541 identifier: identifier.literal,
542 value: Expression::Infix {
543 left: Box::new(Expression::Identifier(identifier.literal)),
544 operator: compound_assign_operator(&assign_token),
545 right: Box::new(right_value),
546 },
547 })
548 }
549 _ => Err(self.unsupported_statement()),
550 }
551 }
552
553 fn parse_delete_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
554 self.next_token();
555 if self.current_token.kind != TokenKind::Identifier {
556 return Err(self.expected_identifier());
557 }
558 let identifier = self.current_token.literal;
559 self.next_token();
560 if self.current_token.kind != TokenKind::LeftSquareBracket {
561 return Ok(Statement::Delete {
562 identifier,
563 index: None,
564 });
565 }
566
567 self.next_token_in_regex_context();
568 let index = self.parse_array_index_expression()?;
569 if self.current_token.kind != TokenKind::RightSquareBracket {
570 return Err(self.expected_right_square_bracket());
571 }
572 self.next_token();
573 Ok(Statement::Delete {
574 identifier,
575 index: Some(index),
576 })
577 }
578
579 fn parse_break_statement(&mut self) -> Statement<'a> {
580 self.next_token();
581 Statement::Break
582 }
583
584 fn parse_continue_statement(&mut self) -> Statement<'a> {
585 self.next_token();
586 Statement::Continue
587 }
588
589 fn parse_pre_increment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
590 self.next_token();
591 if self.current_token.kind != TokenKind::Identifier {
592 return Err(self.expected_identifier());
593 }
594 let identifier = self.current_token.literal;
595 self.next_token();
596 Ok(Statement::PreIncrement { identifier })
597 }
598
599 fn parse_pre_decrement_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
600 self.next_token();
601 if self.current_token.kind != TokenKind::Identifier {
602 return Err(self.expected_identifier());
603 }
604 let identifier = self.current_token.literal;
605 self.next_token();
606 Ok(Statement::PreDecrement { identifier })
607 }
608
609 fn parse_split_assignment_statement(
610 &mut self,
611 identifier: &'a str,
612 ) -> Result<Statement<'a>, ParseError<'a>> {
613 self.next_token();
614 if self.current_token.kind != TokenKind::LeftParen {
615 return Err(self.expected_left_paren());
616 }
617 self.next_token_in_regex_context();
618 let string = self.parse_expression()?;
619 if self.current_token.kind != TokenKind::Comma {
620 return Err(self.expected_comma());
621 }
622 self.next_token();
623 if self.current_token.kind != TokenKind::Identifier {
624 return Err(self.expected_identifier());
625 }
626 let array = self.current_token.literal;
627 self.next_token();
628 let separator = if self.current_token.kind == TokenKind::Comma {
629 self.next_token_in_regex_context();
630 Some(self.parse_expression()?)
631 } else {
632 None
633 };
634 if self.current_token.kind != TokenKind::RightParen {
635 return Err(self.expected_right_paren());
636 }
637 self.next_token();
638 Ok(Statement::SplitAssignment {
639 identifier,
640 string,
641 array,
642 separator,
643 })
644 }
645
646 fn parse_split_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
647 self.next_token();
648 if self.current_token.kind != TokenKind::LeftParen {
649 return Err(self.expected_left_paren());
650 }
651 self.next_token_in_regex_context();
652 let string = self.parse_expression()?;
653 if self.current_token.kind != TokenKind::Comma {
654 return Err(self.expected_comma());
655 }
656 self.next_token();
657 if self.current_token.kind != TokenKind::Identifier {
658 return Err(self.expected_identifier());
659 }
660 let array = self.current_token.literal;
661 self.next_token();
662 let separator = if self.current_token.kind == TokenKind::Comma {
663 self.next_token_in_regex_context();
664 Some(self.parse_expression()?)
665 } else {
666 None
667 };
668 if self.current_token.kind != TokenKind::RightParen {
669 return Err(self.expected_right_paren());
670 }
671 self.next_token();
672 Ok(Statement::Split {
673 string,
674 array,
675 separator,
676 })
677 }
678
679 fn parse_field_assignment_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
680 self.next_token();
681 let field = self.parse_primary_expression()?;
682 let assign_token = self.current_token.clone();
683 self.next_token_in_regex_context();
684 let right_value = self.parse_expression()?;
685
686 let value = if assign_token.kind == TokenKind::Assign {
687 right_value
688 } else {
689 let operator = compound_assign_operator(&assign_token);
690 Expression::Infix {
691 left: Box::new(Expression::Field(Box::new(field.clone()))),
692 operator,
693 right: Box::new(right_value),
694 }
695 };
696 Ok(Statement::FieldAssignment { field, value })
697 }
698
699 fn parse_if_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
700 self.next_token();
701 if self.current_token.kind != TokenKind::LeftParen {
702 return Err(self.expected_left_paren());
703 }
704 self.next_token_in_regex_context();
705 let condition = self.parse_condition_in_parens()?;
706 if self.current_token.kind != TokenKind::RightParen {
707 return Err(self.expected_right_paren());
708 }
709 self.next_token();
710 let then_statements = self.parse_control_statement_body()?;
711
712 self.skip_terminators();
713
714 if self.current_token.kind == TokenKind::Else {
715 self.next_token();
716 let else_statements = self.parse_control_statement_body()?;
717 return Ok(Statement::IfElse {
718 condition,
719 then_statements,
720 else_statements,
721 });
722 }
723
724 Ok(Statement::If {
725 condition,
726 then_statements,
727 })
728 }
729
730 fn parse_exit_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
731 self.next_token();
732 let status = if self.is_statement_terminator() {
733 None
734 } else {
735 Some(self.parse_expression()?)
736 };
737 Ok(Statement::Exit(status))
738 }
739
740 fn parse_return_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
741 self.next_token();
742 let value = if self.is_statement_terminator() {
743 None
744 } else {
745 Some(self.parse_expression()?)
746 };
747 Ok(Statement::Return(value))
748 }
749
750 fn parse_next_statement(&mut self) -> Statement<'a> {
751 self.next_token();
752 Statement::Next
753 }
754
755 fn parse_statement_block(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
756 self.next_token(); let mut statements = Vec::new();
758 while self.current_token.kind != TokenKind::RightCurlyBrace
759 && self.current_token.kind != TokenKind::Eof
760 {
761 self.skip_terminators();
762
763 if self.current_token.kind == TokenKind::RightCurlyBrace
764 || self.current_token.kind == TokenKind::Eof
765 {
766 break;
767 }
768 statements.push(self.parse_statement()?);
769 }
770 if self.current_token.kind != TokenKind::RightCurlyBrace {
771 return Err(self.expected_right_brace());
772 }
773 self.next_token();
774 Ok(statements)
775 }
776
777 fn parse_control_statement_body(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
778 self.skip_newlines();
779
780 if self.current_token.kind == TokenKind::LeftCurlyBrace {
781 return self.parse_statement_block();
782 }
783
784 if self.current_token.kind == TokenKind::Semicolon {
785 self.next_token();
786 return Ok(vec![Statement::Empty]);
787 }
788
789 Ok(vec![self.parse_statement()?])
790 }
791
792 fn parse_while_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
793 self.next_token();
794 if self.current_token.kind != TokenKind::LeftParen {
795 return Err(self.expected_left_paren());
796 }
797 self.next_token_in_regex_context();
798 let condition = self.parse_condition_in_parens()?;
799 if self.current_token.kind != TokenKind::RightParen {
800 return Err(self.expected_right_paren());
801 }
802 self.next_token();
803 let statements = self.parse_control_statement_body()?;
804 Ok(Statement::While {
805 condition,
806 statements,
807 })
808 }
809
810 fn parse_do_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
811 self.next_token();
812 let statements = self.parse_control_statement_body()?;
813
814 self.skip_terminators();
815
816 if self.current_token.kind != TokenKind::While {
817 return Err(self.expected_while());
818 }
819 self.next_token();
820 if self.current_token.kind != TokenKind::LeftParen {
821 return Err(self.expected_left_paren());
822 }
823 self.next_token_in_regex_context();
824 let condition = self.parse_condition_in_parens()?;
825 if self.current_token.kind != TokenKind::RightParen {
826 return Err(self.expected_right_paren());
827 }
828 self.next_token();
829 Ok(Statement::DoWhile {
830 condition,
831 statements,
832 })
833 }
834
835 fn parse_for_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
836 self.next_token();
837 if self.current_token.kind != TokenKind::LeftParen {
838 return Err(self.expected_left_paren());
839 }
840 self.next_token();
841 self.skip_newlines();
842
843 let init = if self.current_token.kind == TokenKind::Semicolon {
844 Statement::Empty
845 } else if self.current_token.kind == TokenKind::Identifier {
846 let variable = self.current_token.clone();
847 self.next_token();
848 if self.current_token.kind == TokenKind::In {
849 self.next_token();
850 if self.current_token.kind != TokenKind::Identifier {
851 return Err(self.expected_identifier());
852 }
853 let array = self.current_token.literal;
854 self.next_token();
855 if self.current_token.kind != TokenKind::RightParen {
856 return Err(self.expected_right_paren());
857 }
858 self.next_token();
859 let statements = self.parse_control_statement_body()?;
860 return Ok(Statement::ForIn {
861 variable: variable.literal,
862 array,
863 statements,
864 });
865 }
866 self.parse_assignment_statement_with_identifier(variable)?
867 } else {
868 self.parse_simple_statement()?
869 };
870 self.skip_newlines();
871 if self.current_token.kind != TokenKind::Semicolon {
872 return Err(self.expected_semicolon());
873 }
874 self.next_token_in_regex_context();
875 self.skip_newlines_in_regex_context();
876
877 let condition = if self.current_token.kind == TokenKind::Semicolon {
878 Expression::Number(1.0)
879 } else {
880 self.parse_expression()?
881 };
882 self.skip_newlines();
883 if self.current_token.kind != TokenKind::Semicolon {
884 return Err(self.expected_semicolon());
885 }
886 self.next_token_in_regex_context();
887 self.skip_newlines_in_regex_context();
888
889 let update = if self.current_token.kind == TokenKind::RightParen {
890 Statement::Empty
891 } else {
892 self.parse_simple_statement()?
893 };
894 self.skip_newlines();
895 if self.current_token.kind != TokenKind::RightParen {
896 return Err(self.expected_right_paren());
897 }
898 self.next_token();
899 let statements = self.parse_control_statement_body()?;
900
901 Ok(Statement::For {
902 init: Box::new(init),
903 condition,
904 update: Box::new(update),
905 statements,
906 })
907 }
908
909 fn parse_print_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
910 let mut expressions = Vec::new();
911 let mut expect_more = false;
912 self.next_token();
913
914 loop {
915 if self.current_token.kind == TokenKind::RightCurlyBrace
916 || self.current_token.kind == TokenKind::RightParen
917 || self.current_token.kind == TokenKind::Eof
918 || self.current_token.kind == TokenKind::GreaterThan
919 || self.current_token.kind == TokenKind::Append
920 || self.current_token.kind == TokenKind::Pipe
921 {
922 break;
923 }
924
925 if self.current_token.kind == TokenKind::NewLine
926 || self.current_token.kind == TokenKind::Semicolon
927 {
928 if expect_more {
929 self.next_token();
930 continue;
931 }
932 break;
933 }
934
935 if self.current_token.kind == TokenKind::Comma {
936 self.next_token();
937 expect_more = true;
938 continue;
939 }
940
941 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
942 let expression = self.parse_expression()?;
943 if started_with_left_paren {
944 if let Some(grouped_expressions) =
945 Self::split_print_parenthesized_list(expression.clone())
946 {
947 expressions.extend(grouped_expressions);
948 } else {
949 expressions.push(expression);
950 }
951 } else {
952 expressions.push(expression);
953 }
954 expect_more = false;
955 }
956 if self.current_token.kind == TokenKind::GreaterThan
957 || self.current_token.kind == TokenKind::Append
958 {
959 let append = self.current_token.kind == TokenKind::Append;
960 self.next_token();
961 let target = self.parse_expression()?;
962 return Ok(Statement::PrintRedirect {
963 expressions,
964 target,
965 append,
966 });
967 }
968 if self.current_token.kind == TokenKind::Pipe {
969 self.next_token();
970 let target = self.parse_expression()?;
971 return Ok(Statement::PrintPipe {
972 expressions,
973 target,
974 });
975 }
976
977 Ok(Statement::Print(expressions))
978 }
979
980 fn parse_printf_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
981 self.next_token();
982 let expressions = if self.current_token.kind == TokenKind::LeftParen {
983 self.next_token_in_regex_context();
984 let mut expressions = Vec::new();
985 while self.current_token.kind != TokenKind::RightParen
986 && self.current_token.kind != TokenKind::Eof
987 {
988 if self.current_token.kind == TokenKind::Comma {
989 self.next_token();
990 continue;
991 }
992 expressions.push(self.parse_expression()?);
993 }
994 if self.current_token.kind == TokenKind::RightParen {
995 self.next_token();
996 }
997 expressions
998 } else {
999 self.parse_expression_list_until_action_end_from_current()?
1000 };
1001
1002 if expressions.is_empty() {
1003 return Err(self.missing_printf_format_string());
1004 }
1005
1006 Ok(Statement::Printf(expressions))
1007 }
1008
1009 fn parse_gsub_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1010 self.next_token();
1011 if self.current_token.kind != TokenKind::LeftParen {
1012 return Err(self.expected_left_paren());
1013 }
1014
1015 self.next_token_in_regex_context();
1016 let pattern = self.parse_expression()?;
1017
1018 if self.current_token.kind != TokenKind::Comma {
1019 return Err(self.expected_comma());
1020 }
1021 self.next_token();
1022 let replacement = self.parse_expression()?;
1023
1024 let target = if self.current_token.kind == TokenKind::Comma {
1025 self.next_token();
1026 Some(self.parse_expression()?)
1027 } else {
1028 None
1029 };
1030
1031 if self.current_token.kind != TokenKind::RightParen {
1032 return Err(self.expected_right_paren());
1033 }
1034 self.next_token();
1035
1036 Ok(Statement::Gsub {
1037 pattern,
1038 replacement,
1039 target,
1040 })
1041 }
1042
1043 fn parse_sub_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1044 self.next_token();
1045 if self.current_token.kind != TokenKind::LeftParen {
1046 return Err(self.expected_left_paren());
1047 }
1048
1049 self.next_token_in_regex_context();
1050 let pattern = self.parse_expression()?;
1051
1052 if self.current_token.kind != TokenKind::Comma {
1053 return Err(self.expected_comma());
1054 }
1055 self.next_token();
1056 let replacement = self.parse_expression()?;
1057
1058 if self.current_token.kind == TokenKind::Comma {
1059 return Err(self.unsupported_sub_target());
1060 }
1061
1062 if self.current_token.kind != TokenKind::RightParen {
1063 return Err(self.expected_right_paren());
1064 }
1065 self.next_token();
1066
1067 Ok(Statement::Sub {
1068 pattern,
1069 replacement,
1070 })
1071 }
1072
1073 fn parse_system_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
1074 self.next_token();
1075 if self.current_token.kind != TokenKind::LeftParen {
1076 return Err(self.expected_left_paren());
1077 }
1078 self.next_token();
1079 let command = self.parse_expression()?;
1080 if self.current_token.kind != TokenKind::RightParen {
1081 return Err(self.expected_right_paren());
1082 }
1083 self.next_token();
1084 Ok(Statement::System(command))
1085 }
1086
1087 fn parse_expression_list_until_action_end_from_current(
1088 &mut self,
1089 ) -> Result<Vec<Expression<'a>>, ParseError<'a>> {
1090 let mut expressions = Vec::new();
1091 let mut expect_more = false;
1092
1093 loop {
1094 if self.current_token.kind == TokenKind::RightCurlyBrace
1095 || self.current_token.kind == TokenKind::RightParen
1096 || self.current_token.kind == TokenKind::Eof
1097 {
1098 break;
1099 }
1100
1101 if self.current_token.kind == TokenKind::NewLine
1102 || self.current_token.kind == TokenKind::Semicolon
1103 {
1104 if expect_more {
1105 self.next_token();
1106 continue;
1107 }
1108 break;
1109 }
1110
1111 if self.current_token.kind == TokenKind::Comma {
1112 self.next_token();
1113 expect_more = true;
1114 continue;
1115 }
1116
1117 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
1118 let expression = self.parse_expression()?;
1119 expressions.push(expression);
1120 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1121 while self.current_token.kind == TokenKind::Comma {
1122 self.next_token();
1123 expressions.push(self.parse_expression()?);
1124 }
1125 if self.current_token.kind != TokenKind::RightParen {
1126 return Err(self.expected_right_paren());
1127 }
1128 self.next_token();
1129 }
1130 expect_more = false;
1131 }
1132
1133 Ok(expressions)
1134 }
1135
1136 fn parse_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1137 self.parse_expression_with_min_precedence(0)
1138 }
1139
1140 fn parse_expression_with_min_precedence(
1141 &mut self,
1142 min_precedence: u8,
1143 ) -> Result<Expression<'a>, ParseError<'a>> {
1144 let left = self.parse_primary_expression()?;
1145 self.parse_expression_suffix(left, min_precedence)
1146 }
1147
1148 fn parse_expression_suffix(
1149 &mut self,
1150 mut left: Expression<'a>,
1151 min_precedence: u8,
1152 ) -> Result<Expression<'a>, ParseError<'a>> {
1153 const CONCAT_LEFT_PRECEDENCE: u8 = 6;
1154 const CONCAT_RIGHT_PRECEDENCE: u8 = 7;
1155
1156 loop {
1157 if self.current_token.kind == TokenKind::QuestionMark {
1158 if min_precedence > 0 {
1159 break;
1160 }
1161 self.next_token_in_regex_context();
1162 let then_expr = self.parse_expression_with_min_precedence(0)?;
1163 if self.current_token.kind != TokenKind::Colon {
1164 return Err(self.expected_colon());
1165 }
1166 self.next_token_in_regex_context();
1167 let else_expr = self.parse_expression_with_min_precedence(0)?;
1168 left = Expression::Ternary {
1169 condition: Box::new(left),
1170 then_expr: Box::new(then_expr),
1171 else_expr: Box::new(else_expr),
1172 };
1173 continue;
1174 }
1175
1176 if infix_operator_precedence(&self.current_token.kind).is_none()
1177 && is_expression_start(&self.current_token.kind)
1178 {
1179 if CONCAT_LEFT_PRECEDENCE < min_precedence {
1180 break;
1181 }
1182
1183 let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE)?;
1184 left = Expression::Concatenation {
1185 left: Box::new(left),
1186 right: Box::new(right),
1187 };
1188 continue;
1189 }
1190
1191 let (left_precedence, right_precedence) =
1192 match infix_operator_precedence(&self.current_token.kind) {
1193 Some(value) => value,
1194 None => break,
1195 };
1196
1197 if left_precedence < min_precedence {
1198 break;
1199 }
1200
1201 let operator = self.current_token.clone();
1202 if matches!(
1203 operator.kind,
1204 TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
1205 ) {
1206 self.next_token_in_regex_context();
1207 } else {
1208 self.next_token();
1209 }
1210 let right = self.parse_expression_with_min_precedence(right_precedence)?;
1211
1212 left = Expression::Infix {
1213 left: Box::new(left),
1214 operator,
1215 right: Box::new(right),
1216 };
1217 }
1218
1219 Ok(left)
1220 }
1221
1222 fn parse_condition_in_parens(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1223 let mut condition = self.parse_expression()?;
1224 if self.current_token.kind == TokenKind::Comma {
1225 while self.current_token.kind == TokenKind::Comma {
1226 let operator = self.current_token.clone();
1227 self.next_token_in_regex_context();
1228 let right = self.parse_expression()?;
1229 condition = Expression::Infix {
1230 left: Box::new(condition),
1231 operator,
1232 right: Box::new(right),
1233 };
1234 }
1235 if self.current_token.kind != TokenKind::RightParen {
1236 return Err(self.expected_right_paren());
1237 }
1238 self.next_token();
1239 condition = self.parse_expression_suffix(condition, 0)?;
1240 }
1241 Ok(condition)
1242 }
1243
1244 fn parse_primary_expression(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1245 if self.current_token.kind == TokenKind::Minus {
1246 let operator = self.current_token.clone();
1247 self.next_token();
1248 let right = self.parse_primary_expression()?;
1249 return Ok(Expression::Infix {
1250 left: Box::new(Expression::Number(0.0)),
1251 operator,
1252 right: Box::new(right),
1253 });
1254 }
1255 if self.current_token.kind == TokenKind::Plus {
1256 self.next_token();
1257 return self.parse_primary_expression();
1258 }
1259 if self.current_token.kind == TokenKind::ExclamationMark {
1260 self.next_token_in_regex_context();
1261 let expression = self.parse_primary_expression()?;
1262 return Ok(Expression::Not(Box::new(expression)));
1263 }
1264 if self.current_token.kind == TokenKind::Increment {
1265 self.next_token();
1266 let expression = self.parse_primary_expression()?;
1267 return Ok(Expression::PreIncrement(Box::new(expression)));
1268 }
1269 if self.current_token.kind == TokenKind::Decrement {
1270 self.next_token();
1271 let expression = self.parse_primary_expression()?;
1272 return Ok(Expression::PreDecrement(Box::new(expression)));
1273 }
1274
1275 let mut expression = self.parse_primary_atom()?;
1276 match self.current_token.kind {
1277 TokenKind::Increment => {
1278 self.next_token();
1279 expression = Expression::PostIncrement(Box::new(expression));
1280 }
1281 TokenKind::Decrement => {
1282 self.next_token();
1283 expression = Expression::PostDecrement(Box::new(expression));
1284 }
1285 _ => {}
1286 }
1287 Ok(expression)
1288 }
1289
1290 fn parse_primary_atom(&mut self) -> Result<Expression<'a>, ParseError<'a>> {
1291 match self.current_token.kind {
1292 TokenKind::String => {
1293 let expression = Expression::String(self.current_token.literal);
1294 self.next_token();
1295 Ok(expression)
1296 }
1297 TokenKind::Regex => {
1298 let expression = Expression::Regex(self.current_token.literal);
1299 self.next_token();
1300 Ok(expression)
1301 }
1302 TokenKind::Number => {
1303 let expression = self
1304 .parse_number_expression()
1305 .ok_or_else(|| self.invalid_numeric_literal())?;
1306 self.next_token();
1307 Ok(expression)
1308 }
1309 TokenKind::DollarSign => {
1310 self.next_token();
1311 let expression = self.parse_primary_atom()?;
1312 Ok(Expression::Field(Box::new(expression)))
1313 }
1314 TokenKind::LeftParen => {
1315 self.next_token_in_regex_context();
1316 let mut expression = self.parse_expression()?;
1317 while self.current_token.kind == TokenKind::Comma {
1318 let operator = self.current_token.clone();
1319 self.next_token_in_regex_context();
1320 let right = self.parse_expression()?;
1321 expression = Expression::Infix {
1322 left: Box::new(expression),
1323 operator,
1324 right: Box::new(right),
1325 };
1326 }
1327 if self.current_token.kind != TokenKind::RightParen {
1328 return Err(self.expected_right_paren());
1329 }
1330 self.next_token();
1331 Ok(expression)
1332 }
1333 TokenKind::Identifier => {
1334 let identifier = self.current_token.clone();
1335 self.next_token();
1336 if self.current_token.kind == TokenKind::LeftParen
1337 && self.token_is_immediately_after(&identifier)
1338 {
1339 let args = self.parse_call_arguments()?;
1340 return Ok(Expression::FunctionCall {
1341 name: identifier.literal,
1342 args,
1343 });
1344 }
1345 if self.current_token.kind == TokenKind::LeftSquareBracket {
1346 self.next_token_in_regex_context();
1347 let index = self.parse_array_index_expression()?;
1348 if self.current_token.kind != TokenKind::RightSquareBracket {
1349 return Err(self.expected_right_square_bracket());
1350 }
1351 self.next_token();
1352 Ok(Expression::ArrayAccess {
1353 identifier: identifier.literal,
1354 index: Box::new(index),
1355 })
1356 } else {
1357 Ok(Expression::Identifier(identifier.literal))
1358 }
1359 }
1360 TokenKind::Length => {
1361 self.next_token();
1362 if self.current_token.kind == TokenKind::LeftParen {
1363 self.next_token();
1364 if self.current_token.kind == TokenKind::RightParen {
1365 self.next_token();
1366 Ok(Expression::Length(None))
1367 } else {
1368 let expression = self.parse_expression()?;
1369 if self.current_token.kind != TokenKind::RightParen {
1370 return Err(self.expected_right_paren());
1371 }
1372 self.next_token();
1373 Ok(Expression::Length(Some(Box::new(expression))))
1374 }
1375 } else {
1376 Ok(Expression::Length(None))
1377 }
1378 }
1379 TokenKind::Substr => {
1380 self.next_token();
1381 if self.current_token.kind != TokenKind::LeftParen {
1382 return Err(self.expected_left_paren());
1383 }
1384 self.next_token();
1385 let string = self.parse_expression()?;
1386 if self.current_token.kind != TokenKind::Comma {
1387 return Err(self.expected_comma());
1388 }
1389 self.next_token();
1390 let start = self.parse_expression()?;
1391 let mut length = None;
1392 if self.current_token.kind == TokenKind::Comma {
1393 self.next_token();
1394 length = Some(Box::new(self.parse_expression()?));
1395 }
1396 if self.current_token.kind != TokenKind::RightParen {
1397 return Err(self.expected_right_paren());
1398 }
1399 self.next_token();
1400 Ok(Expression::Substr {
1401 string: Box::new(string),
1402 start: Box::new(start),
1403 length,
1404 })
1405 }
1406 TokenKind::Rand => {
1407 self.next_token();
1408 if self.current_token.kind == TokenKind::LeftParen {
1409 self.next_token();
1410 if self.current_token.kind != TokenKind::RightParen {
1411 return Err(self.expected_right_paren());
1412 }
1413 self.next_token();
1414 }
1415 Ok(Expression::Rand)
1416 }
1417 TokenKind::Close
1418 | TokenKind::Cos
1419 | TokenKind::Exp
1420 | TokenKind::Index
1421 | TokenKind::Int
1422 | TokenKind::Log
1423 | TokenKind::Match
1424 | TokenKind::Sin
1425 | TokenKind::Sprintf
1426 | TokenKind::Split
1427 | TokenKind::Sqrt
1428 | TokenKind::Srand => {
1429 let name = self.current_token.literal;
1430 self.next_token();
1431 if self.current_token.kind == TokenKind::LeftParen {
1432 let args = self.parse_call_arguments()?;
1433 return Ok(Expression::FunctionCall { name, args });
1434 }
1435 Err(self.expected_left_paren())
1436 }
1437 _ => Err(self.expected_statement()),
1438 }
1439 }
1440
1441 pub fn try_parse_program(&mut self) -> Result<Program<'_>, ParseError<'a>> {
1442 let mut program = Program::new();
1443
1444 while !self.is_eof() {
1445 match self.parse_next_rule()? {
1446 Some(Rule::Begin(action)) => program.add_begin_block(action),
1447 Some(Rule::End(action)) => program.add_end_block(action),
1448 Some(rule) => program.add_rule(rule),
1449 None => {}
1450 }
1451 self.next_token_in_regex_context();
1452 }
1453
1454 for definition in self.function_definitions.drain(..) {
1455 program.add_function_definition(definition);
1456 }
1457
1458 Ok(program)
1459 }
1460
1461 pub fn parse_program(&mut self) -> Program<'_> {
1462 self.try_parse_program()
1463 .unwrap_or_else(|err| panic!("{err}"))
1464 }
1465
1466 fn parse_call_arguments(&mut self) -> Result<Vec<Expression<'a>>, ParseError<'a>> {
1467 if self.current_token.kind != TokenKind::LeftParen {
1468 return Ok(vec![]);
1469 }
1470 self.next_token_in_regex_context();
1471 let mut args = Vec::new();
1472 while self.current_token.kind != TokenKind::RightParen
1473 && self.current_token.kind != TokenKind::Eof
1474 {
1475 if self.current_token.kind == TokenKind::Comma {
1476 self.next_token();
1477 continue;
1478 }
1479 args.push(self.parse_expression()?);
1480 }
1481 if self.current_token.kind == TokenKind::RightParen {
1482 self.next_token();
1483 }
1484 Ok(args)
1485 }
1486}
1487
1488fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1489 match kind {
1490 TokenKind::Assign
1491 | TokenKind::AddAssign
1492 | TokenKind::SubtractAssign
1493 | TokenKind::MultiplyAssign
1494 | TokenKind::DivideAssign
1495 | TokenKind::ModuloAssign
1496 | TokenKind::PowerAssign => Some((0, 0)),
1497 TokenKind::Or => Some((1, 2)),
1498 TokenKind::And => Some((3, 4)),
1499 TokenKind::Equal
1500 | TokenKind::NotEqual
1501 | TokenKind::GreaterThan
1502 | TokenKind::GreaterThanOrEqual
1503 | TokenKind::In
1504 | TokenKind::LessThan
1505 | TokenKind::LessThanOrEqual
1506 | TokenKind::Tilde
1507 | TokenKind::NoMatch => Some((5, 6)),
1508 TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1509 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1510 TokenKind::Caret => Some((13, 12)),
1511 _ => None,
1512 }
1513}
1514
1515fn is_expression_start(kind: &TokenKind) -> bool {
1516 matches!(
1517 kind,
1518 TokenKind::String
1519 | TokenKind::Regex
1520 | TokenKind::Number
1521 | TokenKind::DollarSign
1522 | TokenKind::LeftParen
1523 | TokenKind::Identifier
1524 | TokenKind::Cos
1525 | TokenKind::Exp
1526 | TokenKind::Index
1527 | TokenKind::Int
1528 | TokenKind::Length
1529 | TokenKind::Log
1530 | TokenKind::Match
1531 | TokenKind::Rand
1532 | TokenKind::Sin
1533 | TokenKind::Sprintf
1534 | TokenKind::Split
1535 | TokenKind::Sqrt
1536 | TokenKind::Srand
1537 | TokenKind::Substr
1538 | TokenKind::Increment
1539 | TokenKind::Decrement
1540 )
1541}
1542
1543fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1544 let (kind, literal) = match token.kind {
1545 TokenKind::AddAssign => (TokenKind::Plus, "+"),
1546 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1547 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1548 TokenKind::DivideAssign => (TokenKind::Division, "/"),
1549 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1550 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1551 _ => unreachable!(
1552 "compound_assign_operator called with non-compound token: {:?}",
1553 token.kind
1554 ),
1555 };
1556
1557 Token::new(kind, literal, token.span.start)
1558}
1559
1560#[cfg(test)]
1561mod tests {
1562 use super::*;
1563
1564 #[test]
1565 fn create_parser() {
1566 let mut parser = Parser::new(Lexer::new("42 == 42"));
1567
1568 assert_eq!(parser.current_token.literal, "42");
1569 parser.next_token();
1570 assert_eq!(parser.current_token.literal, "==");
1571 }
1572
1573 #[test]
1574 fn parse_empty_program() {
1575 let mut parser = Parser::new(Lexer::new(""));
1576
1577 let program = parser.parse_program();
1578
1579 assert_eq!(program.len(), 0);
1580 }
1581
1582 #[test]
1583 fn parse_statement_with_unhandled_token_returns_parse_error() {
1584 let mut parser = Parser::new(Lexer::new("BEGIN { else }"));
1585
1586 let err = parser
1587 .try_parse_program()
1588 .expect_err("expected parse error for stray else");
1589
1590 assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1591 assert_eq!(err.token.kind, TokenKind::Else);
1592 }
1593
1594 #[test]
1595 fn parse_begin_without_left_brace_returns_parse_error() {
1596 let mut parser = Parser::new(Lexer::new("BEGIN print }"));
1597
1598 let err = parser
1599 .try_parse_program()
1600 .expect_err("expected parse error for missing left brace");
1601
1602 assert_eq!(err.kind, ParseErrorKind::ExpectedLeftBrace);
1603 assert_eq!(err.token.kind, TokenKind::Print);
1604 }
1605
1606 #[test]
1607 fn parse_delete_without_identifier_returns_parse_error() {
1608 let mut parser = Parser::new(Lexer::new("{ delete 1 }"));
1609
1610 let err = parser
1611 .try_parse_program()
1612 .expect_err("expected parse error for delete without identifier");
1613
1614 assert_eq!(err.kind, ParseErrorKind::ExpectedIdentifier);
1615 assert_eq!(err.token.kind, TokenKind::Number);
1616 }
1617
1618 #[test]
1619 fn parse_if_without_right_paren_returns_parse_error() {
1620 let mut parser = Parser::new(Lexer::new("{ if (x print }"));
1621
1622 let err = parser
1623 .try_parse_program()
1624 .expect_err("expected parse error for missing right paren");
1625
1626 assert_eq!(err.kind, ParseErrorKind::ExpectedRightParen);
1627 assert_eq!(err.token.kind, TokenKind::Print);
1628 }
1629
1630 #[test]
1631 fn parse_if_without_left_paren_returns_parse_error() {
1632 let mut parser = Parser::new(Lexer::new("{ if x) print }"));
1633
1634 let err = parser
1635 .try_parse_program()
1636 .expect_err("expected parse error for missing left paren after if");
1637
1638 assert_eq!(err.kind, ParseErrorKind::ExpectedLeftParen);
1639 assert_eq!(err.token.kind, TokenKind::Identifier);
1640 }
1641
1642 #[test]
1643 fn parse_grouped_expression_without_right_paren_returns_parse_error() {
1644 let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2 }"));
1645
1646 let err = parser
1647 .try_parse_program()
1648 .expect_err("expected parse error for missing right paren in grouped expression");
1649
1650 assert_eq!(err.kind, ParseErrorKind::ExpectedRightParen);
1651 assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1652 }
1653
1654 #[test]
1655 fn parse_array_assignment_without_right_square_bracket_returns_parse_error() {
1656 let mut parser = Parser::new(Lexer::new("BEGIN { a[1 = 2 }"));
1657
1658 let err = parser
1659 .try_parse_program()
1660 .expect_err("expected parse error for missing right square bracket");
1661
1662 assert_eq!(err.kind, ParseErrorKind::ExpectedRightSquareBracket);
1663 assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1664 }
1665
1666 #[test]
1667 fn parse_split_without_comma_returns_parse_error() {
1668 let mut parser = Parser::new(Lexer::new("BEGIN { split($0 arr) }"));
1669
1670 let err = parser
1671 .try_parse_program()
1672 .expect_err("expected parse error for missing comma in split");
1673
1674 assert_eq!(err.kind, ParseErrorKind::ExpectedComma);
1675 assert_eq!(err.token.kind, TokenKind::RightParen);
1676 }
1677
1678 #[test]
1679 fn parse_identifier_expression_statement_returns_parse_error() {
1680 let mut parser = Parser::new(Lexer::new("BEGIN { x + 1 }"));
1681
1682 let err = parser
1683 .try_parse_program()
1684 .expect_err("expected parse error for unsupported identifier expression statement");
1685
1686 assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
1687 assert_eq!(err.token.kind, TokenKind::Plus);
1688 }
1689
1690 #[test]
1691 fn parse_array_multiply_assignment_returns_parse_error() {
1692 let mut parser = Parser::new(Lexer::new("BEGIN { a[1] *= 2 }"));
1693
1694 let err = parser
1695 .try_parse_program()
1696 .expect_err("expected parse error for unsupported array compound assignment");
1697
1698 assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
1699 assert_eq!(err.token.kind, TokenKind::MultiplyAssign);
1700 }
1701
1702 #[test]
1703 fn parse_sub_with_target_returns_parse_error() {
1704 let mut parser = Parser::new(Lexer::new(r#"BEGIN { sub(/a/, "b", t) }"#));
1705
1706 let err = parser
1707 .try_parse_program()
1708 .expect_err("expected parse error for unsupported sub target argument");
1709
1710 assert_eq!(err.kind, ParseErrorKind::UnsupportedSubTarget);
1711 assert_eq!(err.token.kind, TokenKind::Comma);
1712 }
1713
1714 #[test]
1715 fn parse_ternary_without_colon_returns_parse_error() {
1716 let mut parser = Parser::new(Lexer::new("BEGIN { print 1 ? 2 }"));
1717
1718 let err = parser
1719 .try_parse_program()
1720 .expect_err("expected parse error for missing colon in ternary");
1721
1722 assert_eq!(err.kind, ParseErrorKind::ExpectedColon);
1723 assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1724 }
1725
1726 #[test]
1727 fn parse_print_with_extra_right_paren_returns_parse_error() {
1728 let mut parser = Parser::new(Lexer::new("BEGIN { print 1) }"));
1729
1730 let err = parser
1731 .try_parse_program()
1732 .expect_err("expected parse error for stray right paren after print expression");
1733
1734 assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1735 assert_eq!(err.token.kind, TokenKind::RightParen);
1736 }
1737
1738 #[test]
1739 fn parse_for_without_first_semicolon_returns_parse_error() {
1740 let mut parser = Parser::new(Lexer::new("BEGIN { for (i = 0 i < 3; i++) print i }"));
1741
1742 let err = parser
1743 .try_parse_program()
1744 .expect_err("expected parse error for missing first semicolon in for");
1745
1746 assert_eq!(err.kind, ParseErrorKind::ExpectedSemicolon);
1747 assert_eq!(err.token.kind, TokenKind::RightParen);
1748 }
1749
1750 #[test]
1751 fn parse_do_without_while_returns_parse_error() {
1752 let mut parser = Parser::new(Lexer::new("BEGIN { do print 1 }"));
1753
1754 let err = parser
1755 .try_parse_program()
1756 .expect_err("expected parse error for missing while after do body");
1757
1758 assert_eq!(err.kind, ParseErrorKind::ExpectedWhile);
1759 assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
1760 }
1761
1762 #[test]
1763 #[should_panic(expected = "compound_assign_operator called with non-compound token")]
1764 fn compound_assign_operator_panics_for_non_compound_token() {
1765 let token = Token::new(TokenKind::Assign, "=", 0);
1766 let _ = compound_assign_operator(&token);
1767 }
1768
1769 #[test]
1770 fn parse_printf_expression_list_with_extra_right_paren_returns_parse_error() {
1771 let mut parser = Parser::new(Lexer::new(r#"BEGIN { printf "%s", 1) }"#));
1772
1773 let err = parser
1774 .try_parse_program()
1775 .expect_err("expected parse error for stray right paren after printf arguments");
1776
1777 assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
1778 assert_eq!(err.token.kind, TokenKind::RightParen);
1779 }
1780
1781 #[test]
1782 fn parse_action_without_right_brace_returns_parse_error() {
1783 let mut parser = Parser::new(Lexer::new("BEGIN { print 1"));
1784
1785 let err = parser
1786 .try_parse_program()
1787 .expect_err("expected parse error for missing right brace in action");
1788
1789 assert_eq!(err.kind, ParseErrorKind::ExpectedRightBrace);
1790 assert_eq!(err.token.kind, TokenKind::Eof);
1791 }
1792
1793 #[test]
1794 fn parse_nested_block_without_right_brace_returns_parse_error() {
1795 let mut parser = Parser::new(Lexer::new("{ if (1) { print 1 }"));
1796
1797 let err = parser
1798 .try_parse_program()
1799 .expect_err("expected parse error for missing right brace in nested block");
1800
1801 assert_eq!(err.kind, ParseErrorKind::ExpectedRightBrace);
1802 assert_eq!(err.token.kind, TokenKind::Eof);
1803 }
1804
1805 #[test]
1806 fn parse_action_without_pattern() {
1807 let mut parser = Parser::new(Lexer::new("{ print }"));
1808
1809 let program = parser.parse_program();
1810
1811 assert_eq!(program.len(), 1);
1812 assert_eq!("{ print }", program.to_string());
1813 }
1814
1815 #[test]
1816 fn parse_action_with_leading_newlines() {
1817 let mut parser = Parser::new(Lexer::new("\n\n{ print }"));
1818
1819 let program = parser.parse_program();
1820
1821 assert_eq!(program.len(), 1);
1822 assert_eq!("{ print }", program.to_string());
1823 }
1824
1825 #[test]
1826 fn parse_begin_block() {
1827 let mut parser = Parser::new(Lexer::new("BEGIN { print }"));
1828
1829 let program = parser.parse_program();
1830
1831 assert_eq!(program.len(), 1);
1832 assert_eq!("BEGIN { print }", program.to_string());
1833 }
1834
1835 #[test]
1836 fn parse_end_block() {
1837 let mut parser = Parser::new(Lexer::new("END { print 42 }"));
1838
1839 let program = parser.parse_program();
1840
1841 assert_eq!(program.len(), 1);
1842 assert_eq!("END { print 42 }", program.to_string());
1843 }
1844
1845 #[test]
1846 fn parse_regex_pattern_action() {
1847 let mut parser = Parser::new(Lexer::new("/foo/ { print }"));
1848
1849 let program = parser.parse_program();
1850
1851 assert_eq!(program.len(), 1);
1852 assert_eq!("/foo/ { print }", program.to_string());
1853 }
1854
1855 #[test]
1856 fn parse_print_infix_expression() {
1857 let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
1858
1859 let program = parser.parse_program();
1860 let mut begin_blocks = program.begin_blocks_iter();
1861 let Action { statements } = begin_blocks.next().expect("expected begin block");
1862
1863 let exprs = match &statements[0] {
1864 Statement::Print(expressions) => expressions,
1865 _ => panic!("expected print statement"),
1866 };
1867
1868 match &exprs[0] {
1869 Expression::Infix {
1870 left,
1871 operator,
1872 right,
1873 } => {
1874 assert!(matches!(**left, Expression::Number(1.0)));
1875 assert_eq!(operator.kind, TokenKind::Plus);
1876 assert!(matches!(**right, Expression::Number(2.0)));
1877 }
1878 _ => panic!("expected infix expression"),
1879 }
1880 }
1881
1882 #[test]
1883 fn parse_print_parenthesized_expression() {
1884 let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
1885
1886 let program = parser.parse_program();
1887 let mut begin_blocks = program.begin_blocks_iter();
1888 let Action { statements } = begin_blocks.next().expect("expected begin block");
1889
1890 let exprs = match &statements[0] {
1891 Statement::Print(expressions) => expressions,
1892 _ => panic!("expected print statement"),
1893 };
1894
1895 match &exprs[0] {
1896 Expression::Infix {
1897 left,
1898 operator,
1899 right,
1900 } => {
1901 assert_eq!(operator.kind, TokenKind::Asterisk);
1902 assert!(matches!(**right, Expression::Number(3.0)));
1903 assert!(matches!(**left, Expression::Infix { .. }));
1904 }
1905 _ => panic!("expected infix expression"),
1906 }
1907 }
1908
1909 #[test]
1910 fn parse_print_multiplication_has_higher_precedence_than_addition() {
1911 let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
1912
1913 let program = parser.parse_program();
1914 let mut begin_blocks = program.begin_blocks_iter();
1915 let Action { statements } = begin_blocks.next().expect("expected begin block");
1916
1917 let exprs = match &statements[0] {
1918 Statement::Print(expressions) => expressions,
1919 _ => panic!("expected print statement"),
1920 };
1921
1922 match &exprs[0] {
1923 Expression::Infix {
1924 left,
1925 operator,
1926 right,
1927 } => {
1928 assert_eq!(operator.kind, TokenKind::Plus);
1929 assert!(matches!(**left, Expression::Number(1.0)));
1930 match &**right {
1931 Expression::Infix {
1932 operator: right_op, ..
1933 } => assert_eq!(right_op.kind, TokenKind::Asterisk),
1934 _ => panic!("expected nested infix expression"),
1935 }
1936 }
1937 _ => panic!("expected infix expression"),
1938 }
1939 }
1940
1941 #[test]
1942 fn parse_print_power_is_right_associative() {
1943 let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
1944
1945 let program = parser.parse_program();
1946 let mut begin_blocks = program.begin_blocks_iter();
1947 let Action { statements } = begin_blocks.next().expect("expected begin block");
1948
1949 let exprs = match &statements[0] {
1950 Statement::Print(expressions) => expressions,
1951 _ => panic!("expected print statement"),
1952 };
1953
1954 match &exprs[0] {
1955 Expression::Infix {
1956 left,
1957 operator,
1958 right,
1959 } => {
1960 assert_eq!(operator.kind, TokenKind::Caret);
1961 assert!(matches!(**left, Expression::Number(2.0)));
1962 match &**right {
1963 Expression::Infix {
1964 operator: right_op, ..
1965 } => assert_eq!(right_op.kind, TokenKind::Caret),
1966 _ => panic!("expected nested infix expression"),
1967 }
1968 }
1969 _ => panic!("expected infix expression"),
1970 }
1971 }
1972
1973 #[test]
1974 fn parse_print_minus_is_left_associative() {
1975 let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
1976
1977 let program = parser.parse_program();
1978 let mut begin_blocks = program.begin_blocks_iter();
1979 let Action { statements } = begin_blocks.next().expect("expected begin block");
1980
1981 let exprs = match &statements[0] {
1982 Statement::Print(expressions) => expressions,
1983 _ => panic!("expected print statement"),
1984 };
1985
1986 match &exprs[0] {
1987 Expression::Infix {
1988 left,
1989 operator,
1990 right,
1991 } => {
1992 assert_eq!(operator.kind, TokenKind::Minus);
1993 match &**left {
1994 Expression::Infix {
1995 operator: left_op, ..
1996 } => assert_eq!(left_op.kind, TokenKind::Minus),
1997 _ => panic!("expected nested infix expression"),
1998 }
1999 assert!(matches!(**right, Expression::Number(1.0)));
2000 }
2001 _ => panic!("expected infix expression"),
2002 }
2003 }
2004
2005 #[test]
2006 fn parse_print_concatenation() {
2007 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
2008
2009 let program = parser.parse_program();
2010 let mut begin_blocks = program.begin_blocks_iter();
2011 let Action { statements } = begin_blocks.next().expect("expected begin block");
2012
2013 let exprs = match &statements[0] {
2014 Statement::Print(expressions) => expressions,
2015 _ => panic!("expected print statement"),
2016 };
2017
2018 assert_eq!(exprs.len(), 1);
2019 match &exprs[0] {
2020 Expression::Concatenation { left, right } => {
2021 assert!(matches!(**left, Expression::String("Value:")));
2022 assert!(matches!(**right, Expression::Number(42.0)));
2023 }
2024 _ => panic!("expected concatenation expression"),
2025 }
2026 }
2027
2028 #[test]
2029 fn parse_continue_statement() {
2030 let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
2031
2032 let program = parser.parse_program();
2033 let mut rules = program.rules_iter();
2034 let rule = rules.next().expect("expected rule");
2035
2036 let statements = match rule {
2037 Rule::Action(Action { statements }) => statements,
2038 _ => panic!("expected action rule"),
2039 };
2040
2041 assert!(matches!(statements[0], Statement::Continue));
2042 }
2043
2044 #[test]
2045 fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
2046 let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
2047
2048 let program = parser.parse_program();
2049 let mut rules = program.rules_iter();
2050 let rule = rules.next().expect("expected rule");
2051
2052 let statements = match rule {
2053 Rule::Action(Action { statements }) => statements,
2054 _ => panic!("expected action rule"),
2055 };
2056
2057 let exprs = match &statements[1] {
2058 Statement::Print(expressions) => expressions,
2059 _ => panic!("expected print statement"),
2060 };
2061
2062 assert_eq!(exprs.len(), 1);
2063 match &exprs[0] {
2064 Expression::Concatenation { left, right } => {
2065 assert!(matches!(**left, Expression::Identifier("x")));
2066 assert!(matches!(**right, Expression::PreIncrement(_)));
2067 }
2068 _ => panic!("expected concatenation expression"),
2069 }
2070 }
2071
2072 #[test]
2073 fn parse_print_field_expression() {
2074 let mut parser = Parser::new(Lexer::new("{ print $1 }"));
2075
2076 let program = parser.parse_program();
2077 let mut rules = program.rules_iter();
2078 let rule = rules.next().expect("expected rule");
2079
2080 let statements = match rule {
2081 Rule::Action(Action { statements }) => statements,
2082 _ => panic!("expected action rule"),
2083 };
2084
2085 let exprs = match &statements[0] {
2086 Statement::Print(expressions) => expressions,
2087 _ => panic!("expected print statement"),
2088 };
2089
2090 match &exprs[0] {
2091 Expression::Field(inner) => assert!(matches!(**inner, Expression::Number(1.0))),
2092 _ => panic!("expected field expression"),
2093 }
2094 }
2095
2096 #[test]
2097 fn parse_print_with_commas() {
2098 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:", 42, $1 }"#));
2099
2100 let program = parser.parse_program();
2101
2102 assert_eq!(r#"BEGIN { print "Value:", 42, $1 }"#, program.to_string());
2103 }
2104
2105 #[test]
2106 fn parse_number_of_fields_identifier() {
2107 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print NF }"#));
2108
2109 let program = parser.parse_program();
2110
2111 assert_eq!(r#"BEGIN { print NF }"#, program.to_string());
2112 }
2113
2114 #[test]
2115 fn parse_printf_with_format_and_arguments() {
2116 let mut parser = Parser::new(Lexer::new(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#));
2117
2118 let program = parser.parse_program();
2119
2120 assert_eq!(
2121 r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#,
2122 program.to_string()
2123 );
2124 }
2125
2126 #[test]
2127 fn parse_print_ternary_expression() {
2128 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print x ? y : z }"#));
2129
2130 let program = parser.parse_program();
2131 let mut begin_blocks = program.begin_blocks_iter();
2132 let Action { statements } = begin_blocks.next().expect("expected begin block");
2133
2134 let exprs = match &statements[0] {
2135 Statement::Print(expressions) => expressions,
2136 _ => panic!("expected print statement"),
2137 };
2138
2139 assert_eq!(exprs.len(), 1);
2140 match &exprs[0] {
2141 Expression::Ternary {
2142 condition,
2143 then_expr,
2144 else_expr,
2145 } => {
2146 assert!(matches!(**condition, Expression::Identifier("x")));
2147 assert!(matches!(**then_expr, Expression::Identifier("y")));
2148 assert!(matches!(**else_expr, Expression::Identifier("z")));
2149 }
2150 _ => panic!("expected ternary expression"),
2151 }
2152 }
2153
2154 #[test]
2155 fn parse_printf_without_arguments_returns_parse_error() {
2156 let mut parser = Parser::new(Lexer::new(r#"{ printf }"#));
2157
2158 let err = parser
2159 .try_parse_program()
2160 .expect_err("expected parse error for printf without arguments");
2161
2162 assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
2163 }
2164
2165 #[test]
2166 fn parse_printf_without_arguments_in_parentheses_returns_parse_error() {
2167 let mut parser = Parser::new(Lexer::new(r#"{ printf() }"#));
2168
2169 let err = parser
2170 .try_parse_program()
2171 .expect_err("expected parse error for empty printf call");
2172
2173 assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
2174 }
2175
2176 #[test]
2177 fn parse_add_assignment_and_pre_increment() {
2178 let mut parser = Parser::new(Lexer::new(r#"/Asia/ { pop += $3; ++n }"#));
2179
2180 let program = parser.parse_program();
2181
2182 assert_eq!(r#"/Asia/ { pop += $3; ++n }"#, program.to_string());
2183 }
2184
2185 #[test]
2186 fn parse_regex_match_pattern_action() {
2187 let mut parser = Parser::new(Lexer::new(r#"$4 ~ /Asia/ { print $1 }"#));
2188
2189 let program = parser.parse_program();
2190
2191 assert_eq!(r#"$4 ~ /Asia/ { print $1 }"#, program.to_string());
2192 }
2193
2194 #[test]
2195 fn parse_not_pattern_action() {
2196 let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
2197
2198 let program = parser.parse_program();
2199 let mut rules = program.rules_iter();
2200 let rule = rules.next().expect("expected rule");
2201
2202 match rule {
2203 Rule::PatternAction {
2204 pattern: Some(Expression::Not(inner)),
2205 action: Some(Action { statements }),
2206 } => {
2207 assert!(matches!(**inner, Expression::Infix { .. }));
2208 assert!(matches!(statements[0], Statement::Print(_)));
2209 }
2210 _ => panic!("expected negated pattern action"),
2211 }
2212 }
2213
2214 #[test]
2215 fn parse_print_with_line_continuation_after_comma() {
2216 let mut parser = Parser::new(Lexer::new(
2217 "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
2218 ));
2219
2220 let program = parser.parse_program();
2221
2222 assert_eq!(
2223 "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
2224 program.to_string()
2225 );
2226 }
2227
2228 #[test]
2229 fn parse_gsub_statement() {
2230 let mut parser = Parser::new(Lexer::new(r#"{ gsub(/USA/, "United States"); print }"#));
2231
2232 let program = parser.parse_program();
2233
2234 assert_eq!(
2235 r#"{ gsub(/USA/, "United States"); print }"#,
2236 program.to_string()
2237 );
2238 }
2239
2240 #[test]
2241 fn parse_gsub_statement_with_target() {
2242 let mut parser = Parser::new(Lexer::new(r#"{ gsub(/[ \t]+/, "", t) }"#));
2243
2244 let program = parser.parse_program();
2245
2246 assert_eq!(r#"{ gsub(/[ \t]+/, "", t) }"#, program.to_string());
2247 }
2248
2249 #[test]
2250 fn parse_system_statement() {
2251 let mut parser = Parser::new(Lexer::new(r#"{ system("cat " $2) }"#));
2252
2253 let program = parser.parse_program();
2254
2255 assert_eq!(r#"{ system("cat " $2) }"#, program.to_string());
2256 }
2257
2258 #[test]
2259 fn parse_print_length_builtin_expression() {
2260 let mut parser = Parser::new(Lexer::new(r#"{ print length, $0 }"#));
2261
2262 let program = parser.parse_program();
2263
2264 assert_eq!(r#"{ print length, $0 }"#, program.to_string());
2265 }
2266
2267 #[test]
2268 fn parse_length_expression_as_rule_pattern() {
2269 let mut parser = Parser::new(Lexer::new(
2270 r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
2271 ));
2272
2273 let program = parser.parse_program();
2274
2275 assert_eq!(
2276 r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
2277 program.to_string()
2278 );
2279 }
2280
2281 #[test]
2282 fn parse_field_assignment_with_substr() {
2283 let mut parser = Parser::new(Lexer::new(r#"{ $1 = substr($1, 1, 3); print }"#));
2284
2285 let program = parser.parse_program();
2286
2287 assert_eq!(r#"{ $1 = substr($1, 1, 3); print }"#, program.to_string());
2288 }
2289
2290 #[test]
2291 fn parse_assignment_with_concatenation_and_substr() {
2292 let mut parser = Parser::new(Lexer::new(r#"{ s = s " " substr($1, 1, 3) }"#));
2293
2294 let program = parser.parse_program();
2295
2296 assert_eq!(r#"{ s = s " " substr($1, 1, 3) }"#, program.to_string());
2297 }
2298
2299 #[test]
2300 fn parse_field_divide_assignment() {
2301 let mut parser = Parser::new(Lexer::new(r#"{ $2 /= 1000; print }"#));
2302
2303 let program = parser.parse_program();
2304
2305 assert_eq!(r#"{ $2 = $2 / 1000; print }"#, program.to_string());
2306 }
2307
2308 #[test]
2309 fn parse_chained_assignment() {
2310 let mut parser = Parser::new(Lexer::new(r#"BEGIN { FS = OFS = "\t" }"#));
2311
2312 let program = parser.parse_program();
2313
2314 assert_eq!(r#"BEGIN { FS = OFS = "\t" }"#, program.to_string());
2315 }
2316
2317 #[test]
2318 fn parse_if_statement_with_block() {
2319 let mut parser = Parser::new(Lexer::new(
2320 r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2321 ));
2322
2323 let program = parser.parse_program();
2324
2325 assert_eq!(
2326 r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#,
2327 program.to_string()
2328 );
2329 }
2330
2331 #[test]
2332 fn parse_while_with_post_increment() {
2333 let mut parser = Parser::new(Lexer::new(
2334 r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2335 ));
2336
2337 let program = parser.parse_program();
2338
2339 assert_eq!(
2340 r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#,
2341 program.to_string()
2342 );
2343 }
2344
2345 #[test]
2346 fn parse_while_with_single_body_statement() {
2347 let mut parser = Parser::new(Lexer::new(r#"{ while (n > 1) print n }"#));
2348
2349 let program = parser.parse_program();
2350
2351 assert_eq!(r#"{ while (n > 1) { print n } }"#, program.to_string());
2352 }
2353
2354 #[test]
2355 fn parse_do_while_with_post_increment() {
2356 let mut parser = Parser::new(Lexer::new(
2357 r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2358 ));
2359
2360 let program = parser.parse_program();
2361
2362 assert_eq!(
2363 r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#,
2364 program.to_string()
2365 );
2366 }
2367
2368 #[test]
2369 fn parse_for_with_empty_body_statement() {
2370 let mut parser = Parser::new(Lexer::new(
2371 r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
2372 ));
2373
2374 let program = parser.parse_program();
2375
2376 assert_eq!(
2377 r#"{ for (i = 1; i <= NF; s += $i++) { }; print s }"#,
2378 program.to_string()
2379 );
2380 }
2381
2382 #[test]
2383 fn parse_post_decrement_statement() {
2384 let mut parser = Parser::new(Lexer::new(r#"{ k-- ; n-- }"#));
2385
2386 let program = parser.parse_program();
2387
2388 assert_eq!(r#"{ k--; n-- }"#, program.to_string());
2389 }
2390
2391 #[test]
2392 fn parse_rand_expression() {
2393 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print rand() }"#));
2394
2395 let program = parser.parse_program();
2396
2397 assert_eq!(r#"BEGIN { print rand() }"#, program.to_string());
2398 }
2399
2400 #[test]
2401 fn parse_math_builtin_expressions() {
2402 let mut parser = Parser::new(Lexer::new(
2403 r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2404 ));
2405
2406 let program = parser.parse_program();
2407
2408 assert_eq!(
2409 r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#,
2410 program.to_string()
2411 );
2412 }
2413
2414 #[test]
2415 fn parse_index_builtin_expression() {
2416 let mut parser = Parser::new(Lexer::new(r#"{ print index(1, $1) }"#));
2417
2418 let program = parser.parse_program();
2419
2420 assert_eq!(r#"{ print index(1, $1) }"#, program.to_string());
2421 }
2422
2423 #[test]
2424 fn parse_match_builtin_expression() {
2425 let mut parser = Parser::new(Lexer::new(r#"{ print match($NF, $1), RSTART, RLENGTH }"#));
2426
2427 let program = parser.parse_program();
2428
2429 assert_eq!(
2430 r#"{ print match($NF, $1), RSTART, RLENGTH }"#,
2431 program.to_string()
2432 );
2433 }
2434
2435 #[test]
2436 fn parse_in_membership_expression() {
2437 let mut parser = Parser::new(Lexer::new(r#"{ print 1 in x }"#));
2438
2439 let program = parser.parse_program();
2440
2441 assert_eq!(r#"{ print 1 in x }"#, program.to_string());
2442 }
2443
2444 #[test]
2445 fn parse_parenthesized_composite_membership_expression() {
2446 let mut parser = Parser::new(Lexer::new(r#"{ if (($0, $1) in x) print "yes" }"#));
2447
2448 let program = parser.parse_program();
2449
2450 assert_eq!(
2451 r#"{ if ($0, $1 in x) { print "yes" } }"#,
2452 program.to_string()
2453 );
2454 }
2455
2456 #[test]
2457 fn parse_for_loop_with_single_body_statement() {
2458 let mut parser = Parser::new(Lexer::new(r#"{ for (i = 1; i <= NF; i++) print $i }"#));
2459
2460 let program = parser.parse_program();
2461
2462 assert_eq!(
2463 r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
2464 program.to_string()
2465 );
2466 }
2467
2468 #[test]
2469 fn parse_if_with_single_statement_body() {
2470 let mut parser = Parser::new(Lexer::new(
2471 r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
2472 ));
2473
2474 let program = parser.parse_program();
2475
2476 assert_eq!(
2477 r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
2478 program.to_string()
2479 );
2480 }
2481
2482 #[test]
2483 fn parse_exit_statement() {
2484 let mut parser = Parser::new(Lexer::new(r#"NR >= 10 { exit }"#));
2485
2486 let program = parser.parse_program();
2487
2488 assert_eq!(r#"NR >= 10 { exit }"#, program.to_string());
2489 }
2490
2491 #[test]
2492 fn parse_exit_statement_with_status() {
2493 let mut parser = Parser::new(Lexer::new(r#"$1 < 5000 { exit NR }"#));
2494
2495 let program = parser.parse_program();
2496
2497 assert_eq!(r#"$1 < 5000 { exit NR }"#, program.to_string());
2498 }
2499
2500 #[test]
2501 fn parse_user_defined_function_call_statement() {
2502 let mut parser = Parser::new(Lexer::new(
2503 "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
2504 ));
2505
2506 let program = parser.parse_program();
2507
2508 let definition = program
2509 .function_definition("myabort")
2510 .expect("expected function definition");
2511 assert_eq!(definition.parameters, vec!["n"]);
2512 assert_eq!(definition.statements.len(), 1);
2513 }
2514
2515 #[test]
2516 fn parse_delete_array_element_statement() {
2517 let mut parser = Parser::new(Lexer::new(r#"{ delete x[i, j] }"#));
2518
2519 let program = parser.parse_program();
2520
2521 assert_eq!(r#"{ delete x[i, j] }"#, program.to_string());
2522 }
2523
2524 #[test]
2525 fn parse_array_add_assignment_and_access() {
2526 let mut parser = Parser::new(Lexer::new(
2527 r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2528 ));
2529
2530 let program = parser.parse_program();
2531
2532 assert_eq!(
2533 r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#,
2534 program.to_string()
2535 );
2536 }
2537
2538 #[test]
2539 fn parse_for_in_loop() {
2540 let mut parser = Parser::new(Lexer::new(
2541 r#"END { for (name in area) print name ":" area[name] }"#,
2542 ));
2543
2544 let program = parser.parse_program();
2545
2546 assert_eq!(
2547 r#"END { for (name in area) { print name ":" area[name] } }"#,
2548 program.to_string()
2549 );
2550 }
2551
2552 #[test]
2553 fn parse_print_redirection() {
2554 let mut parser = Parser::new(Lexer::new(r#"{ print >"tempbig" }"#));
2555
2556 let program = parser.parse_program();
2557
2558 assert_eq!(r#"{ print > "tempbig" }"#, program.to_string());
2559 }
2560
2561 #[test]
2562 fn parse_print_pipe() {
2563 let mut parser = Parser::new(Lexer::new(r#"{ print c ":" pop[c] | "sort" }"#));
2564
2565 let program = parser.parse_program();
2566
2567 assert_eq!(r#"{ print c ":" pop[c] | "sort" }"#, program.to_string());
2568 }
2569
2570 #[test]
2571 fn parse_hexadecimal_number() {
2572 let mut parser = Parser::new(Lexer::new(r#"BEGIN { print 0xAA }"#));
2573
2574 let program = parser.parse_program();
2575
2576 assert_eq!(r#"BEGIN { print 0xAA }"#, program.to_string());
2577 }
2578
2579 #[test]
2580 fn parse_field_compound_assignment() {
2581 let mut parser = Parser::new(Lexer::new(r#"{ $1 += 2 }"#));
2582
2583 let program = parser.parse_program();
2584
2585 assert_eq!(r#"{ $1 = $1 + 2 }"#, program.to_string());
2586 }
2587
2588 #[test]
2589 fn parse_builtin_without_parens_returns_parse_error() {
2590 let mut parser = Parser::new(Lexer::new(r#"{ x = cos }"#));
2591
2592 let err = parser
2593 .try_parse_program()
2594 .expect_err("expected parse error for builtin used without parentheses");
2595
2596 assert_eq!(err.kind, ParseErrorKind::ExpectedLeftParen);
2597 assert_eq!(err.token.kind, TokenKind::RightCurlyBrace);
2598 }
2599
2600 #[test]
2601 fn parse_nested_function_calls() {
2602 let mut parser = Parser::new(Lexer::new(r#"{ x = substr(substr(s, 1, 2), 1) }"#));
2603
2604 let program = parser.parse_program();
2605
2606 assert_eq!(r#"{ x = substr(substr(s, 1, 2), 1) }"#, program.to_string());
2607 }
2608
2609 #[test]
2610 fn parse_chained_ternary_is_right_associative() {
2611 let mut parser = Parser::new(Lexer::new(r#"{ x = a ? b : c ? d : e }"#));
2612
2613 let program = parser.parse_program();
2614
2615 assert_eq!(r#"{ x = (a) ? b : (c) ? d : e }"#, program.to_string());
2616 }
2617
2618 #[test]
2619 fn parse_for_loop_with_empty_init_condition_update() {
2620 let mut parser = Parser::new(Lexer::new(r#"{ for (;;) break }"#));
2621
2622 let program = parser.parse_program();
2623
2624 assert_eq!(r#"{ for (; 1; ) { break } }"#, program.to_string());
2625 }
2626
2627 #[test]
2628 fn parse_assignment_with_regex_rhs() {
2629 let mut parser = Parser::new(Lexer::new(r#"{ x = /foo/ }"#));
2630
2631 let program = parser.parse_program();
2632
2633 assert_eq!(r#"{ x = /foo/ }"#, program.to_string());
2634 }
2635
2636 #[test]
2637 fn parse_field_assignment_with_regex_rhs() {
2638 let mut parser = Parser::new(Lexer::new(r#"{ $1 = /foo/ }"#));
2639
2640 let program = parser.parse_program();
2641
2642 assert_eq!(r#"{ $1 = /foo/ }"#, program.to_string());
2643 }
2644
2645 #[test]
2646 fn parse_array_assignment_with_regex_rhs() {
2647 let mut parser = Parser::new(Lexer::new(r#"{ a[i] = /foo/ }"#));
2648
2649 let program = parser.parse_program();
2650
2651 assert_eq!(r#"{ a[i] = /foo/ }"#, program.to_string());
2652 }
2653
2654 #[test]
2655 fn parse_for_loop_with_print_as_init_returns_parse_error() {
2656 let mut parser = Parser::new(Lexer::new(r#"{ for (print "hi"; i < 10; i++) print i }"#));
2657
2658 let err = parser
2659 .try_parse_program()
2660 .expect_err("expected parse error for print statement as for-loop initializer");
2661
2662 assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
2663 assert_eq!(err.token.kind, TokenKind::Print);
2664 }
2665
2666 #[test]
2667 fn parse_for_loop_with_print_as_update_returns_parse_error() {
2668 let mut parser = Parser::new(Lexer::new(r#"{ for (i = 0; i < 10; print i) print i }"#));
2669
2670 let err = parser
2671 .try_parse_program()
2672 .expect_err("expected parse error for print statement as for-loop update");
2673
2674 assert_eq!(err.kind, ParseErrorKind::UnsupportedStatement);
2675 assert_eq!(err.token.kind, TokenKind::Print);
2676 }
2677
2678 #[test]
2679 fn parse_for_loop_with_field_assignment_as_init() {
2680 let mut parser = Parser::new(Lexer::new(r#"{ for ($1 = 0; $1 < 10; $1 += 1) print $1 }"#));
2681
2682 let program = parser.parse_program();
2683
2684 assert_eq!(
2685 r#"{ for ($1 = 0; $1 < 10; $1 = $1 + 1) { print $1 } }"#,
2686 program.to_string()
2687 );
2688 }
2689
2690 #[test]
2691 fn parse_primary_atom_with_invalid_number_literal_returns_parse_error() {
2692 let mut parser = Parser::new(Lexer::new(""));
2696 parser.current_token = Token::new(TokenKind::Number, "0xZZ", 7);
2697
2698 let err = parser
2699 .parse_primary_atom()
2700 .expect_err("expected parse error for invalid numeric literal");
2701
2702 assert_eq!(err.kind, ParseErrorKind::InvalidNumericLiteral);
2703 assert_eq!(err.token.kind, TokenKind::Number);
2704 assert_eq!(err.token.literal, "0xZZ");
2705 }
2706
2707 #[test]
2708 fn parse_unrecognized_token_in_expression_returns_parse_error() {
2709 let mut parser = Parser::new(Lexer::new("{ x = else }"));
2710
2711 let err = parser
2712 .try_parse_program()
2713 .expect_err("expected parse error for unrecognized token in expression");
2714
2715 assert_eq!(err.kind, ParseErrorKind::ExpectedStatement);
2716 assert_eq!(err.token.kind, TokenKind::Else);
2717 }
2718}