1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, FunctionDefinition, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
/// Classifies why parsing failed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseErrorKind {
    /// The parser met a token it could not use; `expected` names what it wanted instead.
    UnexpectedToken {
        expected: &'static str,
    },
    /// A `printf` statement was parsed with no arguments, so it has no format string.
    MissingPrintfFormatString,
}
12
/// A parse failure: the kind of error together with the token the parser was
/// positioned on when the error was created.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError<'a> {
    /// What went wrong.
    pub kind: ParseErrorKind,
    /// The parser's current token at the moment the error was built.
    pub token: Token<'a>,
}
18
19impl std::fmt::Display for ParseError<'_> {
20 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21 match &self.kind {
22 ParseErrorKind::UnexpectedToken { expected } => write!(
23 f,
24 "unexpected token {:?} ({:?}) at byte {}: expected {}",
25 self.token.kind, self.token.literal, self.token.span.start, expected
26 ),
27 ParseErrorKind::MissingPrintfFormatString => write!(
28 f,
29 "printf requires a format string at byte {}",
30 self.token.span.start
31 ),
32 }
33 }
34}
35
// Marker impl: no `Error` methods are overridden, so the trait's defaults apply.
impl std::error::Error for ParseError<'_> {}
37
/// Parser state: a token source plus the single token currently being examined.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Source of tokens; the parser pulls one token at a time from it.
    lexer: Lexer<'a>,
    /// The token the parser is currently positioned on.
    current_token: Token<'a>,
    /// Function definitions collected by `parse_function_definition`.
    function_definitions: Vec<FunctionDefinition<'a>>,
}
44
45impl<'a> Parser<'a> {
46 pub fn new(mut lexer: Lexer<'a>) -> Self {
47 let current_token = lexer.next_token_regex_aware();
48 Parser {
49 lexer,
50 current_token,
51 function_definitions: Vec::new(),
52 }
53 }
54
55 fn next_token(&mut self) {
56 self.current_token = self.lexer.next_token();
57 }
58
59 fn next_token_in_regex_context(&mut self) {
60 self.current_token = self.lexer.next_token_regex_aware();
61 }
62
63 fn is_eof(&self) -> bool {
64 self.current_token.kind == TokenKind::Eof
65 }
66
67 fn is_statement_terminator(&self) -> bool {
68 matches!(
69 self.current_token.kind,
70 TokenKind::Semicolon | TokenKind::NewLine | TokenKind::RightCurlyBrace | TokenKind::Eof
71 )
72 }
73
74 fn token_is_immediately_after(&self, previous: &Token<'a>) -> bool {
75 self.current_token.span.start == previous.span.start + previous.literal.len()
76 }
77
78 fn parse_number_expression(&self) -> Option<Expression<'a>> {
79 let literal = self.current_token.literal;
80 if let Some(hex_digits) = literal
81 .strip_prefix("0x")
82 .or_else(|| literal.strip_prefix("0X"))
83 {
84 let value = u64::from_str_radix(hex_digits, 16).ok()? as f64;
85 return Some(Expression::HexNumber { literal, value });
86 }
87
88 literal.parse::<f64>().ok().map(Expression::Number)
89 }
90
91 fn parse_array_index_expression(&mut self) -> Expression<'a> {
92 let mut index = self.parse_expression();
93 while self.current_token.kind == TokenKind::Comma {
94 let operator = self.current_token.clone();
95 self.next_token_in_regex_context();
96 let right = self.parse_expression();
97 index = Expression::Infix {
98 left: Box::new(index),
99 operator,
100 right: Box::new(right),
101 };
102 }
103 index
104 }
105
106 fn unexpected_token(&self, expected: &'static str) -> ParseError<'a> {
107 ParseError {
108 kind: ParseErrorKind::UnexpectedToken { expected },
109 token: self.current_token.clone(),
110 }
111 }
112
113 fn missing_printf_format_string(&self) -> ParseError<'a> {
114 ParseError {
115 kind: ParseErrorKind::MissingPrintfFormatString,
116 token: self.current_token.clone(),
117 }
118 }
119
120 fn parse_next_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
121 match &self.current_token.kind {
122 TokenKind::Begin => {
123 self.next_token();
124 let action = self.parse_action()?;
125 Ok(Some(Rule::Begin(action)))
126 }
127 TokenKind::NewLine => {
128 self.next_token_in_regex_context();
129 self.parse_next_rule()
130 }
131 TokenKind::Eof => Ok(None),
132 TokenKind::LeftCurlyBrace => {
133 self.parse_action().map(|action| Some(Rule::Action(action)))
134 }
135 TokenKind::Function => {
136 self.parse_function_definition()?;
137 Ok(None)
138 }
139 TokenKind::End => {
140 self.next_token();
141 let action = self.parse_action()?;
142 Ok(Some(Rule::End(action)))
143 }
144 TokenKind::Regex
145 | TokenKind::String
146 | TokenKind::Number
147 | TokenKind::DollarSign
148 | TokenKind::LeftParen
149 | TokenKind::Identifier
150 | TokenKind::Cos
151 | TokenKind::Exp
152 | TokenKind::Index
153 | TokenKind::Int
154 | TokenKind::Length
155 | TokenKind::Log
156 | TokenKind::Match
157 | TokenKind::Rand
158 | TokenKind::Sin
159 | TokenKind::Sprintf
160 | TokenKind::Split
161 | TokenKind::Sqrt
162 | TokenKind::Srand
163 | TokenKind::Substr
164 | TokenKind::ExclamationMark
165 | TokenKind::Increment
166 | TokenKind::Decrement => self.parse_pattern_rule(),
167 _ => Err(self.unexpected_token("rule")),
168 }
169 }
170
171 fn parse_pattern_rule(&mut self) -> Result<Option<Rule<'a>>, ParseError<'a>> {
172 let mut pattern = self.parse_expression();
173 if self.current_token.kind == TokenKind::Comma {
174 let operator = self.current_token.clone();
175 self.next_token_in_regex_context();
176 let right = self.parse_expression();
177 pattern = Expression::Infix {
178 left: Box::new(pattern),
179 operator,
180 right: Box::new(right),
181 };
182 }
183 let pattern = Some(pattern);
184
185 if self.current_token.kind == TokenKind::LeftCurlyBrace {
186 let action = self.parse_action()?;
187 Ok(Some(Rule::PatternAction {
188 pattern,
189 action: Some(action),
190 }))
191 } else {
192 Ok(Some(Rule::PatternAction {
193 pattern,
194 action: None,
195 }))
196 }
197 }
198
199 fn parse_action(&mut self) -> Result<Action<'a>, ParseError<'a>> {
200 self.next_token(); let mut statements = Vec::new();
203 while self.current_token.kind != TokenKind::RightCurlyBrace
204 && self.current_token.kind != TokenKind::Eof
205 {
206 while self.current_token.kind == TokenKind::NewLine
207 || self.current_token.kind == TokenKind::Semicolon
208 {
209 self.next_token();
210 }
211
212 if self.current_token.kind == TokenKind::RightCurlyBrace
213 || self.current_token.kind == TokenKind::Eof
214 {
215 break;
216 }
217
218 statements.push(self.parse_statement()?);
219 }
220
221 Ok(Action { statements })
222 }
223
224 fn parse_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
225 match self.current_token.kind {
226 TokenKind::Print => Ok(self.parse_print_function()),
227 TokenKind::Printf => self.parse_printf_function(),
228 TokenKind::System => Ok(self.parse_system_function()),
229 TokenKind::Split => Ok(self.parse_split_statement()),
230 TokenKind::Sub => Ok(self.parse_sub_function()),
231 TokenKind::Gsub => Ok(self.parse_gsub_function()),
232 TokenKind::Break => Ok(self.parse_break_statement()),
233 TokenKind::Continue => Ok(self.parse_continue_statement()),
234 TokenKind::Delete => Ok(self.parse_delete_statement()),
235 TokenKind::If => self.parse_if_statement(),
236 TokenKind::Do => self.parse_do_statement(),
237 TokenKind::While => self.parse_while_statement(),
238 TokenKind::For => self.parse_for_statement(),
239 TokenKind::Return => Ok(self.parse_return_statement()),
240 TokenKind::Next => Ok(self.parse_next_statement()),
241 TokenKind::Exit => Ok(self.parse_exit_statement()),
242 TokenKind::Identifier => Ok(self.parse_assignment_statement()),
243 TokenKind::DollarSign => Ok(self.parse_field_assignment_statement()),
244 TokenKind::Increment => Ok(self.parse_pre_increment_statement()),
245 TokenKind::Decrement => Ok(self.parse_pre_decrement_statement()),
246 TokenKind::Number
247 | TokenKind::String
248 | TokenKind::Regex
249 | TokenKind::LeftParen
250 | TokenKind::Close
251 | TokenKind::Cos
252 | TokenKind::Exp
253 | TokenKind::Index
254 | TokenKind::Int
255 | TokenKind::Length
256 | TokenKind::Log
257 | TokenKind::Match
258 | TokenKind::Rand
259 | TokenKind::Sin
260 | TokenKind::Sprintf
261 | TokenKind::Sqrt
262 | TokenKind::Srand
263 | TokenKind::Substr
264 | TokenKind::ToLower
265 | TokenKind::ToUpper => Ok(Statement::Expression(self.parse_expression())),
266 _ => Err(self.unexpected_token("statement")),
267 }
268 }
269
270 fn parse_function_definition(&mut self) -> Result<(), ParseError<'a>> {
271 self.next_token();
272 if self.current_token.kind != TokenKind::Identifier {
273 todo!()
274 }
275 let name = self.current_token.literal;
276 self.next_token();
277 if self.current_token.kind != TokenKind::LeftParen {
278 todo!()
279 }
280 self.next_token();
281
282 let mut parameters = Vec::new();
283 while self.current_token.kind != TokenKind::RightParen {
284 if self.current_token.kind != TokenKind::Identifier {
285 todo!()
286 }
287 parameters.push(self.current_token.literal);
288 self.next_token();
289 if self.current_token.kind == TokenKind::Comma {
290 self.next_token();
291 } else if self.current_token.kind != TokenKind::RightParen {
292 todo!()
293 }
294 }
295
296 self.next_token();
297 while self.current_token.kind == TokenKind::NewLine {
298 self.next_token();
299 }
300 if self.current_token.kind != TokenKind::LeftCurlyBrace {
301 todo!()
302 }
303
304 let mut statements = Vec::new();
305 self.next_token(); while self.current_token.kind != TokenKind::RightCurlyBrace
307 && self.current_token.kind != TokenKind::Eof
308 {
309 while self.current_token.kind == TokenKind::NewLine
310 || self.current_token.kind == TokenKind::Semicolon
311 {
312 self.next_token();
313 }
314
315 if self.current_token.kind == TokenKind::RightCurlyBrace
316 || self.current_token.kind == TokenKind::Eof
317 {
318 break;
319 }
320
321 statements.push(self.parse_statement()?);
322 }
323 self.function_definitions.push(FunctionDefinition {
324 name,
325 parameters,
326 statements,
327 });
328
329 Ok(())
330 }
331
332 fn parse_assignment_statement(&mut self) -> Statement<'a> {
333 let identifier = self.current_token.clone();
334 self.next_token();
335 self.parse_assignment_statement_with_identifier(identifier)
336 }
337
338 fn parse_assignment_statement_with_identifier(
339 &mut self,
340 identifier: Token<'a>,
341 ) -> Statement<'a> {
342 if self.current_token.kind == TokenKind::LeftParen
343 && self.token_is_immediately_after(&identifier)
344 {
345 let args = self.parse_call_arguments();
346 return Statement::Expression(Expression::FunctionCall {
347 name: identifier.literal,
348 args,
349 });
350 }
351 if self.current_token.kind == TokenKind::LeftSquareBracket {
352 self.next_token_in_regex_context();
353 let index = self.parse_array_index_expression();
354 if self.current_token.kind != TokenKind::RightSquareBracket {
355 todo!()
356 }
357 self.next_token();
358 if self.current_token.kind == TokenKind::Assign {
359 self.next_token();
360 let value = self.parse_expression();
361 return Statement::ArrayAssignment {
362 identifier: identifier.literal,
363 index,
364 value,
365 };
366 }
367 if self.current_token.kind == TokenKind::AddAssign {
368 self.next_token();
369 let value = self.parse_expression();
370 return Statement::ArrayAddAssignment {
371 identifier: identifier.literal,
372 index,
373 value,
374 };
375 }
376 if self.current_token.kind == TokenKind::Increment {
377 self.next_token();
378 return Statement::ArrayPostIncrement {
379 identifier: identifier.literal,
380 index,
381 };
382 }
383 if self.current_token.kind == TokenKind::Decrement {
384 self.next_token();
385 return Statement::ArrayPostDecrement {
386 identifier: identifier.literal,
387 index,
388 };
389 }
390 todo!()
391 }
392 if self.current_token.kind == TokenKind::Assign {
393 self.next_token();
394 if self.current_token.kind == TokenKind::Split {
395 return self.parse_split_assignment_statement(identifier.literal);
396 }
397 let value = self.parse_expression();
398 Statement::Assignment {
399 identifier: identifier.literal,
400 value,
401 }
402 } else if self.current_token.kind == TokenKind::Increment {
403 self.next_token();
404 Statement::PostIncrement {
405 identifier: identifier.literal,
406 }
407 } else if self.current_token.kind == TokenKind::Decrement {
408 self.next_token();
409 Statement::PostDecrement {
410 identifier: identifier.literal,
411 }
412 } else if self.current_token.kind == TokenKind::AddAssign {
413 self.next_token();
414 let value = self.parse_expression();
415 Statement::AddAssignment {
416 identifier: identifier.literal,
417 value,
418 }
419 } else if matches!(
420 self.current_token.kind,
421 TokenKind::SubtractAssign
422 | TokenKind::MultiplyAssign
423 | TokenKind::DivideAssign
424 | TokenKind::ModuloAssign
425 | TokenKind::PowerAssign
426 ) {
427 let assign_token = self.current_token.clone();
428 self.next_token();
429 let right_value = self.parse_expression();
430 Statement::Assignment {
431 identifier: identifier.literal,
432 value: Expression::Infix {
433 left: Box::new(Expression::Identifier(identifier.literal)),
434 operator: compound_assign_operator(&assign_token),
435 right: Box::new(right_value),
436 },
437 }
438 } else {
439 todo!()
440 }
441 }
442
443 fn parse_delete_statement(&mut self) -> Statement<'a> {
444 self.next_token();
445 if self.current_token.kind != TokenKind::Identifier {
446 todo!()
447 }
448 let identifier = self.current_token.literal;
449 self.next_token();
450 if self.current_token.kind != TokenKind::LeftSquareBracket {
451 return Statement::Delete {
452 identifier,
453 index: None,
454 };
455 }
456
457 self.next_token_in_regex_context();
458 let index = self.parse_array_index_expression();
459 if self.current_token.kind != TokenKind::RightSquareBracket {
460 todo!()
461 }
462 self.next_token();
463 Statement::Delete {
464 identifier,
465 index: Some(index),
466 }
467 }
468
469 fn parse_break_statement(&mut self) -> Statement<'a> {
470 self.next_token();
471 Statement::Break
472 }
473
474 fn parse_continue_statement(&mut self) -> Statement<'a> {
475 self.next_token();
476 Statement::Continue
477 }
478
479 fn parse_pre_increment_statement(&mut self) -> Statement<'a> {
480 self.next_token();
481 if self.current_token.kind != TokenKind::Identifier {
482 todo!()
483 }
484 let identifier = self.current_token.literal;
485 self.next_token();
486 Statement::PreIncrement { identifier }
487 }
488
489 fn parse_pre_decrement_statement(&mut self) -> Statement<'a> {
490 self.next_token();
491 if self.current_token.kind != TokenKind::Identifier {
492 todo!()
493 }
494 let identifier = self.current_token.literal;
495 self.next_token();
496 Statement::PreDecrement { identifier }
497 }
498
499 fn parse_split_assignment_statement(&mut self, identifier: &'a str) -> Statement<'a> {
500 self.next_token();
501 if self.current_token.kind != TokenKind::LeftParen {
502 todo!()
503 }
504 self.next_token_in_regex_context();
505 let string = self.parse_expression();
506 if self.current_token.kind != TokenKind::Comma {
507 todo!()
508 }
509 self.next_token();
510 if self.current_token.kind != TokenKind::Identifier {
511 todo!()
512 }
513 let array = self.current_token.literal;
514 self.next_token();
515 let separator = if self.current_token.kind == TokenKind::Comma {
516 self.next_token_in_regex_context();
517 Some(self.parse_expression())
518 } else {
519 None
520 };
521 if self.current_token.kind != TokenKind::RightParen {
522 todo!()
523 }
524 self.next_token();
525 Statement::SplitAssignment {
526 identifier,
527 string,
528 array,
529 separator,
530 }
531 }
532
533 fn parse_split_statement(&mut self) -> Statement<'a> {
534 self.next_token();
535 if self.current_token.kind != TokenKind::LeftParen {
536 todo!()
537 }
538 self.next_token_in_regex_context();
539 let string = self.parse_expression();
540 if self.current_token.kind != TokenKind::Comma {
541 todo!()
542 }
543 self.next_token();
544 if self.current_token.kind != TokenKind::Identifier {
545 todo!()
546 }
547 let array = self.current_token.literal;
548 self.next_token();
549 let separator = if self.current_token.kind == TokenKind::Comma {
550 self.next_token_in_regex_context();
551 Some(self.parse_expression())
552 } else {
553 None
554 };
555 if self.current_token.kind != TokenKind::RightParen {
556 todo!()
557 }
558 self.next_token();
559 Statement::Split {
560 string,
561 array,
562 separator,
563 }
564 }
565
566 fn parse_field_assignment_statement(&mut self) -> Statement<'a> {
567 self.next_token();
568 let field = self.parse_primary_expression();
569 let assign_token = self.current_token.clone();
570 self.next_token();
571 let right_value = self.parse_expression();
572
573 let value = if assign_token.kind == TokenKind::Assign {
574 right_value
575 } else {
576 let operator = compound_assign_operator(&assign_token);
577 Expression::Infix {
578 left: Box::new(Expression::Field(Box::new(field.clone()))),
579 operator,
580 right: Box::new(right_value),
581 }
582 };
583 Statement::FieldAssignment { field, value }
584 }
585
586 fn parse_if_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
587 self.next_token();
588 if self.current_token.kind != TokenKind::LeftParen {
589 todo!()
590 }
591 self.next_token_in_regex_context();
592 let condition = self.parse_condition_in_parens();
593 if self.current_token.kind != TokenKind::RightParen {
594 todo!()
595 }
596 self.next_token();
597 let then_statements = self.parse_control_statement_body()?;
598
599 while self.current_token.kind == TokenKind::NewLine
600 || self.current_token.kind == TokenKind::Semicolon
601 {
602 self.next_token();
603 }
604
605 if self.current_token.kind == TokenKind::Else {
606 self.next_token();
607 let else_statements = self.parse_control_statement_body()?;
608 return Ok(Statement::IfElse {
609 condition,
610 then_statements,
611 else_statements,
612 });
613 }
614
615 Ok(Statement::If {
616 condition,
617 then_statements,
618 })
619 }
620
621 fn parse_exit_statement(&mut self) -> Statement<'a> {
622 self.next_token();
623 let status = if self.is_statement_terminator() {
624 None
625 } else {
626 Some(self.parse_expression())
627 };
628 Statement::Exit(status)
629 }
630
631 fn parse_return_statement(&mut self) -> Statement<'a> {
632 self.next_token();
633 let value = if self.is_statement_terminator() {
634 None
635 } else {
636 Some(self.parse_expression())
637 };
638 Statement::Return(value)
639 }
640
641 fn parse_next_statement(&mut self) -> Statement<'a> {
642 self.next_token();
643 Statement::Next
644 }
645
646 fn parse_statement_block(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
647 self.next_token(); let mut statements = Vec::new();
649 while self.current_token.kind != TokenKind::RightCurlyBrace
650 && self.current_token.kind != TokenKind::Eof
651 {
652 while self.current_token.kind == TokenKind::NewLine
653 || self.current_token.kind == TokenKind::Semicolon
654 {
655 self.next_token();
656 }
657
658 if self.current_token.kind == TokenKind::RightCurlyBrace
659 || self.current_token.kind == TokenKind::Eof
660 {
661 break;
662 }
663 statements.push(self.parse_statement()?);
664 }
665 if self.current_token.kind == TokenKind::RightCurlyBrace {
666 self.next_token();
667 }
668 Ok(statements)
669 }
670
671 fn parse_control_statement_body(&mut self) -> Result<Vec<Statement<'a>>, ParseError<'a>> {
672 while self.current_token.kind == TokenKind::NewLine {
673 self.next_token();
674 }
675
676 if self.current_token.kind == TokenKind::LeftCurlyBrace {
677 return self.parse_statement_block();
678 }
679
680 if self.current_token.kind == TokenKind::Semicolon {
681 self.next_token();
682 return Ok(vec![Statement::Empty]);
683 }
684
685 Ok(vec![self.parse_statement()?])
686 }
687
688 fn parse_while_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
689 self.next_token();
690 if self.current_token.kind != TokenKind::LeftParen {
691 todo!()
692 }
693 self.next_token_in_regex_context();
694 let condition = self.parse_condition_in_parens();
695 if self.current_token.kind != TokenKind::RightParen {
696 todo!()
697 }
698 self.next_token();
699 let statements = self.parse_control_statement_body()?;
700 Ok(Statement::While {
701 condition,
702 statements,
703 })
704 }
705
706 fn parse_do_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
707 self.next_token();
708 let statements = self.parse_control_statement_body()?;
709
710 while self.current_token.kind == TokenKind::NewLine
711 || self.current_token.kind == TokenKind::Semicolon
712 {
713 self.next_token();
714 }
715
716 if self.current_token.kind != TokenKind::While {
717 todo!()
718 }
719 self.next_token();
720 if self.current_token.kind != TokenKind::LeftParen {
721 todo!()
722 }
723 self.next_token_in_regex_context();
724 let condition = self.parse_condition_in_parens();
725 if self.current_token.kind != TokenKind::RightParen {
726 todo!()
727 }
728 self.next_token();
729 Ok(Statement::DoWhile {
730 condition,
731 statements,
732 })
733 }
734
735 fn parse_for_statement(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
736 self.next_token();
737 if self.current_token.kind != TokenKind::LeftParen {
738 todo!()
739 }
740 self.next_token();
741 while self.current_token.kind == TokenKind::NewLine {
742 self.next_token();
743 }
744
745 let init = if self.current_token.kind == TokenKind::Semicolon {
746 Statement::Empty
747 } else if self.current_token.kind == TokenKind::Identifier {
748 let variable = self.current_token.clone();
749 self.next_token();
750 if self.current_token.kind == TokenKind::In {
751 self.next_token();
752 if self.current_token.kind != TokenKind::Identifier {
753 todo!()
754 }
755 let array = self.current_token.literal;
756 self.next_token();
757 if self.current_token.kind != TokenKind::RightParen {
758 todo!()
759 }
760 self.next_token();
761 let statements = self.parse_control_statement_body()?;
762 return Ok(Statement::ForIn {
763 variable: variable.literal,
764 array,
765 statements,
766 });
767 }
768 self.parse_assignment_statement_with_identifier(variable)
769 } else {
770 self.parse_statement()?
771 };
772 while self.current_token.kind == TokenKind::NewLine {
773 self.next_token();
774 }
775 if self.current_token.kind != TokenKind::Semicolon {
776 todo!()
777 }
778 self.next_token_in_regex_context();
779 while self.current_token.kind == TokenKind::NewLine {
780 self.next_token_in_regex_context();
781 }
782
783 let condition = if self.current_token.kind == TokenKind::Semicolon {
784 Expression::Number(1.0)
785 } else {
786 self.parse_expression()
787 };
788 while self.current_token.kind == TokenKind::NewLine {
789 self.next_token();
790 }
791 if self.current_token.kind != TokenKind::Semicolon {
792 todo!()
793 }
794 self.next_token_in_regex_context();
795 while self.current_token.kind == TokenKind::NewLine {
796 self.next_token_in_regex_context();
797 }
798
799 let update = if self.current_token.kind == TokenKind::RightParen {
800 Statement::Empty
801 } else {
802 self.parse_statement()?
803 };
804 while self.current_token.kind == TokenKind::NewLine {
805 self.next_token();
806 }
807 if self.current_token.kind != TokenKind::RightParen {
808 todo!()
809 }
810 self.next_token();
811 let statements = self.parse_control_statement_body()?;
812
813 Ok(Statement::For {
814 init: Box::new(init),
815 condition,
816 update: Box::new(update),
817 statements,
818 })
819 }
820
/// Parses a `print` statement, including optional output redirection.
///
/// Collects the argument expressions, then yields:
/// - `Statement::PrintRedirect` when a `GreaterThan` or `Append` token follows
///   (`append` is true for `Append`),
/// - `Statement::PrintPipe` when a `Pipe` token follows,
/// - `Statement::Print` otherwise.
///
/// # Panics
/// Hits `todo!()` when a parenthesized, comma-separated argument list is not closed
/// by `)`.
fn parse_print_function(&mut self) -> Statement<'a> {
    let mut expressions = Vec::new();
    // Set after a comma so that a following newline/semicolon does not end the list.
    let mut expect_more = false;
    self.next_token();

    loop {
        // Tokens that always end the argument list.
        if self.current_token.kind == TokenKind::RightCurlyBrace
            || self.current_token.kind == TokenKind::RightParen
            || self.current_token.kind == TokenKind::Eof
            || self.current_token.kind == TokenKind::GreaterThan
            || self.current_token.kind == TokenKind::Append
            || self.current_token.kind == TokenKind::Pipe
        {
            break;
        }

        // A newline or semicolon ends the list unless a comma was just consumed.
        if self.current_token.kind == TokenKind::NewLine
            || self.current_token.kind == TokenKind::Semicolon
        {
            if expect_more {
                self.next_token();
                continue;
            }
            break;
        }

        if self.current_token.kind == TokenKind::Comma {
            self.next_token();
            expect_more = true;
            continue;
        }

        let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
        let expression = self.parse_expression();
        expressions.push(expression);
        // An argument that began with `(` and is followed by `,` is treated as the
        // start of a parenthesized list: keep reading arguments up to the `)`.
        if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
            while self.current_token.kind == TokenKind::Comma {
                self.next_token();
                expressions.push(self.parse_expression());
            }
            if self.current_token.kind != TokenKind::RightParen {
                todo!()
            }
            self.next_token();
        }
        expect_more = false;
    }
    // Consume a `)` left over after the argument list.
    if self.current_token.kind == TokenKind::RightParen {
        self.next_token();
    }

    if self.current_token.kind == TokenKind::GreaterThan
        || self.current_token.kind == TokenKind::Append
    {
        let append = self.current_token.kind == TokenKind::Append;
        self.next_token();
        let target = self.parse_expression();
        return Statement::PrintRedirect {
            expressions,
            target,
            append,
        };
    }
    if self.current_token.kind == TokenKind::Pipe {
        self.next_token();
        let target = self.parse_expression();
        return Statement::PrintPipe {
            expressions,
            target,
        };
    }

    Statement::Print(expressions)
}
895
896 fn parse_printf_function(&mut self) -> Result<Statement<'a>, ParseError<'a>> {
897 self.next_token();
898 let expressions = if self.current_token.kind == TokenKind::LeftParen {
899 self.next_token_in_regex_context();
900 let mut expressions = Vec::new();
901 while self.current_token.kind != TokenKind::RightParen
902 && self.current_token.kind != TokenKind::Eof
903 {
904 if self.current_token.kind == TokenKind::Comma {
905 self.next_token();
906 continue;
907 }
908 expressions.push(self.parse_expression());
909 }
910 if self.current_token.kind == TokenKind::RightParen {
911 self.next_token();
912 }
913 expressions
914 } else {
915 self.parse_expression_list_until_action_end_from_current()
916 };
917
918 if expressions.is_empty() {
919 return Err(self.missing_printf_format_string());
920 }
921
922 Ok(Statement::Printf(expressions))
923 }
924
925 fn parse_gsub_function(&mut self) -> Statement<'a> {
926 self.next_token();
927 if self.current_token.kind != TokenKind::LeftParen {
928 todo!()
929 }
930
931 self.next_token_in_regex_context();
932 let pattern = self.parse_expression();
933
934 if self.current_token.kind != TokenKind::Comma {
935 todo!()
936 }
937 self.next_token();
938 let replacement = self.parse_expression();
939
940 let target = if self.current_token.kind == TokenKind::Comma {
941 self.next_token();
942 Some(self.parse_expression())
943 } else {
944 None
945 };
946
947 if self.current_token.kind != TokenKind::RightParen {
948 todo!()
949 }
950 self.next_token();
951
952 Statement::Gsub {
953 pattern,
954 replacement,
955 target,
956 }
957 }
958
959 fn parse_sub_function(&mut self) -> Statement<'a> {
960 self.next_token();
961 if self.current_token.kind != TokenKind::LeftParen {
962 todo!()
963 }
964
965 self.next_token_in_regex_context();
966 let pattern = self.parse_expression();
967
968 if self.current_token.kind != TokenKind::Comma {
969 todo!()
970 }
971 self.next_token();
972 let replacement = self.parse_expression();
973
974 if self.current_token.kind == TokenKind::Comma {
975 todo!()
976 }
977
978 if self.current_token.kind != TokenKind::RightParen {
979 todo!()
980 }
981 self.next_token();
982
983 Statement::Sub {
984 pattern,
985 replacement,
986 }
987 }
988
989 fn parse_system_function(&mut self) -> Statement<'a> {
990 self.next_token();
991 if self.current_token.kind != TokenKind::LeftParen {
992 todo!()
993 }
994 self.next_token();
995 let command = self.parse_expression();
996 if self.current_token.kind != TokenKind::RightParen {
997 todo!()
998 }
999 self.next_token();
1000 Statement::System(command)
1001 }
1002
1003 fn parse_expression_list_until_action_end_from_current(&mut self) -> Vec<Expression<'a>> {
1004 let mut expressions = Vec::new();
1005 let mut expect_more = false;
1006
1007 loop {
1008 if self.current_token.kind == TokenKind::RightCurlyBrace
1009 || self.current_token.kind == TokenKind::RightParen
1010 || self.current_token.kind == TokenKind::Eof
1011 {
1012 break;
1013 }
1014
1015 if self.current_token.kind == TokenKind::NewLine
1016 || self.current_token.kind == TokenKind::Semicolon
1017 {
1018 if expect_more {
1019 self.next_token();
1020 continue;
1021 }
1022 break;
1023 }
1024
1025 if self.current_token.kind == TokenKind::Comma {
1026 self.next_token();
1027 expect_more = true;
1028 continue;
1029 }
1030
1031 let started_with_left_paren = self.current_token.kind == TokenKind::LeftParen;
1032 let expression = self.parse_expression();
1033 expressions.push(expression);
1034 if started_with_left_paren && self.current_token.kind == TokenKind::Comma {
1035 while self.current_token.kind == TokenKind::Comma {
1036 self.next_token();
1037 expressions.push(self.parse_expression());
1038 }
1039 if self.current_token.kind != TokenKind::RightParen {
1040 todo!()
1041 }
1042 self.next_token();
1043 }
1044 expect_more = false;
1045 }
1046
1047 if self.current_token.kind == TokenKind::RightParen {
1048 self.next_token();
1049 }
1050
1051 expressions
1052 }
1053
1054 fn parse_expression(&mut self) -> Expression<'a> {
1055 self.parse_expression_with_min_precedence(0)
1056 }
1057
1058 fn parse_expression_with_min_precedence(&mut self, min_precedence: u8) -> Expression<'a> {
1059 let left = self.parse_primary_expression();
1060 self.parse_expression_suffix(left, min_precedence)
1061 }
1062
/// Extends `left` with any operators that follow it, honouring `min_precedence`.
///
/// Each loop iteration handles one of three cases, in this order:
/// 1. a `QuestionMark` ternary, only when `min_precedence` is 0;
/// 2. implicit concatenation, when the current token is not an infix operator but
///    can start an expression;
/// 3. an infix operator whose left precedence is at least `min_precedence`.
///
/// The loop stops at the first token that fits none of these.
///
/// # Panics
/// Hits `todo!()` when a ternary's `:` is missing.
fn parse_expression_suffix(
    &mut self,
    mut left: Expression<'a>,
    min_precedence: u8,
) -> Expression<'a> {
    // Precedences used for implicit concatenation (left side / right operand).
    const CONCAT_LEFT_PRECEDENCE: u8 = 6;
    const CONCAT_RIGHT_PRECEDENCE: u8 = 7;

    loop {
        if self.current_token.kind == TokenKind::QuestionMark {
            // The ternary is only taken at the outermost precedence level.
            if min_precedence > 0 {
                break;
            }
            self.next_token_in_regex_context();
            let then_expr = self.parse_expression_with_min_precedence(0);
            if self.current_token.kind != TokenKind::Colon {
                todo!()
            }
            self.next_token_in_regex_context();
            let else_expr = self.parse_expression_with_min_precedence(0);
            left = Expression::Ternary {
                condition: Box::new(left),
                then_expr: Box::new(then_expr),
                else_expr: Box::new(else_expr),
            };
            continue;
        }

        // No infix operator, but the token can start an expression: concatenation.
        if infix_operator_precedence(&self.current_token.kind).is_none()
            && is_expression_start(&self.current_token.kind)
        {
            if CONCAT_LEFT_PRECEDENCE < min_precedence {
                break;
            }

            let right = self.parse_expression_with_min_precedence(CONCAT_RIGHT_PRECEDENCE);
            left = Expression::Concatenation {
                left: Box::new(left),
                right: Box::new(right),
            };
            continue;
        }

        let (left_precedence, right_precedence) =
            match infix_operator_precedence(&self.current_token.kind) {
                Some(value) => value,
                None => break,
            };

        // The operator binds too loosely for this level; leave it to the caller.
        if left_precedence < min_precedence {
            break;
        }

        let operator = self.current_token.clone();
        // After `Tilde`, `NoMatch`, `And` and `Or` the next token is read in
        // regex-aware mode; after every other operator the plain mode is used.
        if matches!(
            operator.kind,
            TokenKind::Tilde | TokenKind::NoMatch | TokenKind::And | TokenKind::Or
        ) {
            self.next_token_in_regex_context();
        } else {
            self.next_token();
        }
        let right = self.parse_expression_with_min_precedence(right_precedence);

        left = Expression::Infix {
            left: Box::new(left),
            operator,
            right: Box::new(right),
        };
    }

    left
}
1136
1137 fn parse_condition_in_parens(&mut self) -> Expression<'a> {
1138 let mut condition = self.parse_expression();
1139 if self.current_token.kind == TokenKind::Comma {
1140 while self.current_token.kind == TokenKind::Comma {
1141 let operator = self.current_token.clone();
1142 self.next_token_in_regex_context();
1143 let right = self.parse_expression();
1144 condition = Expression::Infix {
1145 left: Box::new(condition),
1146 operator,
1147 right: Box::new(right),
1148 };
1149 }
1150 if self.current_token.kind != TokenKind::RightParen {
1151 todo!()
1152 }
1153 self.next_token();
1154 condition = self.parse_expression_suffix(condition, 0);
1155 }
1156 condition
1157 }
1158
1159 fn parse_primary_expression(&mut self) -> Expression<'a> {
1160 if self.current_token.kind == TokenKind::Minus {
1161 let operator = self.current_token.clone();
1162 self.next_token();
1163 let right = self.parse_primary_expression();
1164 return Expression::Infix {
1165 left: Box::new(Expression::Number(0.0)),
1166 operator,
1167 right: Box::new(right),
1168 };
1169 }
1170 if self.current_token.kind == TokenKind::Plus {
1171 self.next_token();
1172 return self.parse_primary_expression();
1173 }
1174 if self.current_token.kind == TokenKind::ExclamationMark {
1175 self.next_token_in_regex_context();
1176 let expression = self.parse_primary_expression();
1177 return Expression::Not(Box::new(expression));
1178 }
1179 if self.current_token.kind == TokenKind::Increment {
1180 self.next_token();
1181 let expression = self.parse_primary_expression();
1182 return Expression::PreIncrement(Box::new(expression));
1183 }
1184 if self.current_token.kind == TokenKind::Decrement {
1185 self.next_token();
1186 let expression = self.parse_primary_expression();
1187 return Expression::PreDecrement(Box::new(expression));
1188 }
1189
1190 let mut expression = self.parse_primary_atom();
1191 if self.current_token.kind == TokenKind::Increment {
1192 self.next_token();
1193 expression = Expression::PostIncrement(Box::new(expression));
1194 } else if self.current_token.kind == TokenKind::Decrement {
1195 self.next_token();
1196 expression = Expression::PostDecrement(Box::new(expression));
1197 }
1198 expression
1199 }
1200
1201 fn parse_primary_atom(&mut self) -> Expression<'a> {
1202 match self.current_token.kind {
1203 TokenKind::String => {
1204 let expression = Expression::String(self.current_token.literal);
1205 self.next_token();
1206 expression
1207 }
1208 TokenKind::Regex => {
1209 let expression = Expression::Regex(self.current_token.literal);
1210 self.next_token();
1211 expression
1212 }
1213 TokenKind::Number => {
1214 let expression = self.parse_number_expression().unwrap_or_else(|| {
1215 panic!(
1216 "failed to parse numeric literal: {}",
1217 self.current_token.literal
1218 )
1219 });
1220 self.next_token();
1221 expression
1222 }
1223 TokenKind::DollarSign => {
1224 self.next_token();
1225 let expression = self.parse_primary_atom();
1226 Expression::Field(Box::new(expression))
1227 }
1228 TokenKind::LeftParen => {
1229 self.next_token();
1230 let expression = self.parse_expression();
1231 if self.current_token.kind == TokenKind::RightParen {
1232 self.next_token();
1233 }
1234 expression
1235 }
1236 TokenKind::Identifier => {
1237 let identifier = self.current_token.clone();
1238 self.next_token();
1239 if self.current_token.kind == TokenKind::LeftParen
1240 && self.token_is_immediately_after(&identifier)
1241 {
1242 let args = self.parse_call_arguments();
1243 return Expression::FunctionCall {
1244 name: identifier.literal,
1245 args,
1246 };
1247 }
1248 if self.current_token.kind == TokenKind::LeftSquareBracket {
1249 self.next_token_in_regex_context();
1250 let index = self.parse_array_index_expression();
1251 if self.current_token.kind != TokenKind::RightSquareBracket {
1252 todo!()
1253 }
1254 self.next_token();
1255 Expression::ArrayAccess {
1256 identifier: identifier.literal,
1257 index: Box::new(index),
1258 }
1259 } else {
1260 Expression::Identifier(identifier.literal)
1261 }
1262 }
1263 TokenKind::Length => {
1264 self.next_token();
1265 if self.current_token.kind == TokenKind::LeftParen {
1266 self.next_token();
1267 if self.current_token.kind == TokenKind::RightParen {
1268 self.next_token();
1269 Expression::Length(None)
1270 } else {
1271 let expression = self.parse_expression();
1272 if self.current_token.kind != TokenKind::RightParen {
1273 todo!()
1274 }
1275 self.next_token();
1276 Expression::Length(Some(Box::new(expression)))
1277 }
1278 } else {
1279 Expression::Length(None)
1280 }
1281 }
1282 TokenKind::Substr => {
1283 self.next_token();
1284 if self.current_token.kind != TokenKind::LeftParen {
1285 todo!()
1286 }
1287 self.next_token();
1288 let string = self.parse_expression();
1289 if self.current_token.kind != TokenKind::Comma {
1290 todo!()
1291 }
1292 self.next_token();
1293 let start = self.parse_expression();
1294 let mut length = None;
1295 if self.current_token.kind == TokenKind::Comma {
1296 self.next_token();
1297 length = Some(Box::new(self.parse_expression()));
1298 }
1299 if self.current_token.kind != TokenKind::RightParen {
1300 todo!()
1301 }
1302 self.next_token();
1303 Expression::Substr {
1304 string: Box::new(string),
1305 start: Box::new(start),
1306 length,
1307 }
1308 }
1309 TokenKind::Rand => {
1310 self.next_token();
1311 if self.current_token.kind == TokenKind::LeftParen {
1312 self.next_token();
1313 if self.current_token.kind != TokenKind::RightParen {
1314 todo!()
1315 }
1316 self.next_token();
1317 }
1318 Expression::Rand
1319 }
1320 TokenKind::Close
1321 | TokenKind::Cos
1322 | TokenKind::Exp
1323 | TokenKind::Index
1324 | TokenKind::Int
1325 | TokenKind::Log
1326 | TokenKind::Match
1327 | TokenKind::Sin
1328 | TokenKind::Sprintf
1329 | TokenKind::Split
1330 | TokenKind::Sqrt
1331 | TokenKind::Srand => {
1332 let name = self.current_token.literal;
1333 self.next_token();
1334 if self.current_token.kind == TokenKind::LeftParen {
1335 let args = self.parse_call_arguments();
1336 return Expression::FunctionCall { name, args };
1337 }
1338 Expression::Number(0.0)
1339 }
1340 _ => {
1341 panic!(
1342 "parse_primary_expression not yet implemented, found token: {:?}",
1343 self.current_token
1344 )
1345 }
1346 }
1347 }
1348
1349 pub fn try_parse_program(&mut self) -> Result<Program<'_>, ParseError<'a>> {
1350 let mut program = Program::new();
1351
1352 while !self.is_eof() {
1353 match self.parse_next_rule()? {
1354 Some(Rule::Begin(action)) => program.add_begin_block(action),
1355 Some(Rule::End(action)) => program.add_end_block(action),
1356 Some(rule) => program.add_rule(rule),
1357 None => {}
1358 }
1359 self.next_token_in_regex_context();
1360 }
1361
1362 for definition in self.function_definitions.drain(..) {
1363 program.add_function_definition(definition);
1364 }
1365
1366 Ok(program)
1367 }
1368
1369 pub fn parse_program(&mut self) -> Program<'_> {
1370 self.try_parse_program()
1371 .unwrap_or_else(|err| panic!("{err}"))
1372 }
1373
1374 fn parse_call_arguments(&mut self) -> Vec<Expression<'a>> {
1375 if self.current_token.kind != TokenKind::LeftParen {
1376 return vec![];
1377 }
1378 self.next_token_in_regex_context();
1379 let mut args = Vec::new();
1380 while self.current_token.kind != TokenKind::RightParen
1381 && self.current_token.kind != TokenKind::Eof
1382 {
1383 if self.current_token.kind == TokenKind::Comma {
1384 self.next_token();
1385 continue;
1386 }
1387 args.push(self.parse_expression());
1388 }
1389 if self.current_token.kind == TokenKind::RightParen {
1390 self.next_token();
1391 }
1392 args
1393 }
1394}
1395
1396fn infix_operator_precedence(kind: &TokenKind) -> Option<(u8, u8)> {
1397 match kind {
1398 TokenKind::Assign
1399 | TokenKind::AddAssign
1400 | TokenKind::SubtractAssign
1401 | TokenKind::MultiplyAssign
1402 | TokenKind::DivideAssign
1403 | TokenKind::ModuloAssign
1404 | TokenKind::PowerAssign => Some((0, 0)),
1405 TokenKind::Or => Some((1, 2)),
1406 TokenKind::And => Some((3, 4)),
1407 TokenKind::Equal
1408 | TokenKind::NotEqual
1409 | TokenKind::GreaterThan
1410 | TokenKind::GreaterThanOrEqual
1411 | TokenKind::In
1412 | TokenKind::LessThan
1413 | TokenKind::LessThanOrEqual
1414 | TokenKind::Tilde
1415 | TokenKind::NoMatch => Some((5, 6)),
1416 TokenKind::Plus | TokenKind::Minus => Some((7, 8)),
1417 TokenKind::Asterisk | TokenKind::Division | TokenKind::Percent => Some((9, 10)),
1418 TokenKind::Caret => Some((13, 12)),
1419 _ => None,
1420 }
1421}
1422
1423fn is_expression_start(kind: &TokenKind) -> bool {
1424 matches!(
1425 kind,
1426 TokenKind::String
1427 | TokenKind::Regex
1428 | TokenKind::Number
1429 | TokenKind::DollarSign
1430 | TokenKind::LeftParen
1431 | TokenKind::Identifier
1432 | TokenKind::Cos
1433 | TokenKind::Exp
1434 | TokenKind::Index
1435 | TokenKind::Int
1436 | TokenKind::Length
1437 | TokenKind::Log
1438 | TokenKind::Match
1439 | TokenKind::Rand
1440 | TokenKind::Sin
1441 | TokenKind::Sprintf
1442 | TokenKind::Split
1443 | TokenKind::Sqrt
1444 | TokenKind::Srand
1445 | TokenKind::Substr
1446 | TokenKind::Increment
1447 | TokenKind::Decrement
1448 )
1449}
1450
1451fn compound_assign_operator(token: &Token<'_>) -> Token<'static> {
1452 let (kind, literal) = match token.kind {
1453 TokenKind::AddAssign => (TokenKind::Plus, "+"),
1454 TokenKind::SubtractAssign => (TokenKind::Minus, "-"),
1455 TokenKind::MultiplyAssign => (TokenKind::Asterisk, "*"),
1456 TokenKind::DivideAssign => (TokenKind::Division, "/"),
1457 TokenKind::ModuloAssign => (TokenKind::Percent, "%"),
1458 TokenKind::PowerAssign => (TokenKind::Caret, "^"),
1459 _ => todo!(),
1460 };
1461
1462 Token::new(kind, literal, token.span.start)
1463}
1464
#[cfg(test)]
mod tests {
    use super::*;

    /// Evaluates to the statement list of the first `BEGIN` block of `$program`.
    macro_rules! first_begin_statements {
        ($program:expr) => {{
            let Action { statements } = $program
                .begin_blocks_iter()
                .next()
                .expect("expected begin block");
            statements
        }};
    }

    /// Evaluates to the statement list of the first rule of `$program`, which
    /// must be a pattern-less action.
    macro_rules! first_action_statements {
        ($program:expr) => {
            match $program.rules_iter().next().expect("expected rule") {
                Rule::Action(Action { statements }) => statements,
                _ => panic!("expected action rule"),
            }
        };
    }

    /// Evaluates to the expression list of the `print` statement found at
    /// `$index` in `$statements`.
    macro_rules! print_expressions {
        ($statements:expr, $index:expr) => {
            match &$statements[$index] {
                Statement::Print(expressions) => expressions,
                _ => panic!("expected print statement"),
            }
        };
    }

    /// Parses `source` with a fresh parser and returns the program rendered
    /// through its `Display` implementation.
    fn render(source: &str) -> String {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();
        program.to_string()
    }

    /// Asserts that `source` parses and renders as `expected`.
    #[track_caller]
    fn assert_renders(source: &str, expected: &str) {
        assert_eq!(expected, render(source));
    }

    /// Asserts that `source` parses and renders back to exactly `source`.
    #[track_caller]
    fn assert_round_trips(source: &str) {
        assert_renders(source, source);
    }

    /// Asserts that `source` parses into a program of length 1 that renders as
    /// `expected`.
    #[track_caller]
    fn assert_single_item_renders(source: &str, expected: &str) {
        let mut parser = Parser::new(Lexer::new(source));
        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(expected, program.to_string());
    }

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));

        // The constructor primes the parser with the first token.
        assert_eq!(parser.current_token.literal, "42");
        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        let program = parser.parse_program();

        assert_eq!(program.len(), 0);
    }

    #[test]
    fn parse_statement_with_unhandled_token_returns_parse_error() {
        let mut parser = Parser::new(Lexer::new("BEGIN { else }"));

        let err = parser
            .try_parse_program()
            .expect_err("expected parse error for stray else");

        let expected_kind = ParseErrorKind::UnexpectedToken {
            expected: "statement",
        };
        assert_eq!(err.kind, expected_kind);
        assert_eq!(err.token.kind, TokenKind::Else);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_item_renders("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_item_renders("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_item_renders("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_item_renders("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_item_renders("/foo/ { print }", "/foo/ { print }");
    }

    #[test]
    fn parse_print_infix_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 }"));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Infix {
            left,
            operator,
            right,
        } = &exprs[0]
        {
            assert!(matches!(**left, Expression::Number(1.0)));
            assert_eq!(operator.kind, TokenKind::Plus);
            assert!(matches!(**right, Expression::Number(2.0)));
        } else {
            panic!("expected infix expression");
        }
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print (1 + 2) * 3 }"));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Infix {
            left,
            operator,
            right,
        } = &exprs[0]
        {
            assert_eq!(operator.kind, TokenKind::Asterisk);
            assert!(matches!(**right, Expression::Number(3.0)));
            assert!(matches!(**left, Expression::Infix { .. }));
        } else {
            panic!("expected infix expression");
        }
    }

    #[test]
    fn parse_print_multiplication_has_higher_precedence_than_addition() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 1 + 2 * 3 }"));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Infix {
            left,
            operator,
            right,
        } = &exprs[0]
        {
            assert_eq!(operator.kind, TokenKind::Plus);
            assert!(matches!(**left, Expression::Number(1.0)));
            if let Expression::Infix {
                operator: right_op, ..
            } = &**right
            {
                assert_eq!(right_op.kind, TokenKind::Asterisk);
            } else {
                panic!("expected nested infix expression");
            }
        } else {
            panic!("expected infix expression");
        }
    }

    #[test]
    fn parse_print_power_is_right_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 2 ^ 3 ^ 2 }"));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Infix {
            left,
            operator,
            right,
        } = &exprs[0]
        {
            assert_eq!(operator.kind, TokenKind::Caret);
            assert!(matches!(**left, Expression::Number(2.0)));
            if let Expression::Infix {
                operator: right_op, ..
            } = &**right
            {
                assert_eq!(right_op.kind, TokenKind::Caret);
            } else {
                panic!("expected nested infix expression");
            }
        } else {
            panic!("expected infix expression");
        }
    }

    #[test]
    fn parse_print_minus_is_left_associative() {
        let mut parser = Parser::new(Lexer::new("BEGIN { print 5 - 3 - 1 }"));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Infix {
            left,
            operator,
            right,
        } = &exprs[0]
        {
            assert_eq!(operator.kind, TokenKind::Minus);
            if let Expression::Infix {
                operator: left_op, ..
            } = &**left
            {
                assert_eq!(left_op.kind, TokenKind::Minus);
            } else {
                panic!("expected nested infix expression");
            }
            assert!(matches!(**right, Expression::Number(1.0)));
        } else {
            panic!("expected infix expression");
        }
    }

    #[test]
    fn parse_print_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print "Value:" 42 }"#));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        assert_eq!(exprs.len(), 1);
        if let Expression::Concatenation { left, right } = &exprs[0] {
            assert!(matches!(**left, Expression::String("Value:")));
            assert!(matches!(**right, Expression::Number(42.0)));
        } else {
            panic!("expected concatenation expression");
        }
    }

    #[test]
    fn parse_continue_statement() {
        let mut parser = Parser::new(Lexer::new(r#"{ continue }"#));
        let program = parser.parse_program();
        let statements = first_action_statements!(program);

        assert!(matches!(statements[0], Statement::Continue));
    }

    #[test]
    fn parse_identifier_followed_by_spaced_parentheses_as_concatenation() {
        let mut parser = Parser::new(Lexer::new(r#"{ x = $1; print x (++i) }"#));
        let program = parser.parse_program();
        let statements = first_action_statements!(program);
        // The print statement is the second statement of the action.
        let exprs = print_expressions!(statements, 1);

        assert_eq!(exprs.len(), 1);
        if let Expression::Concatenation { left, right } = &exprs[0] {
            assert!(matches!(**left, Expression::Identifier("x")));
            assert!(matches!(**right, Expression::PreIncrement(_)));
        } else {
            panic!("expected concatenation expression");
        }
    }

    #[test]
    fn parse_print_field_expression() {
        let mut parser = Parser::new(Lexer::new("{ print $1 }"));
        let program = parser.parse_program();
        let statements = first_action_statements!(program);
        let exprs = print_expressions!(statements, 0);

        if let Expression::Field(inner) = &exprs[0] {
            assert!(matches!(**inner, Expression::Number(1.0)));
        } else {
            panic!("expected field expression");
        }
    }

    #[test]
    fn parse_print_with_commas() {
        assert_round_trips(r#"BEGIN { print "Value:", 42, $1 }"#);
    }

    #[test]
    fn parse_number_of_fields_identifier() {
        assert_round_trips(r#"BEGIN { print NF }"#);
    }

    #[test]
    fn parse_printf_with_format_and_arguments() {
        assert_round_trips(r#"{ printf "[%10s] [%-16d]\n", $1, $3 }"#);
    }

    #[test]
    fn parse_print_ternary_expression() {
        let mut parser = Parser::new(Lexer::new(r#"BEGIN { print x ? y : z }"#));
        let program = parser.parse_program();
        let statements = first_begin_statements!(program);
        let exprs = print_expressions!(statements, 0);

        assert_eq!(exprs.len(), 1);
        if let Expression::Ternary {
            condition,
            then_expr,
            else_expr,
        } = &exprs[0]
        {
            assert!(matches!(**condition, Expression::Identifier("x")));
            assert!(matches!(**then_expr, Expression::Identifier("y")));
            assert!(matches!(**else_expr, Expression::Identifier("z")));
        } else {
            panic!("expected ternary expression");
        }
    }

    #[test]
    fn parse_printf_without_arguments_returns_parse_error() {
        let mut parser = Parser::new(Lexer::new(r#"{ printf }"#));

        let err = parser
            .try_parse_program()
            .expect_err("expected parse error for printf without arguments");

        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
    }

    #[test]
    fn parse_printf_without_arguments_in_parentheses_returns_parse_error() {
        let mut parser = Parser::new(Lexer::new(r#"{ printf() }"#));

        let err = parser
            .try_parse_program()
            .expect_err("expected parse error for empty printf call");

        assert_eq!(err.kind, ParseErrorKind::MissingPrintfFormatString);
    }

    #[test]
    fn parse_add_assignment_and_pre_increment() {
        assert_round_trips(r#"/Asia/ { pop += $3; ++n }"#);
    }

    #[test]
    fn parse_regex_match_pattern_action() {
        assert_round_trips(r#"$4 ~ /Asia/ { print $1 }"#);
    }

    #[test]
    fn parse_not_pattern_action() {
        let mut parser = Parser::new(Lexer::new(r#"!($1 < 2000) { print $1 }"#));
        let program = parser.parse_program();
        let rule = program.rules_iter().next().expect("expected rule");

        if let Rule::PatternAction {
            pattern: Some(Expression::Not(inner)),
            action: Some(Action { statements }),
        } = rule
        {
            assert!(matches!(**inner, Expression::Infix { .. }));
            assert!(matches!(statements[0], Statement::Print(_)));
        } else {
            panic!("expected negated pattern action");
        }
    }

    #[test]
    fn parse_print_with_line_continuation_after_comma() {
        // The backslash-newline after the comma must not end the statement.
        assert_renders(
            "END { print \"population of\", n,\\\n\"Asian countries in millions is\", pop }",
            "END { print \"population of\", n, \"Asian countries in millions is\", pop }",
        );
    }

    #[test]
    fn parse_gsub_statement() {
        assert_round_trips(r#"{ gsub(/USA/, "United States"); print }"#);
    }

    #[test]
    fn parse_gsub_statement_with_target() {
        assert_round_trips(r#"{ gsub(/[ \t]+/, "", t) }"#);
    }

    #[test]
    fn parse_system_statement() {
        assert_round_trips(r#"{ system("cat " $2) }"#);
    }

    #[test]
    fn parse_print_length_builtin_expression() {
        assert_round_trips(r#"{ print length, $0 }"#);
    }

    #[test]
    fn parse_length_expression_as_rule_pattern() {
        assert_round_trips(
            r#"length($1) > max { max = length($1); name = $1 } END { print name }"#,
        );
    }

    #[test]
    fn parse_field_assignment_with_substr() {
        assert_round_trips(r#"{ $1 = substr($1, 1, 3); print }"#);
    }

    #[test]
    fn parse_assignment_with_concatenation_and_substr() {
        assert_round_trips(r#"{ s = s " " substr($1, 1, 3) }"#);
    }

    #[test]
    fn parse_field_divide_assignment() {
        // Compound assignment is rendered in its expanded form.
        assert_renders(r#"{ $2 /= 1000; print }"#, r#"{ $2 = $2 / 1000; print }"#);
    }

    #[test]
    fn parse_chained_assignment() {
        assert_round_trips(r#"BEGIN { FS = OFS = "\t" }"#);
    }

    #[test]
    fn parse_if_statement_with_block() {
        assert_round_trips(r#"{ if (maxpop < $3) { maxpop = $3; country = $1 } }"#);
    }

    #[test]
    fn parse_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; while (i <= NF) { print $i; i++ } }"#);
    }

    #[test]
    fn parse_while_with_single_body_statement() {
        assert_renders(
            r#"{ while (n > 1) print n }"#,
            r#"{ while (n > 1) { print n } }"#,
        );
    }

    #[test]
    fn parse_do_while_with_post_increment() {
        assert_round_trips(r#"{ i = 1; do { print $i; i++ } while (i <= NF) }"#);
    }

    #[test]
    fn parse_for_with_empty_body_statement() {
        assert_renders(
            r#"{ for (i = 1; i <= NF; s += $(i++)) ; print s }"#,
            r#"{ for (i = 1; i <= NF; s += $i++) { }; print s }"#,
        );
    }

    #[test]
    fn parse_post_decrement_statement() {
        assert_renders(r#"{ k-- ; n-- }"#, r#"{ k--; n-- }"#);
    }

    #[test]
    fn parse_rand_expression() {
        assert_round_trips(r#"BEGIN { print rand() }"#);
    }

    #[test]
    fn parse_math_builtin_expressions() {
        assert_round_trips(r#"{ print log($1), sqrt($1), int(sqrt($1)), exp($1 % 10) }"#);
    }

    #[test]
    fn parse_index_builtin_expression() {
        assert_round_trips(r#"{ print index(1, $1) }"#);
    }

    #[test]
    fn parse_match_builtin_expression() {
        assert_round_trips(r#"{ print match($NF, $1), RSTART, RLENGTH }"#);
    }

    #[test]
    fn parse_in_membership_expression() {
        assert_round_trips(r#"{ print 1 in x }"#);
    }

    #[test]
    fn parse_parenthesized_composite_membership_expression() {
        assert_renders(
            r#"{ if (($0, $1) in x) print "yes" }"#,
            r#"{ if ($0, $1 in x) { print "yes" } }"#,
        );
    }

    #[test]
    fn parse_for_loop_with_single_body_statement() {
        assert_renders(
            r#"{ for (i = 1; i <= NF; i++) print $i }"#,
            r#"{ for (i = 1; i <= NF; i++) { print $i } }"#,
        );
    }

    #[test]
    fn parse_if_with_single_statement_body() {
        assert_renders(
            r#"END { if (NR < 10) print FILENAME " has only " NR " lines" }"#,
            r#"END { if (NR < 10) { print FILENAME " has only " NR " lines" } }"#,
        );
    }

    #[test]
    fn parse_exit_statement() {
        assert_round_trips(r#"NR >= 10 { exit }"#);
    }

    #[test]
    fn parse_exit_statement_with_status() {
        assert_round_trips(r#"$1 < 5000 { exit NR }"#);
    }

    #[test]
    fn parse_user_defined_function_call_statement() {
        let mut parser = Parser::new(Lexer::new(
            "BEGIN { myabort(1) }\nfunction myabort(n) { exit n }",
        ));
        let program = parser.parse_program();

        let definition = program
            .function_definition("myabort")
            .expect("expected function definition");

        assert_eq!(definition.parameters, vec!["n"]);
        assert_eq!(definition.statements.len(), 1);
    }

    #[test]
    fn parse_delete_array_element_statement() {
        assert_round_trips(r#"{ delete x[i, j] }"#);
    }

    #[test]
    fn parse_array_add_assignment_and_access() {
        assert_round_trips(r#"/Asia/ { pop["Asia"] += $3 } END { print pop["Asia"] }"#);
    }

    #[test]
    fn parse_for_in_loop() {
        assert_renders(
            r#"END { for (name in area) print name ":" area[name] }"#,
            r#"END { for (name in area) { print name ":" area[name] } }"#,
        );
    }

    #[test]
    fn parse_print_redirection() {
        assert_renders(r#"{ print >"tempbig" }"#, r#"{ print > "tempbig" }"#);
    }

    #[test]
    fn parse_print_pipe() {
        assert_round_trips(r#"{ print c ":" pop[c] | "sort" }"#);
    }

    #[test]
    fn parse_hexadecimal_number() {
        assert_round_trips(r#"BEGIN { print 0xAA }"#);
    }
}