1use crate::ast::*;
2use crate::error::{Error, Result, SourceLocation};
3use crate::lexer::{Token, TokenKind};
4
/// Recursive-descent parser that turns a token sequence into a [`Program`].
pub struct Parser {
    /// The token sequence being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    current: usize,
}
10
11impl Parser {
12 pub fn new(tokens: Vec<Token>) -> Self {
13 Self { tokens, current: 0 }
14 }
15
16 pub fn parse(&mut self) -> Result<Program> {
18 let mut program = Program::new();
19
20 self.skip_newlines();
21
22 while !self.is_at_end() {
23 if self.check(&TokenKind::Function) {
25 program.functions.push(self.parse_function()?);
26 } else {
27 program.rules.push(self.parse_rule()?);
28 }
29 self.skip_newlines();
30 }
31
32 Ok(program)
33 }
34
35 fn parse_function(&mut self) -> Result<FunctionDef> {
37 let location = self.current_location();
38 self.expect(&TokenKind::Function)?;
39
40 let name = self.expect_identifier()?;
41 self.expect(&TokenKind::LeftParen)?;
42
43 let mut params = Vec::new();
44 if !self.check(&TokenKind::RightParen) {
45 params.push(self.expect_identifier()?);
46 while self.match_token(&TokenKind::Comma) {
47 params.push(self.expect_identifier()?);
48 }
49 }
50 self.expect(&TokenKind::RightParen)?;
51 self.skip_newlines();
52
53 let body = self.parse_block()?;
54
55 Ok(FunctionDef {
56 name,
57 params,
58 body,
59 location,
60 })
61 }
62
63 fn parse_rule(&mut self) -> Result<Rule> {
65 let location = self.current_location();
66
67 if self.check(&TokenKind::Begin) {
69 self.advance();
70 self.skip_newlines();
71 let action = Some(self.parse_block()?);
72 return Ok(Rule {
73 pattern: Some(Pattern::Begin),
74 action,
75 location,
76 });
77 }
78
79 if self.check(&TokenKind::End) {
80 self.advance();
81 self.skip_newlines();
82 let action = Some(self.parse_block()?);
83 return Ok(Rule {
84 pattern: Some(Pattern::End),
85 action,
86 location,
87 });
88 }
89
90 if self.check(&TokenKind::BeginFile) {
91 self.advance();
92 self.skip_newlines();
93 let action = Some(self.parse_block()?);
94 return Ok(Rule {
95 pattern: Some(Pattern::BeginFile),
96 action,
97 location,
98 });
99 }
100
101 if self.check(&TokenKind::EndFile) {
102 self.advance();
103 self.skip_newlines();
104 let action = Some(self.parse_block()?);
105 return Ok(Rule {
106 pattern: Some(Pattern::EndFile),
107 action,
108 location,
109 });
110 }
111
112 if self.check(&TokenKind::LeftBrace) {
114 let action = Some(self.parse_block()?);
115 return Ok(Rule {
116 pattern: None,
117 action,
118 location,
119 });
120 }
121
122 let pattern = Some(self.parse_pattern()?);
124 self.skip_newlines();
125
126 let action = if self.check(&TokenKind::LeftBrace) {
128 Some(self.parse_block()?)
129 } else {
130 None
131 };
132
133 Ok(Rule {
134 pattern,
135 action,
136 location,
137 })
138 }
139
140 fn parse_pattern(&mut self) -> Result<Pattern> {
142 if let Some(TokenKind::Regex(pattern)) = self.peek_kind() {
144 let pattern = pattern.clone();
145 self.advance();
146
147 if self.match_token(&TokenKind::Comma) {
149 self.skip_newlines();
150 let end = self.parse_pattern()?;
151 return Ok(Pattern::Range {
152 start: Box::new(Pattern::Regex(pattern)),
153 end: Box::new(end),
154 });
155 }
156
157 return Ok(Pattern::Regex(pattern));
158 }
159
160 let expr = self.parse_expression()?;
162
163 if self.match_token(&TokenKind::Comma) {
165 self.skip_newlines();
166 let end = self.parse_pattern()?;
167 return Ok(Pattern::Range {
168 start: Box::new(Pattern::Expr(expr)),
169 end: Box::new(end),
170 });
171 }
172
173 Ok(Pattern::Expr(expr))
174 }
175
176 fn parse_block(&mut self) -> Result<Block> {
178 let location = self.current_location();
179 self.expect(&TokenKind::LeftBrace)?;
180 self.skip_newlines();
181
182 let mut statements = Vec::new();
183
184 while !self.check(&TokenKind::RightBrace) && !self.is_at_end() {
185 statements.push(self.parse_statement()?);
186 self.skip_terminators();
187 }
188
189 self.expect(&TokenKind::RightBrace)?;
190
191 Ok(Block::new(statements, location))
192 }
193
194 fn parse_statement(&mut self) -> Result<Stmt> {
196 self.skip_newlines();
197
198 let location = self.current_location();
199
200 if self.check(&TokenKind::Semicolon) {
202 self.advance();
203 return Ok(Stmt::Empty);
204 }
205
206 if self.check(&TokenKind::LeftBrace) {
208 return Ok(Stmt::Block(self.parse_block()?));
209 }
210
211 if self.match_token(&TokenKind::If) {
213 return self.parse_if_statement(location);
214 }
215
216 if self.match_token(&TokenKind::While) {
218 return self.parse_while_statement(location);
219 }
220
221 if self.match_token(&TokenKind::For) {
223 return self.parse_for_statement(location);
224 }
225
226 if self.match_token(&TokenKind::Do) {
228 return self.parse_do_while_statement(location);
229 }
230
231 if self.match_token(&TokenKind::Break) {
233 return Ok(Stmt::Break { location });
234 }
235
236 if self.match_token(&TokenKind::Continue) {
238 return Ok(Stmt::Continue { location });
239 }
240
241 if self.match_token(&TokenKind::Next) {
243 return Ok(Stmt::Next { location });
244 }
245
246 if self.match_token(&TokenKind::Nextfile) {
248 return Ok(Stmt::Nextfile { location });
249 }
250
251 if self.match_token(&TokenKind::Exit) {
253 let code = if self.can_start_expression() {
254 Some(self.parse_expression()?)
255 } else {
256 None
257 };
258 return Ok(Stmt::Exit { code, location });
259 }
260
261 if self.match_token(&TokenKind::Return) {
263 let value = if self.can_start_expression() {
264 Some(self.parse_expression()?)
265 } else {
266 None
267 };
268 return Ok(Stmt::Return { value, location });
269 }
270
271 if self.match_token(&TokenKind::Delete) {
273 let name = self.expect_identifier()?;
274
275 let indices = if self.match_token(&TokenKind::LeftBracket) {
277 let mut indices = vec![self.parse_expression()?];
278 while self.match_token(&TokenKind::Comma) {
279 indices.push(self.parse_expression()?);
280 }
281 self.expect(&TokenKind::RightBracket)?;
282 indices
283 } else {
284 Vec::new()
286 };
287
288 return Ok(Stmt::Delete {
289 array: name,
290 index: indices,
291 location,
292 });
293 }
294
295 if self.match_token(&TokenKind::Print) {
297 return self.parse_print_statement(location);
298 }
299
300 if self.match_token(&TokenKind::Printf) {
302 return self.parse_printf_statement(location);
303 }
304
305 let expr = self.parse_expression()?;
307 Ok(Stmt::Expr(expr))
308 }
309
310 fn parse_if_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
311 self.expect(&TokenKind::LeftParen)?;
312 let condition = self.parse_expression()?;
313 self.expect(&TokenKind::RightParen)?;
314 self.skip_newlines();
315
316 let then_branch = Box::new(self.parse_statement()?);
317
318 self.skip_terminators();
320 let else_branch = if self.match_token(&TokenKind::Else) {
321 self.skip_newlines();
322 Some(Box::new(self.parse_statement()?))
323 } else {
324 None
325 };
326
327 Ok(Stmt::If {
328 condition,
329 then_branch,
330 else_branch,
331 location,
332 })
333 }
334
335 fn parse_while_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
336 self.expect(&TokenKind::LeftParen)?;
337 let condition = self.parse_expression()?;
338 self.expect(&TokenKind::RightParen)?;
339 self.skip_newlines();
340
341 let body = Box::new(self.parse_statement()?);
342
343 Ok(Stmt::While {
344 condition,
345 body,
346 location,
347 })
348 }
349
350 fn parse_for_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
351 self.expect(&TokenKind::LeftParen)?;
352
353 if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
355 let name = name.clone();
356 let saved_pos = self.current;
357 self.advance();
358
359 if self.match_token(&TokenKind::In) {
360 let array = self.expect_identifier()?;
361 self.expect(&TokenKind::RightParen)?;
362 self.skip_newlines();
363 let body = Box::new(self.parse_statement()?);
364
365 return Ok(Stmt::ForIn {
366 var: name,
367 array,
368 body,
369 location,
370 });
371 }
372
373 self.current = saved_pos;
375 }
376
377 let init = if !self.check(&TokenKind::Semicolon) {
379 Some(Box::new(self.parse_statement()?))
380 } else {
381 None
382 };
383 self.expect(&TokenKind::Semicolon)?;
384
385 let condition = if !self.check(&TokenKind::Semicolon) {
386 Some(self.parse_expression()?)
387 } else {
388 None
389 };
390 self.expect(&TokenKind::Semicolon)?;
391
392 let update = if !self.check(&TokenKind::RightParen) {
393 Some(self.parse_expression()?)
394 } else {
395 None
396 };
397 self.expect(&TokenKind::RightParen)?;
398 self.skip_newlines();
399
400 let body = Box::new(self.parse_statement()?);
401
402 Ok(Stmt::For {
403 init,
404 condition,
405 update,
406 body,
407 location,
408 })
409 }
410
411 fn parse_do_while_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
412 self.skip_newlines();
413 let body = Box::new(self.parse_statement()?);
414 self.skip_newlines();
415 self.expect(&TokenKind::While)?;
416 self.expect(&TokenKind::LeftParen)?;
417 let condition = self.parse_expression()?;
418 self.expect(&TokenKind::RightParen)?;
419
420 Ok(Stmt::DoWhile {
421 body,
422 condition,
423 location,
424 })
425 }
426
427 fn parse_print_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
428 let mut args = Vec::new();
429
430 if self.can_start_expression()
432 && !self.check(&TokenKind::Greater)
433 && !self.check(&TokenKind::Append)
434 && !self.check(&TokenKind::Pipe)
435 {
436 args.push(self.parse_print_arg()?);
437 while self.match_token(&TokenKind::Comma) {
438 args.push(self.parse_print_arg()?);
439 }
440 }
441
442 let output = self.parse_output_redirect()?;
444
445 Ok(Stmt::Print {
446 args,
447 output,
448 location,
449 })
450 }
451
452 fn parse_printf_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
453 let format = self.parse_print_arg()?;
454 let mut args = Vec::new();
455
456 while self.match_token(&TokenKind::Comma) {
457 args.push(self.parse_print_arg()?);
458 }
459
460 let output = self.parse_output_redirect()?;
461
462 Ok(Stmt::Printf {
463 format,
464 args,
465 output,
466 location,
467 })
468 }
469
    /// Parses one `print`/`printf` argument or redirection target.
    ///
    /// Delegates to the `parse_print_*` chain, starting at the ternary level.
    fn parse_print_arg(&mut self) -> Result<Expr> {
        self.parse_print_ternary()
    }
475
476 fn parse_print_ternary(&mut self) -> Result<Expr> {
477 let expr = self.parse_print_or()?;
478
479 if self.match_token(&TokenKind::Question) {
480 let location = self.current_location();
481 let then_expr = self.parse_print_ternary()?;
482 self.expect(&TokenKind::Colon)?;
483 let else_expr = self.parse_print_ternary()?;
484 return Ok(Expr::Ternary {
485 condition: Box::new(expr),
486 then_expr: Box::new(then_expr),
487 else_expr: Box::new(else_expr),
488 location,
489 });
490 }
491
492 Ok(expr)
493 }
494
495 fn parse_print_or(&mut self) -> Result<Expr> {
496 let mut expr = self.parse_print_and()?;
497
498 while self.match_token(&TokenKind::Or) {
499 let location = self.current_location();
500 let right = self.parse_print_and()?;
501 expr = Expr::Binary {
502 left: Box::new(expr),
503 op: BinaryOp::Or,
504 right: Box::new(right),
505 location,
506 };
507 }
508
509 Ok(expr)
510 }
511
512 fn parse_print_and(&mut self) -> Result<Expr> {
513 let mut expr = self.parse_print_in()?;
514
515 while self.match_token(&TokenKind::And) {
516 let location = self.current_location();
517 let right = self.parse_print_in()?;
518 expr = Expr::Binary {
519 left: Box::new(expr),
520 op: BinaryOp::And,
521 right: Box::new(right),
522 location,
523 };
524 }
525
526 Ok(expr)
527 }
528
529 fn parse_print_in(&mut self) -> Result<Expr> {
530 let expr = self.parse_print_match()?;
531
532 if self.match_token(&TokenKind::In) {
533 let location = self.current_location();
534 let array = self.expect_identifier()?;
535 return Ok(Expr::InArray {
536 key: vec![expr],
537 array,
538 location,
539 });
540 }
541
542 Ok(expr)
543 }
544
545 fn parse_print_match(&mut self) -> Result<Expr> {
546 let expr = self.parse_print_comparison()?;
547
548 let location = self.current_location();
549 if self.match_token(&TokenKind::Match) {
550 let pattern = self.parse_print_comparison()?;
551 return Ok(Expr::Match {
552 expr: Box::new(expr),
553 pattern: Box::new(pattern),
554 negated: false,
555 location,
556 });
557 }
558
559 if self.match_token(&TokenKind::NotMatch) {
560 let pattern = self.parse_print_comparison()?;
561 return Ok(Expr::Match {
562 expr: Box::new(expr),
563 pattern: Box::new(pattern),
564 negated: true,
565 location,
566 });
567 }
568
569 Ok(expr)
570 }
571
572 fn parse_print_comparison(&mut self) -> Result<Expr> {
573 let mut expr = self.parse_concat()?;
574
575 loop {
578 let location = self.current_location();
579 let op = if self.match_token(&TokenKind::Less) {
580 BinaryOp::Lt
581 } else if self.match_token(&TokenKind::LessEqual) {
582 BinaryOp::Le
583 } else if self.match_token(&TokenKind::GreaterEqual) {
584 BinaryOp::Ge
585 } else if self.match_token(&TokenKind::Equal) {
586 BinaryOp::Eq
587 } else if self.match_token(&TokenKind::NotEqual) {
588 BinaryOp::Ne
589 } else {
590 break;
592 };
593
594 let right = self.parse_concat()?;
595 expr = Expr::Binary {
596 left: Box::new(expr),
597 op,
598 right: Box::new(right),
599 location,
600 };
601 }
602
603 Ok(expr)
604 }
605
606 fn parse_output_redirect(&mut self) -> Result<Option<OutputRedirect>> {
607 if self.match_token(&TokenKind::Greater) {
608 let target = self.parse_print_arg()?;
609 Ok(Some(OutputRedirect::Truncate(target)))
610 } else if self.match_token(&TokenKind::Append) {
611 let target = self.parse_print_arg()?;
612 Ok(Some(OutputRedirect::Append(target)))
613 } else if self.match_token(&TokenKind::Pipe) {
614 let target = self.parse_print_arg()?;
615 Ok(Some(OutputRedirect::Pipe(target)))
616 } else {
617 Ok(None)
618 }
619 }
620
    /// Parses a full expression.
    ///
    /// Entry point of the expression grammar; delegates to the assignment
    /// level, which is the first level of the `parse_*` expression chain.
    fn parse_expression(&mut self) -> Result<Expr> {
        self.parse_assignment()
    }
625
626 fn parse_assignment(&mut self) -> Result<Expr> {
627 let expr = self.parse_ternary()?;
628
629 let location = self.current_location();
631 let op = if self.match_token(&TokenKind::Assign) {
632 Some(AssignOp::Assign)
633 } else if self.match_token(&TokenKind::PlusAssign) {
634 Some(AssignOp::AddAssign)
635 } else if self.match_token(&TokenKind::MinusAssign) {
636 Some(AssignOp::SubAssign)
637 } else if self.match_token(&TokenKind::StarAssign) {
638 Some(AssignOp::MulAssign)
639 } else if self.match_token(&TokenKind::SlashAssign) {
640 Some(AssignOp::DivAssign)
641 } else if self.match_token(&TokenKind::PercentAssign) {
642 Some(AssignOp::ModAssign)
643 } else if self.match_token(&TokenKind::CaretAssign) {
644 Some(AssignOp::PowAssign)
645 } else {
646 None
647 };
648
649 if let Some(op) = op {
650 let value = self.parse_assignment()?;
651 return Ok(Expr::Assign {
652 target: Box::new(expr),
653 op,
654 value: Box::new(value),
655 location,
656 });
657 }
658
659 Ok(expr)
660 }
661
662 fn parse_ternary(&mut self) -> Result<Expr> {
663 let expr = self.parse_or()?;
664
665 if self.match_token(&TokenKind::Question) {
666 let location = self.current_location();
667 let then_expr = self.parse_expression()?;
668 self.expect(&TokenKind::Colon)?;
669 let else_expr = self.parse_ternary()?;
670 return Ok(Expr::Ternary {
671 condition: Box::new(expr),
672 then_expr: Box::new(then_expr),
673 else_expr: Box::new(else_expr),
674 location,
675 });
676 }
677
678 Ok(expr)
679 }
680
681 fn parse_or(&mut self) -> Result<Expr> {
682 let mut expr = self.parse_and()?;
683
684 while self.match_token(&TokenKind::Or) {
685 let location = self.current_location();
686 let right = self.parse_and()?;
687 expr = Expr::Binary {
688 left: Box::new(expr),
689 op: BinaryOp::Or,
690 right: Box::new(right),
691 location,
692 };
693 }
694
695 Ok(expr)
696 }
697
698 fn parse_and(&mut self) -> Result<Expr> {
699 let mut expr = self.parse_in()?;
700
701 while self.match_token(&TokenKind::And) {
702 let location = self.current_location();
703 let right = self.parse_in()?;
704 expr = Expr::Binary {
705 left: Box::new(expr),
706 op: BinaryOp::And,
707 right: Box::new(right),
708 location,
709 };
710 }
711
712 Ok(expr)
713 }
714
715 fn parse_in(&mut self) -> Result<Expr> {
716 let expr = self.parse_pipe_getline()?;
717
718 if self.match_token(&TokenKind::In) {
721 let location = self.current_location();
722 let array = self.expect_identifier()?;
723 return Ok(Expr::InArray {
724 key: vec![expr],
725 array,
726 location,
727 });
728 }
729
730 Ok(expr)
731 }
732
733 fn parse_pipe_getline(&mut self) -> Result<Expr> {
735 let expr = self.parse_match()?;
736
737 if self.check(&TokenKind::Pipe) {
739 let saved_pos = self.current;
741 self.advance(); if self.check(&TokenKind::Getline) {
744 let location = self.current_location();
745 self.advance(); let var = if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
749 let name = name.clone();
750 self.advance();
751 Some(name)
752 } else {
753 None
754 };
755
756 return Ok(Expr::Getline {
757 var,
758 input: Some(GetlineInput::Pipe(Box::new(expr))),
759 location,
760 });
761 } else {
762 self.current = saved_pos;
764 }
765 }
766
767 Ok(expr)
768 }
769
770 fn parse_match(&mut self) -> Result<Expr> {
771 let expr = self.parse_comparison()?;
772
773 let location = self.current_location();
774 if self.match_token(&TokenKind::Match) {
775 let pattern = self.parse_comparison()?;
776 return Ok(Expr::Match {
777 expr: Box::new(expr),
778 pattern: Box::new(pattern),
779 negated: false,
780 location,
781 });
782 }
783
784 if self.match_token(&TokenKind::NotMatch) {
785 let pattern = self.parse_comparison()?;
786 return Ok(Expr::Match {
787 expr: Box::new(expr),
788 pattern: Box::new(pattern),
789 negated: true,
790 location,
791 });
792 }
793
794 Ok(expr)
795 }
796
797 fn parse_comparison(&mut self) -> Result<Expr> {
798 let mut expr = self.parse_concat()?;
799
800 loop {
801 let location = self.current_location();
802 let op = if self.match_token(&TokenKind::Less) {
803 BinaryOp::Lt
804 } else if self.match_token(&TokenKind::LessEqual) {
805 BinaryOp::Le
806 } else if self.match_token(&TokenKind::Greater) {
807 BinaryOp::Gt
808 } else if self.match_token(&TokenKind::GreaterEqual) {
809 BinaryOp::Ge
810 } else if self.match_token(&TokenKind::Equal) {
811 BinaryOp::Eq
812 } else if self.match_token(&TokenKind::NotEqual) {
813 BinaryOp::Ne
814 } else {
815 break;
816 };
817
818 let right = self.parse_concat()?;
819 expr = Expr::Binary {
820 left: Box::new(expr),
821 op,
822 right: Box::new(right),
823 location,
824 };
825 }
826
827 Ok(expr)
828 }
829
830 fn parse_concat(&mut self) -> Result<Expr> {
831 let mut expr = self.parse_additive()?;
832
833 while self.can_start_concat_operand() {
836 let right = self.parse_additive()?;
837 let location = expr.location();
838 expr = Expr::Binary {
839 left: Box::new(expr),
840 op: BinaryOp::Concat,
841 right: Box::new(right),
842 location,
843 };
844 }
845
846 Ok(expr)
847 }
848
849 fn can_start_concat_operand(&mut self) -> bool {
850 if let Some(kind) = self.peek_kind() {
851 matches!(
852 kind,
853 TokenKind::Number(_)
854 | TokenKind::String(_)
855 | TokenKind::Identifier(_)
856 | TokenKind::Dollar
857 | TokenKind::LeftParen
858 | TokenKind::Not
859 | TokenKind::Increment
860 | TokenKind::Decrement
861 )
862 } else {
863 false
864 }
865 }
866
867 fn parse_additive(&mut self) -> Result<Expr> {
868 let mut expr = self.parse_multiplicative()?;
869
870 loop {
871 let location = self.current_location();
872 let op = if self.match_token(&TokenKind::Plus) {
873 BinaryOp::Add
874 } else if self.match_token(&TokenKind::Minus) {
875 BinaryOp::Sub
876 } else {
877 break;
878 };
879
880 let right = self.parse_multiplicative()?;
881 expr = Expr::Binary {
882 left: Box::new(expr),
883 op,
884 right: Box::new(right),
885 location,
886 };
887 }
888
889 Ok(expr)
890 }
891
892 fn parse_multiplicative(&mut self) -> Result<Expr> {
893 let mut expr = self.parse_power()?;
894
895 loop {
896 let location = self.current_location();
897 let op = if self.match_token(&TokenKind::Star) {
898 BinaryOp::Mul
899 } else if self.match_token(&TokenKind::Slash) {
900 BinaryOp::Div
901 } else if self.match_token(&TokenKind::Percent) {
902 BinaryOp::Mod
903 } else {
904 break;
905 };
906
907 let right = self.parse_power()?;
908 expr = Expr::Binary {
909 left: Box::new(expr),
910 op,
911 right: Box::new(right),
912 location,
913 };
914 }
915
916 Ok(expr)
917 }
918
919 fn parse_power(&mut self) -> Result<Expr> {
920 let expr = self.parse_unary()?;
921
922 if self.match_token(&TokenKind::Caret) {
924 let location = self.current_location();
925 let right = self.parse_power()?;
926 return Ok(Expr::Binary {
927 left: Box::new(expr),
928 op: BinaryOp::Pow,
929 right: Box::new(right),
930 location,
931 });
932 }
933
934 Ok(expr)
935 }
936
937 fn parse_unary(&mut self) -> Result<Expr> {
938 let location = self.current_location();
939
940 if self.match_token(&TokenKind::Not) {
941 let operand = self.parse_unary()?;
942 return Ok(Expr::Unary {
943 op: UnaryOp::Not,
944 operand: Box::new(operand),
945 location,
946 });
947 }
948
949 if self.match_token(&TokenKind::Minus) {
950 let operand = self.parse_unary()?;
951 return Ok(Expr::Unary {
952 op: UnaryOp::Neg,
953 operand: Box::new(operand),
954 location,
955 });
956 }
957
958 if self.match_token(&TokenKind::Plus) {
959 let operand = self.parse_unary()?;
960 return Ok(Expr::Unary {
961 op: UnaryOp::Pos,
962 operand: Box::new(operand),
963 location,
964 });
965 }
966
967 if self.match_token(&TokenKind::Increment) {
968 let operand = self.parse_unary()?;
969 return Ok(Expr::PreIncrement(Box::new(operand), location));
970 }
971
972 if self.match_token(&TokenKind::Decrement) {
973 let operand = self.parse_unary()?;
974 return Ok(Expr::PreDecrement(Box::new(operand), location));
975 }
976
977 self.parse_postfix()
978 }
979
980 fn parse_postfix(&mut self) -> Result<Expr> {
981 let mut expr = self.parse_field()?;
982
983 loop {
984 let location = self.current_location();
985
986 if self.match_token(&TokenKind::Increment) {
987 expr = Expr::PostIncrement(Box::new(expr), location);
988 } else if self.match_token(&TokenKind::Decrement) {
989 expr = Expr::PostDecrement(Box::new(expr), location);
990 } else if self.match_token(&TokenKind::LeftBracket) {
991 if let Expr::Var(name, _) = expr {
993 let mut indices = vec![self.parse_expression()?];
994 while self.match_token(&TokenKind::Comma) {
995 indices.push(self.parse_expression()?);
996 }
997 self.expect(&TokenKind::RightBracket)?;
998 expr = Expr::ArrayAccess {
999 array: name,
1000 indices,
1001 location,
1002 };
1003 } else {
1004 return Err(Error::parser(
1005 "array access requires variable name",
1006 location.line,
1007 location.column,
1008 ));
1009 }
1010 } else {
1011 break;
1012 }
1013 }
1014
1015 Ok(expr)
1016 }
1017
1018 fn parse_field(&mut self) -> Result<Expr> {
1019 if self.match_token(&TokenKind::Dollar) {
1020 let location = self.current_location();
1021 let expr = self.parse_field()?;
1022 return Ok(Expr::Field(Box::new(expr), location));
1023 }
1024
1025 self.parse_primary()
1026 }
1027
1028 fn parse_primary(&mut self) -> Result<Expr> {
1029 let location = self.current_location();
1030
1031 if let Some(TokenKind::Number(n)) = self.peek_kind() {
1033 let n = *n;
1034 self.advance();
1035 return Ok(Expr::Number(n, location));
1036 }
1037
1038 if let Some(TokenKind::String(s)) = self.peek_kind() {
1040 let s = s.clone();
1041 self.advance();
1042 return Ok(Expr::String(s, location));
1043 }
1044
1045 if let Some(TokenKind::Regex(r)) = self.peek_kind() {
1047 let r = r.clone();
1048 self.advance();
1049 return Ok(Expr::Regex(r, location));
1050 }
1051
1052 if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1054 let name = name.clone();
1055 self.advance();
1056
1057 if self.match_token(&TokenKind::LeftParen) {
1059 let mut args = Vec::new();
1060 if !self.check(&TokenKind::RightParen) {
1061 args.push(self.parse_expression()?);
1062 while self.match_token(&TokenKind::Comma) {
1063 args.push(self.parse_expression()?);
1064 }
1065 }
1066 self.expect(&TokenKind::RightParen)?;
1067 return Ok(Expr::Call {
1068 name,
1069 args,
1070 location,
1071 });
1072 }
1073
1074 return Ok(Expr::Var(name, location));
1075 }
1076
1077 if self.match_token(&TokenKind::Getline) {
1079 let var = if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1080 let name = name.clone();
1081 self.advance();
1082 Some(name)
1083 } else {
1084 None
1085 };
1086
1087 let input = if self.match_token(&TokenKind::Less) {
1088 Some(GetlineInput::File(Box::new(self.parse_primary()?)))
1089 } else {
1090 None
1091 };
1092
1093 return Ok(Expr::Getline {
1094 var,
1095 input,
1096 location,
1097 });
1098 }
1099
1100 if self.match_token(&TokenKind::LeftParen) {
1102 let expr = self.parse_expression()?;
1103 self.expect(&TokenKind::RightParen)?;
1104 return Ok(Expr::Group(Box::new(expr), location));
1105 }
1106
1107 Err(Error::parser(
1108 format!("unexpected token {:?}", self.peek_kind()),
1109 location.line,
1110 location.column,
1111 ))
1112 }
1113
1114 fn peek_kind(&self) -> Option<&TokenKind> {
1117 self.tokens.get(self.current).map(|t| &t.kind)
1118 }
1119
1120 fn current_location(&self) -> SourceLocation {
1121 self.tokens
1122 .get(self.current)
1123 .map(|t| t.location)
1124 .unwrap_or(SourceLocation::new(0, 0))
1125 }
1126
1127 fn is_at_end(&self) -> bool {
1128 matches!(self.peek_kind(), None | Some(TokenKind::Eof))
1129 }
1130
1131 fn check(&self, kind: &TokenKind) -> bool {
1132 self.peek_kind()
1133 .map(|k| std::mem::discriminant(k) == std::mem::discriminant(kind))
1134 .unwrap_or(false)
1135 }
1136
1137 fn advance(&mut self) -> Option<&Token> {
1138 if !self.is_at_end() {
1139 self.current += 1;
1140 }
1141 self.tokens.get(self.current - 1)
1142 }
1143
1144 fn match_token(&mut self, kind: &TokenKind) -> bool {
1145 if self.check(kind) {
1146 self.advance();
1147 true
1148 } else {
1149 false
1150 }
1151 }
1152
1153 fn expect(&mut self, kind: &TokenKind) -> Result<&Token> {
1154 if self.check(kind) {
1155 Ok(self.advance().unwrap())
1156 } else {
1157 let loc = self.current_location();
1158 Err(Error::parser(
1159 format!("expected {:?}, found {:?}", kind, self.peek_kind()),
1160 loc.line,
1161 loc.column,
1162 ))
1163 }
1164 }
1165
1166 fn expect_identifier(&mut self) -> Result<String> {
1167 if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1168 let name = name.clone();
1169 self.advance();
1170 Ok(name)
1171 } else {
1172 let loc = self.current_location();
1173 Err(Error::parser(
1174 format!("expected identifier, found {:?}", self.peek_kind()),
1175 loc.line,
1176 loc.column,
1177 ))
1178 }
1179 }
1180
1181 fn skip_newlines(&mut self) {
1182 while self.match_token(&TokenKind::Newline) {}
1183 }
1184
1185 fn skip_terminators(&mut self) {
1186 while self.match_token(&TokenKind::Newline) || self.match_token(&TokenKind::Semicolon) {}
1187 }
1188
1189 fn can_start_expression(&self) -> bool {
1190 self.peek_kind()
1191 .map(|k| k.can_start_expression())
1192 .unwrap_or(false)
1193 }
1194}
1195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    /// Lexes and parses `source`, returning the resulting program.
    fn parse(source: &str) -> Result<Program> {
        let mut lexer = Lexer::new(source);
        let tokens = lexer.tokenize()?;
        Parser::new(tokens).parse()
    }

    /// Asserts that `source` parses successfully into exactly `expected` rules.
    fn assert_rule_count(source: &str, expected: usize) {
        let program = parse(source).unwrap();
        assert_eq!(program.rules.len(), expected);
    }

    #[test]
    fn test_simple_print() {
        assert_rule_count(r#"{ print "hello" }"#, 1);
    }

    #[test]
    fn test_begin_end() {
        let program = parse(r#"BEGIN { x = 1 } END { print x }"#).unwrap();
        assert_eq!(program.rules.len(), 2);
        assert!(matches!(program.rules[0].pattern, Some(Pattern::Begin)));
        assert!(matches!(program.rules[1].pattern, Some(Pattern::End)));
    }

    #[test]
    fn test_regex_pattern() {
        let program = parse(r#"/foo/ { print }"#).unwrap();
        assert_eq!(program.rules.len(), 1);
        assert!(matches!(
            &program.rules[0].pattern,
            Some(Pattern::Regex(r)) if r == "foo"
        ));
    }

    #[test]
    fn test_arithmetic() {
        assert_rule_count(r#"{ x = 1 + 2 * 3 }"#, 1);
    }

    #[test]
    fn test_function_def() {
        let program = parse(r#"function add(a, b) { return a + b }"#).unwrap();
        assert_eq!(program.functions.len(), 1);
        assert_eq!(program.functions[0].name, "add");
        assert_eq!(program.functions[0].params, vec!["a", "b"]);
    }

    #[test]
    fn test_if_else() {
        assert_rule_count(r#"{ if (x) print 1; else print 2 }"#, 1);
    }

    #[test]
    fn test_while_loop() {
        assert_rule_count(r#"{ while (x < 10) x++ }"#, 1);
    }

    #[test]
    fn test_for_loop() {
        assert_rule_count(r#"{ for (i=0; i<10; i++) print i }"#, 1);
    }

    #[test]
    fn test_for_in_loop() {
        assert_rule_count(r#"{ for (k in a) print k }"#, 1);
    }

    #[test]
    fn test_do_while() {
        assert_rule_count(r#"{ do { x++ } while (x < 10) }"#, 1);
    }

    #[test]
    fn test_delete() {
        assert_rule_count(r#"{ delete a[1] }"#, 1);
    }

    #[test]
    fn test_delete_array() {
        assert_rule_count(r#"{ delete a }"#, 1);
    }

    #[test]
    fn test_break_continue() {
        assert_rule_count(r#"{ break; continue }"#, 1);
    }

    #[test]
    fn test_next_nextfile() {
        assert_rule_count(r#"{ next } { nextfile }"#, 2);
    }

    #[test]
    fn test_exit() {
        assert_rule_count(r#"{ exit 0 }"#, 1);
    }

    #[test]
    fn test_return() {
        let program = parse(r#"function f() { return 42 }"#).unwrap();
        assert_eq!(program.functions.len(), 1);
    }

    #[test]
    fn test_printf() {
        assert_rule_count(r#"{ printf "%d", x }"#, 1);
    }

    #[test]
    fn test_getline() {
        assert_rule_count(r#"{ getline x < "file" }"#, 1);
    }

    #[test]
    fn test_pipe_getline() {
        assert_rule_count(r#"{ "cmd" | getline x }"#, 1);
    }

    #[test]
    fn test_output_redirect() {
        assert_rule_count(r#"{ print "x" > "file" }"#, 1);
    }

    #[test]
    fn test_output_append() {
        assert_rule_count(r#"{ print "x" >> "file" }"#, 1);
    }

    #[test]
    fn test_output_pipe() {
        assert_rule_count(r#"{ print "x" | "cmd" }"#, 1);
    }

    #[test]
    fn test_ternary() {
        assert_rule_count(r#"{ x = a ? b : c }"#, 1);
    }

    #[test]
    fn test_logical_and_or() {
        assert_rule_count(r#"{ x = a && b || c }"#, 1);
    }

    #[test]
    fn test_array_in() {
        assert_rule_count(r#"{ x = (1 in a) }"#, 1);
    }

    #[test]
    fn test_regex_match() {
        assert_rule_count(r#"{ x = ($0 ~ /foo/) }"#, 1);
    }

    #[test]
    fn test_concatenation() {
        assert_rule_count(r#"{ x = a b c }"#, 1);
    }

    #[test]
    fn test_field_access() {
        assert_rule_count(r#"{ print $1, $NF, $(2+1) }"#, 1);
    }

    #[test]
    fn test_array_multi_index() {
        assert_rule_count(r#"{ a[1,2,3] = x }"#, 1);
    }

    #[test]
    fn test_function_call() {
        assert_rule_count(r#"{ x = substr(s, 1, 5) }"#, 1);
    }

    #[test]
    fn test_pre_increment() {
        assert_rule_count(r#"{ ++x; --y }"#, 1);
    }

    #[test]
    fn test_post_increment() {
        assert_rule_count(r#"{ x++; y-- }"#, 1);
    }

    #[test]
    fn test_compound_assign() {
        assert_rule_count(r#"{ x += 1; y -= 1; z *= 2; w /= 2; v %= 3; p ^= 2 }"#, 1);
    }

    #[test]
    fn test_range_pattern() {
        let program = parse(r#"/start/,/end/ { print }"#).unwrap();
        assert_eq!(program.rules.len(), 1);
        assert!(matches!(
            &program.rules[0].pattern,
            Some(Pattern::Range { .. })
        ));
    }

    #[test]
    fn test_expression_pattern() {
        assert_rule_count(r#"NR > 5 { print }"#, 1);
    }

    #[test]
    fn test_beginfile_endfile() {
        let program = parse(r#"BEGINFILE { x = 1 } ENDFILE { print }"#).unwrap();
        assert_eq!(program.rules.len(), 2);
        assert!(matches!(program.rules[0].pattern, Some(Pattern::BeginFile)));
        assert!(matches!(program.rules[1].pattern, Some(Pattern::EndFile)));
    }

    #[test]
    fn test_empty_statement() {
        assert_rule_count(r#"{ ; ; ; }"#, 1);
    }

    #[test]
    fn test_block_statement() {
        assert_rule_count(r#"{ { { x = 1 } } }"#, 1);
    }

    #[test]
    fn test_multiple_rules() {
        assert_rule_count(r#"BEGIN { } { } END { }"#, 3);
    }

    #[test]
    fn test_parenthesized_expression() {
        assert_rule_count(r#"{ x = (1 + 2) * 3 }"#, 1);
    }

    #[test]
    fn test_unary_ops() {
        assert_rule_count(r#"{ x = -a + +b }"#, 1);
    }

    #[test]
    fn test_not_operator() {
        assert_rule_count(r#"{ x = !a }"#, 1);
    }

    #[test]
    fn test_comparison_ops() {
        assert_rule_count(
            r#"{ x = a < b && b <= c && c > d && d >= e && e == f && f != g }"#,
            1,
        );
    }

    #[test]
    fn test_exponentiation() {
        assert_rule_count(r#"{ x = 2^3^4 }"#, 1);
    }
}