awk_rs/parser/
mod.rs

1use crate::ast::*;
2use crate::error::{Error, Result, SourceLocation};
3use crate::lexer::{Token, TokenKind};
4
/// AWK parser using recursive descent.
///
/// Owns the token sequence handed to `Parser::new` and a cursor into it.
pub struct Parser {
    /// Tokens being parsed, as supplied to `Parser::new`.
    tokens: Vec<Token>,
    /// Cursor position; set to 0 by `Parser::new` and saved/restored by the
    /// methods that backtrack. Presumably the index of the next unread token
    /// in `tokens` -- confirm against the helper methods.
    current: usize,
}
10
11impl Parser {
12    pub fn new(tokens: Vec<Token>) -> Self {
13        Self { tokens, current: 0 }
14    }
15
16    /// Parse a complete AWK program
17    pub fn parse(&mut self) -> Result<Program> {
18        let mut program = Program::new();
19
20        self.skip_newlines();
21
22        while !self.is_at_end() {
23            // Check for function definition
24            if self.check(&TokenKind::Function) {
25                program.functions.push(self.parse_function()?);
26            } else {
27                program.rules.push(self.parse_rule()?);
28            }
29            self.skip_newlines();
30        }
31
32        Ok(program)
33    }
34
35    /// Parse a function definition
36    fn parse_function(&mut self) -> Result<FunctionDef> {
37        let location = self.current_location();
38        self.expect(&TokenKind::Function)?;
39
40        let name = self.expect_identifier()?;
41        self.expect(&TokenKind::LeftParen)?;
42
43        let mut params = Vec::new();
44        if !self.check(&TokenKind::RightParen) {
45            params.push(self.expect_identifier()?);
46            while self.match_token(&TokenKind::Comma) {
47                params.push(self.expect_identifier()?);
48            }
49        }
50        self.expect(&TokenKind::RightParen)?;
51        self.skip_newlines();
52
53        let body = self.parse_block()?;
54
55        Ok(FunctionDef {
56            name,
57            params,
58            body,
59            location,
60        })
61    }
62
63    /// Parse a pattern-action rule
64    fn parse_rule(&mut self) -> Result<Rule> {
65        let location = self.current_location();
66
67        // Check for BEGIN/END/BEGINFILE/ENDFILE
68        if self.check(&TokenKind::Begin) {
69            self.advance();
70            self.skip_newlines();
71            let action = Some(self.parse_block()?);
72            return Ok(Rule {
73                pattern: Some(Pattern::Begin),
74                action,
75                location,
76            });
77        }
78
79        if self.check(&TokenKind::End) {
80            self.advance();
81            self.skip_newlines();
82            let action = Some(self.parse_block()?);
83            return Ok(Rule {
84                pattern: Some(Pattern::End),
85                action,
86                location,
87            });
88        }
89
90        if self.check(&TokenKind::BeginFile) {
91            self.advance();
92            self.skip_newlines();
93            let action = Some(self.parse_block()?);
94            return Ok(Rule {
95                pattern: Some(Pattern::BeginFile),
96                action,
97                location,
98            });
99        }
100
101        if self.check(&TokenKind::EndFile) {
102            self.advance();
103            self.skip_newlines();
104            let action = Some(self.parse_block()?);
105            return Ok(Rule {
106                pattern: Some(Pattern::EndFile),
107                action,
108                location,
109            });
110        }
111
112        // Check for action-only rule (just a block)
113        if self.check(&TokenKind::LeftBrace) {
114            let action = Some(self.parse_block()?);
115            return Ok(Rule {
116                pattern: None,
117                action,
118                location,
119            });
120        }
121
122        // Parse pattern
123        let pattern = Some(self.parse_pattern()?);
124        self.skip_newlines();
125
126        // Optional action
127        let action = if self.check(&TokenKind::LeftBrace) {
128            Some(self.parse_block()?)
129        } else {
130            None
131        };
132
133        Ok(Rule {
134            pattern,
135            action,
136            location,
137        })
138    }
139
140    /// Parse a pattern (expression or regex)
141    fn parse_pattern(&mut self) -> Result<Pattern> {
142        // Check for regex pattern
143        if let Some(TokenKind::Regex(pattern)) = self.peek_kind() {
144            let pattern = pattern.clone();
145            self.advance();
146
147            // Check for range pattern
148            if self.match_token(&TokenKind::Comma) {
149                self.skip_newlines();
150                let end = self.parse_pattern()?;
151                return Ok(Pattern::Range {
152                    start: Box::new(Pattern::Regex(pattern)),
153                    end: Box::new(end),
154                });
155            }
156
157            return Ok(Pattern::Regex(pattern));
158        }
159
160        // Parse as expression pattern
161        let expr = self.parse_expression()?;
162
163        // Check for range pattern
164        if self.match_token(&TokenKind::Comma) {
165            self.skip_newlines();
166            let end = self.parse_pattern()?;
167            return Ok(Pattern::Range {
168                start: Box::new(Pattern::Expr(expr)),
169                end: Box::new(end),
170            });
171        }
172
173        Ok(Pattern::Expr(expr))
174    }
175
176    /// Parse a block { ... }
177    fn parse_block(&mut self) -> Result<Block> {
178        let location = self.current_location();
179        self.expect(&TokenKind::LeftBrace)?;
180        self.skip_newlines();
181
182        let mut statements = Vec::new();
183
184        while !self.check(&TokenKind::RightBrace) && !self.is_at_end() {
185            statements.push(self.parse_statement()?);
186            self.skip_terminators();
187        }
188
189        self.expect(&TokenKind::RightBrace)?;
190
191        Ok(Block::new(statements, location))
192    }
193
194    /// Parse a single statement
195    fn parse_statement(&mut self) -> Result<Stmt> {
196        self.skip_newlines();
197
198        let location = self.current_location();
199
200        // Empty statement
201        if self.check(&TokenKind::Semicolon) {
202            self.advance();
203            return Ok(Stmt::Empty);
204        }
205
206        // Block
207        if self.check(&TokenKind::LeftBrace) {
208            return Ok(Stmt::Block(self.parse_block()?));
209        }
210
211        // If statement
212        if self.match_token(&TokenKind::If) {
213            return self.parse_if_statement(location);
214        }
215
216        // While statement
217        if self.match_token(&TokenKind::While) {
218            return self.parse_while_statement(location);
219        }
220
221        // For statement
222        if self.match_token(&TokenKind::For) {
223            return self.parse_for_statement(location);
224        }
225
226        // Do-while statement
227        if self.match_token(&TokenKind::Do) {
228            return self.parse_do_while_statement(location);
229        }
230
231        // Break
232        if self.match_token(&TokenKind::Break) {
233            return Ok(Stmt::Break { location });
234        }
235
236        // Continue
237        if self.match_token(&TokenKind::Continue) {
238            return Ok(Stmt::Continue { location });
239        }
240
241        // Next
242        if self.match_token(&TokenKind::Next) {
243            return Ok(Stmt::Next { location });
244        }
245
246        // Nextfile
247        if self.match_token(&TokenKind::Nextfile) {
248            return Ok(Stmt::Nextfile { location });
249        }
250
251        // Exit
252        if self.match_token(&TokenKind::Exit) {
253            let code = if self.can_start_expression() {
254                Some(self.parse_expression()?)
255            } else {
256                None
257            };
258            return Ok(Stmt::Exit { code, location });
259        }
260
261        // Return
262        if self.match_token(&TokenKind::Return) {
263            let value = if self.can_start_expression() {
264                Some(self.parse_expression()?)
265            } else {
266                None
267            };
268            return Ok(Stmt::Return { value, location });
269        }
270
271        // Delete
272        if self.match_token(&TokenKind::Delete) {
273            let name = self.expect_identifier()?;
274
275            // Check if there's an index (delete array[i]) or not (delete array)
276            let indices = if self.match_token(&TokenKind::LeftBracket) {
277                let mut indices = vec![self.parse_expression()?];
278                while self.match_token(&TokenKind::Comma) {
279                    indices.push(self.parse_expression()?);
280                }
281                self.expect(&TokenKind::RightBracket)?;
282                indices
283            } else {
284                // delete array (entire array)
285                Vec::new()
286            };
287
288            return Ok(Stmt::Delete {
289                array: name,
290                index: indices,
291                location,
292            });
293        }
294
295        // Print statement
296        if self.match_token(&TokenKind::Print) {
297            return self.parse_print_statement(location);
298        }
299
300        // Printf statement
301        if self.match_token(&TokenKind::Printf) {
302            return self.parse_printf_statement(location);
303        }
304
305        // Expression statement
306        let expr = self.parse_expression()?;
307        Ok(Stmt::Expr(expr))
308    }
309
310    fn parse_if_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
311        self.expect(&TokenKind::LeftParen)?;
312        let condition = self.parse_expression()?;
313        self.expect(&TokenKind::RightParen)?;
314        self.skip_newlines();
315
316        let then_branch = Box::new(self.parse_statement()?);
317
318        // Skip terminators (semicolons and newlines) before checking for else
319        self.skip_terminators();
320        let else_branch = if self.match_token(&TokenKind::Else) {
321            self.skip_newlines();
322            Some(Box::new(self.parse_statement()?))
323        } else {
324            None
325        };
326
327        Ok(Stmt::If {
328            condition,
329            then_branch,
330            else_branch,
331            location,
332        })
333    }
334
335    fn parse_while_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
336        self.expect(&TokenKind::LeftParen)?;
337        let condition = self.parse_expression()?;
338        self.expect(&TokenKind::RightParen)?;
339        self.skip_newlines();
340
341        let body = Box::new(self.parse_statement()?);
342
343        Ok(Stmt::While {
344            condition,
345            body,
346            location,
347        })
348    }
349
350    fn parse_for_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
351        self.expect(&TokenKind::LeftParen)?;
352
353        // Check for for-in loop: for (var in array)
354        if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
355            let name = name.clone();
356            let saved_pos = self.current;
357            self.advance();
358
359            if self.match_token(&TokenKind::In) {
360                let array = self.expect_identifier()?;
361                self.expect(&TokenKind::RightParen)?;
362                self.skip_newlines();
363                let body = Box::new(self.parse_statement()?);
364
365                return Ok(Stmt::ForIn {
366                    var: name,
367                    array,
368                    body,
369                    location,
370                });
371            }
372
373            // Not a for-in, backtrack
374            self.current = saved_pos;
375        }
376
377        // C-style for loop
378        let init = if !self.check(&TokenKind::Semicolon) {
379            Some(Box::new(self.parse_statement()?))
380        } else {
381            None
382        };
383        self.expect(&TokenKind::Semicolon)?;
384
385        let condition = if !self.check(&TokenKind::Semicolon) {
386            Some(self.parse_expression()?)
387        } else {
388            None
389        };
390        self.expect(&TokenKind::Semicolon)?;
391
392        let update = if !self.check(&TokenKind::RightParen) {
393            Some(self.parse_expression()?)
394        } else {
395            None
396        };
397        self.expect(&TokenKind::RightParen)?;
398        self.skip_newlines();
399
400        let body = Box::new(self.parse_statement()?);
401
402        Ok(Stmt::For {
403            init,
404            condition,
405            update,
406            body,
407            location,
408        })
409    }
410
411    fn parse_do_while_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
412        self.skip_newlines();
413        let body = Box::new(self.parse_statement()?);
414        self.skip_newlines();
415        self.expect(&TokenKind::While)?;
416        self.expect(&TokenKind::LeftParen)?;
417        let condition = self.parse_expression()?;
418        self.expect(&TokenKind::RightParen)?;
419
420        Ok(Stmt::DoWhile {
421            body,
422            condition,
423            location,
424        })
425    }
426
427    fn parse_print_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
428        let mut args = Vec::new();
429
430        // Check for arguments (print without args prints $0)
431        if self.can_start_expression()
432            && !self.check(&TokenKind::Greater)
433            && !self.check(&TokenKind::Append)
434            && !self.check(&TokenKind::Pipe)
435        {
436            args.push(self.parse_print_arg()?);
437            while self.match_token(&TokenKind::Comma) {
438                args.push(self.parse_print_arg()?);
439            }
440        }
441
442        // Check for output redirection
443        let output = self.parse_output_redirect()?;
444
445        Ok(Stmt::Print {
446            args,
447            output,
448            location,
449        })
450    }
451
452    fn parse_printf_statement(&mut self, location: SourceLocation) -> Result<Stmt> {
453        let format = self.parse_print_arg()?;
454        let mut args = Vec::new();
455
456        while self.match_token(&TokenKind::Comma) {
457            args.push(self.parse_print_arg()?);
458        }
459
460        let output = self.parse_output_redirect()?;
461
462        Ok(Stmt::Printf {
463            format,
464            args,
465            output,
466            location,
467        })
468    }
469
470    fn parse_print_arg(&mut self) -> Result<Expr> {
471        // Print arguments don't include comparison operators with > at the top level
472        // because > is used for output redirection. We parse up to concatenation level.
473        self.parse_print_ternary()
474    }
475
476    fn parse_print_ternary(&mut self) -> Result<Expr> {
477        let expr = self.parse_print_or()?;
478
479        if self.match_token(&TokenKind::Question) {
480            let location = self.current_location();
481            let then_expr = self.parse_print_ternary()?;
482            self.expect(&TokenKind::Colon)?;
483            let else_expr = self.parse_print_ternary()?;
484            return Ok(Expr::Ternary {
485                condition: Box::new(expr),
486                then_expr: Box::new(then_expr),
487                else_expr: Box::new(else_expr),
488                location,
489            });
490        }
491
492        Ok(expr)
493    }
494
495    fn parse_print_or(&mut self) -> Result<Expr> {
496        let mut expr = self.parse_print_and()?;
497
498        while self.match_token(&TokenKind::Or) {
499            let location = self.current_location();
500            let right = self.parse_print_and()?;
501            expr = Expr::Binary {
502                left: Box::new(expr),
503                op: BinaryOp::Or,
504                right: Box::new(right),
505                location,
506            };
507        }
508
509        Ok(expr)
510    }
511
512    fn parse_print_and(&mut self) -> Result<Expr> {
513        let mut expr = self.parse_print_in()?;
514
515        while self.match_token(&TokenKind::And) {
516            let location = self.current_location();
517            let right = self.parse_print_in()?;
518            expr = Expr::Binary {
519                left: Box::new(expr),
520                op: BinaryOp::And,
521                right: Box::new(right),
522                location,
523            };
524        }
525
526        Ok(expr)
527    }
528
529    fn parse_print_in(&mut self) -> Result<Expr> {
530        let expr = self.parse_print_match()?;
531
532        if self.match_token(&TokenKind::In) {
533            let location = self.current_location();
534            let array = self.expect_identifier()?;
535            return Ok(Expr::InArray {
536                key: vec![expr],
537                array,
538                location,
539            });
540        }
541
542        Ok(expr)
543    }
544
545    fn parse_print_match(&mut self) -> Result<Expr> {
546        let expr = self.parse_print_comparison()?;
547
548        let location = self.current_location();
549        if self.match_token(&TokenKind::Match) {
550            let pattern = self.parse_print_comparison()?;
551            return Ok(Expr::Match {
552                expr: Box::new(expr),
553                pattern: Box::new(pattern),
554                negated: false,
555                location,
556            });
557        }
558
559        if self.match_token(&TokenKind::NotMatch) {
560            let pattern = self.parse_print_comparison()?;
561            return Ok(Expr::Match {
562                expr: Box::new(expr),
563                pattern: Box::new(pattern),
564                negated: true,
565                location,
566            });
567        }
568
569        Ok(expr)
570    }
571
572    fn parse_print_comparison(&mut self) -> Result<Expr> {
573        let mut expr = self.parse_concat()?;
574
575        // For print args, we DON'T consume > or >> because they're used for output redirection
576        // But we DO handle >=, <, <=, ==, !=
577        loop {
578            let location = self.current_location();
579            let op = if self.match_token(&TokenKind::Less) {
580                BinaryOp::Lt
581            } else if self.match_token(&TokenKind::LessEqual) {
582                BinaryOp::Le
583            } else if self.match_token(&TokenKind::GreaterEqual) {
584                BinaryOp::Ge
585            } else if self.match_token(&TokenKind::Equal) {
586                BinaryOp::Eq
587            } else if self.match_token(&TokenKind::NotEqual) {
588                BinaryOp::Ne
589            } else {
590                // Don't consume > or >> for print arguments (used for output redirection)
591                break;
592            };
593
594            let right = self.parse_concat()?;
595            expr = Expr::Binary {
596                left: Box::new(expr),
597                op,
598                right: Box::new(right),
599                location,
600            };
601        }
602
603        Ok(expr)
604    }
605
606    fn parse_output_redirect(&mut self) -> Result<Option<OutputRedirect>> {
607        if self.match_token(&TokenKind::Greater) {
608            let target = self.parse_print_arg()?;
609            Ok(Some(OutputRedirect::Truncate(target)))
610        } else if self.match_token(&TokenKind::Append) {
611            let target = self.parse_print_arg()?;
612            Ok(Some(OutputRedirect::Append(target)))
613        } else if self.match_token(&TokenKind::Pipe) {
614            let target = self.parse_print_arg()?;
615            Ok(Some(OutputRedirect::Pipe(target)))
616        } else {
617            Ok(None)
618        }
619    }
620
621    /// Parse an expression
622    fn parse_expression(&mut self) -> Result<Expr> {
623        self.parse_assignment()
624    }
625
626    fn parse_assignment(&mut self) -> Result<Expr> {
627        let expr = self.parse_ternary()?;
628
629        // Check for assignment operators
630        let location = self.current_location();
631        let op = if self.match_token(&TokenKind::Assign) {
632            Some(AssignOp::Assign)
633        } else if self.match_token(&TokenKind::PlusAssign) {
634            Some(AssignOp::AddAssign)
635        } else if self.match_token(&TokenKind::MinusAssign) {
636            Some(AssignOp::SubAssign)
637        } else if self.match_token(&TokenKind::StarAssign) {
638            Some(AssignOp::MulAssign)
639        } else if self.match_token(&TokenKind::SlashAssign) {
640            Some(AssignOp::DivAssign)
641        } else if self.match_token(&TokenKind::PercentAssign) {
642            Some(AssignOp::ModAssign)
643        } else if self.match_token(&TokenKind::CaretAssign) {
644            Some(AssignOp::PowAssign)
645        } else {
646            None
647        };
648
649        if let Some(op) = op {
650            let value = self.parse_assignment()?;
651            return Ok(Expr::Assign {
652                target: Box::new(expr),
653                op,
654                value: Box::new(value),
655                location,
656            });
657        }
658
659        Ok(expr)
660    }
661
662    fn parse_ternary(&mut self) -> Result<Expr> {
663        let expr = self.parse_or()?;
664
665        if self.match_token(&TokenKind::Question) {
666            let location = self.current_location();
667            let then_expr = self.parse_expression()?;
668            self.expect(&TokenKind::Colon)?;
669            let else_expr = self.parse_ternary()?;
670            return Ok(Expr::Ternary {
671                condition: Box::new(expr),
672                then_expr: Box::new(then_expr),
673                else_expr: Box::new(else_expr),
674                location,
675            });
676        }
677
678        Ok(expr)
679    }
680
681    fn parse_or(&mut self) -> Result<Expr> {
682        let mut expr = self.parse_and()?;
683
684        while self.match_token(&TokenKind::Or) {
685            let location = self.current_location();
686            let right = self.parse_and()?;
687            expr = Expr::Binary {
688                left: Box::new(expr),
689                op: BinaryOp::Or,
690                right: Box::new(right),
691                location,
692            };
693        }
694
695        Ok(expr)
696    }
697
698    fn parse_and(&mut self) -> Result<Expr> {
699        let mut expr = self.parse_in()?;
700
701        while self.match_token(&TokenKind::And) {
702            let location = self.current_location();
703            let right = self.parse_in()?;
704            expr = Expr::Binary {
705                left: Box::new(expr),
706                op: BinaryOp::And,
707                right: Box::new(right),
708                location,
709            };
710        }
711
712        Ok(expr)
713    }
714
715    fn parse_in(&mut self) -> Result<Expr> {
716        let expr = self.parse_pipe_getline()?;
717
718        // Check for "in" (array membership)
719        // Format: (expr) in array or expr in array
720        if self.match_token(&TokenKind::In) {
721            let location = self.current_location();
722            let array = self.expect_identifier()?;
723            return Ok(Expr::InArray {
724                key: vec![expr],
725                array,
726                location,
727            });
728        }
729
730        Ok(expr)
731    }
732
733    /// Handle `cmd | getline [var]` syntax
734    fn parse_pipe_getline(&mut self) -> Result<Expr> {
735        let expr = self.parse_match()?;
736
737        // Check for pipe to getline: expr | getline [var]
738        if self.check(&TokenKind::Pipe) {
739            // Look ahead to see if getline follows
740            let saved_pos = self.current;
741            self.advance(); // consume |
742
743            if self.check(&TokenKind::Getline) {
744                let location = self.current_location();
745                self.advance(); // consume getline
746
747                // Optional variable name
748                let var = if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
749                    let name = name.clone();
750                    self.advance();
751                    Some(name)
752                } else {
753                    None
754                };
755
756                return Ok(Expr::Getline {
757                    var,
758                    input: Some(GetlineInput::Pipe(Box::new(expr))),
759                    location,
760                });
761            } else {
762                // Not getline, backtrack
763                self.current = saved_pos;
764            }
765        }
766
767        Ok(expr)
768    }
769
770    fn parse_match(&mut self) -> Result<Expr> {
771        let expr = self.parse_comparison()?;
772
773        let location = self.current_location();
774        if self.match_token(&TokenKind::Match) {
775            let pattern = self.parse_comparison()?;
776            return Ok(Expr::Match {
777                expr: Box::new(expr),
778                pattern: Box::new(pattern),
779                negated: false,
780                location,
781            });
782        }
783
784        if self.match_token(&TokenKind::NotMatch) {
785            let pattern = self.parse_comparison()?;
786            return Ok(Expr::Match {
787                expr: Box::new(expr),
788                pattern: Box::new(pattern),
789                negated: true,
790                location,
791            });
792        }
793
794        Ok(expr)
795    }
796
797    fn parse_comparison(&mut self) -> Result<Expr> {
798        let mut expr = self.parse_concat()?;
799
800        loop {
801            let location = self.current_location();
802            let op = if self.match_token(&TokenKind::Less) {
803                BinaryOp::Lt
804            } else if self.match_token(&TokenKind::LessEqual) {
805                BinaryOp::Le
806            } else if self.match_token(&TokenKind::Greater) {
807                BinaryOp::Gt
808            } else if self.match_token(&TokenKind::GreaterEqual) {
809                BinaryOp::Ge
810            } else if self.match_token(&TokenKind::Equal) {
811                BinaryOp::Eq
812            } else if self.match_token(&TokenKind::NotEqual) {
813                BinaryOp::Ne
814            } else {
815                break;
816            };
817
818            let right = self.parse_concat()?;
819            expr = Expr::Binary {
820                left: Box::new(expr),
821                op,
822                right: Box::new(right),
823                location,
824            };
825        }
826
827        Ok(expr)
828    }
829
830    fn parse_concat(&mut self) -> Result<Expr> {
831        let mut expr = self.parse_additive()?;
832
833        // Concatenation is implicit between adjacent expressions
834        // But we need to be careful about operators that could follow
835        while self.can_start_concat_operand() {
836            let right = self.parse_additive()?;
837            let location = expr.location();
838            expr = Expr::Binary {
839                left: Box::new(expr),
840                op: BinaryOp::Concat,
841                right: Box::new(right),
842                location,
843            };
844        }
845
846        Ok(expr)
847    }
848
849    fn can_start_concat_operand(&mut self) -> bool {
850        if let Some(kind) = self.peek_kind() {
851            matches!(
852                kind,
853                TokenKind::Number(_)
854                    | TokenKind::String(_)
855                    | TokenKind::Identifier(_)
856                    | TokenKind::Dollar
857                    | TokenKind::LeftParen
858                    | TokenKind::Not
859                    | TokenKind::Increment
860                    | TokenKind::Decrement
861            )
862        } else {
863            false
864        }
865    }
866
867    fn parse_additive(&mut self) -> Result<Expr> {
868        let mut expr = self.parse_multiplicative()?;
869
870        loop {
871            let location = self.current_location();
872            let op = if self.match_token(&TokenKind::Plus) {
873                BinaryOp::Add
874            } else if self.match_token(&TokenKind::Minus) {
875                BinaryOp::Sub
876            } else {
877                break;
878            };
879
880            let right = self.parse_multiplicative()?;
881            expr = Expr::Binary {
882                left: Box::new(expr),
883                op,
884                right: Box::new(right),
885                location,
886            };
887        }
888
889        Ok(expr)
890    }
891
892    fn parse_multiplicative(&mut self) -> Result<Expr> {
893        let mut expr = self.parse_power()?;
894
895        loop {
896            let location = self.current_location();
897            let op = if self.match_token(&TokenKind::Star) {
898                BinaryOp::Mul
899            } else if self.match_token(&TokenKind::Slash) {
900                BinaryOp::Div
901            } else if self.match_token(&TokenKind::Percent) {
902                BinaryOp::Mod
903            } else {
904                break;
905            };
906
907            let right = self.parse_power()?;
908            expr = Expr::Binary {
909                left: Box::new(expr),
910                op,
911                right: Box::new(right),
912                location,
913            };
914        }
915
916        Ok(expr)
917    }
918
919    fn parse_power(&mut self) -> Result<Expr> {
920        let expr = self.parse_unary()?;
921
922        // Exponentiation is right-associative
923        if self.match_token(&TokenKind::Caret) {
924            let location = self.current_location();
925            let right = self.parse_power()?;
926            return Ok(Expr::Binary {
927                left: Box::new(expr),
928                op: BinaryOp::Pow,
929                right: Box::new(right),
930                location,
931            });
932        }
933
934        Ok(expr)
935    }
936
937    fn parse_unary(&mut self) -> Result<Expr> {
938        let location = self.current_location();
939
940        if self.match_token(&TokenKind::Not) {
941            let operand = self.parse_unary()?;
942            return Ok(Expr::Unary {
943                op: UnaryOp::Not,
944                operand: Box::new(operand),
945                location,
946            });
947        }
948
949        if self.match_token(&TokenKind::Minus) {
950            let operand = self.parse_unary()?;
951            return Ok(Expr::Unary {
952                op: UnaryOp::Neg,
953                operand: Box::new(operand),
954                location,
955            });
956        }
957
958        if self.match_token(&TokenKind::Plus) {
959            let operand = self.parse_unary()?;
960            return Ok(Expr::Unary {
961                op: UnaryOp::Pos,
962                operand: Box::new(operand),
963                location,
964            });
965        }
966
967        if self.match_token(&TokenKind::Increment) {
968            let operand = self.parse_unary()?;
969            return Ok(Expr::PreIncrement(Box::new(operand), location));
970        }
971
972        if self.match_token(&TokenKind::Decrement) {
973            let operand = self.parse_unary()?;
974            return Ok(Expr::PreDecrement(Box::new(operand), location));
975        }
976
977        self.parse_postfix()
978    }
979
980    fn parse_postfix(&mut self) -> Result<Expr> {
981        let mut expr = self.parse_field()?;
982
983        loop {
984            let location = self.current_location();
985
986            if self.match_token(&TokenKind::Increment) {
987                expr = Expr::PostIncrement(Box::new(expr), location);
988            } else if self.match_token(&TokenKind::Decrement) {
989                expr = Expr::PostDecrement(Box::new(expr), location);
990            } else if self.match_token(&TokenKind::LeftBracket) {
991                // Array access
992                if let Expr::Var(name, _) = expr {
993                    let mut indices = vec![self.parse_expression()?];
994                    while self.match_token(&TokenKind::Comma) {
995                        indices.push(self.parse_expression()?);
996                    }
997                    self.expect(&TokenKind::RightBracket)?;
998                    expr = Expr::ArrayAccess {
999                        array: name,
1000                        indices,
1001                        location,
1002                    };
1003                } else {
1004                    return Err(Error::parser(
1005                        "array access requires variable name",
1006                        location.line,
1007                        location.column,
1008                    ));
1009                }
1010            } else {
1011                break;
1012            }
1013        }
1014
1015        Ok(expr)
1016    }
1017
1018    fn parse_field(&mut self) -> Result<Expr> {
1019        if self.match_token(&TokenKind::Dollar) {
1020            let location = self.current_location();
1021            let expr = self.parse_field()?;
1022            return Ok(Expr::Field(Box::new(expr), location));
1023        }
1024
1025        self.parse_primary()
1026    }
1027
1028    fn parse_primary(&mut self) -> Result<Expr> {
1029        let location = self.current_location();
1030
1031        // Number literal
1032        if let Some(TokenKind::Number(n)) = self.peek_kind() {
1033            let n = *n;
1034            self.advance();
1035            return Ok(Expr::Number(n, location));
1036        }
1037
1038        // String literal
1039        if let Some(TokenKind::String(s)) = self.peek_kind() {
1040            let s = s.clone();
1041            self.advance();
1042            return Ok(Expr::String(s, location));
1043        }
1044
1045        // Regex literal
1046        if let Some(TokenKind::Regex(r)) = self.peek_kind() {
1047            let r = r.clone();
1048            self.advance();
1049            return Ok(Expr::Regex(r, location));
1050        }
1051
1052        // Identifier (variable or function call)
1053        if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1054            let name = name.clone();
1055            self.advance();
1056
1057            // Check for function call
1058            if self.match_token(&TokenKind::LeftParen) {
1059                let mut args = Vec::new();
1060                if !self.check(&TokenKind::RightParen) {
1061                    args.push(self.parse_expression()?);
1062                    while self.match_token(&TokenKind::Comma) {
1063                        args.push(self.parse_expression()?);
1064                    }
1065                }
1066                self.expect(&TokenKind::RightParen)?;
1067                return Ok(Expr::Call {
1068                    name,
1069                    args,
1070                    location,
1071                });
1072            }
1073
1074            return Ok(Expr::Var(name, location));
1075        }
1076
1077        // Getline
1078        if self.match_token(&TokenKind::Getline) {
1079            let var = if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1080                let name = name.clone();
1081                self.advance();
1082                Some(name)
1083            } else {
1084                None
1085            };
1086
1087            let input = if self.match_token(&TokenKind::Less) {
1088                Some(GetlineInput::File(Box::new(self.parse_primary()?)))
1089            } else {
1090                None
1091            };
1092
1093            return Ok(Expr::Getline {
1094                var,
1095                input,
1096                location,
1097            });
1098        }
1099
1100        // Parenthesized expression
1101        if self.match_token(&TokenKind::LeftParen) {
1102            let expr = self.parse_expression()?;
1103            self.expect(&TokenKind::RightParen)?;
1104            return Ok(Expr::Group(Box::new(expr), location));
1105        }
1106
1107        Err(Error::parser(
1108            format!("unexpected token {:?}", self.peek_kind()),
1109            location.line,
1110            location.column,
1111        ))
1112    }
1113
1114    // ===== Helper methods =====
1115
1116    fn peek_kind(&self) -> Option<&TokenKind> {
1117        self.tokens.get(self.current).map(|t| &t.kind)
1118    }
1119
1120    fn current_location(&self) -> SourceLocation {
1121        self.tokens
1122            .get(self.current)
1123            .map(|t| t.location)
1124            .unwrap_or(SourceLocation::new(0, 0))
1125    }
1126
1127    fn is_at_end(&self) -> bool {
1128        matches!(self.peek_kind(), None | Some(TokenKind::Eof))
1129    }
1130
1131    fn check(&self, kind: &TokenKind) -> bool {
1132        self.peek_kind()
1133            .map(|k| std::mem::discriminant(k) == std::mem::discriminant(kind))
1134            .unwrap_or(false)
1135    }
1136
1137    fn advance(&mut self) -> Option<&Token> {
1138        if !self.is_at_end() {
1139            self.current += 1;
1140        }
1141        self.tokens.get(self.current - 1)
1142    }
1143
1144    fn match_token(&mut self, kind: &TokenKind) -> bool {
1145        if self.check(kind) {
1146            self.advance();
1147            true
1148        } else {
1149            false
1150        }
1151    }
1152
1153    fn expect(&mut self, kind: &TokenKind) -> Result<&Token> {
1154        if self.check(kind) {
1155            Ok(self.advance().unwrap())
1156        } else {
1157            let loc = self.current_location();
1158            Err(Error::parser(
1159                format!("expected {:?}, found {:?}", kind, self.peek_kind()),
1160                loc.line,
1161                loc.column,
1162            ))
1163        }
1164    }
1165
1166    fn expect_identifier(&mut self) -> Result<String> {
1167        if let Some(TokenKind::Identifier(name)) = self.peek_kind() {
1168            let name = name.clone();
1169            self.advance();
1170            Ok(name)
1171        } else {
1172            let loc = self.current_location();
1173            Err(Error::parser(
1174                format!("expected identifier, found {:?}", self.peek_kind()),
1175                loc.line,
1176                loc.column,
1177            ))
1178        }
1179    }
1180
1181    fn skip_newlines(&mut self) {
1182        while self.match_token(&TokenKind::Newline) {}
1183    }
1184
1185    fn skip_terminators(&mut self) {
1186        while self.match_token(&TokenKind::Newline) || self.match_token(&TokenKind::Semicolon) {}
1187    }
1188
1189    fn can_start_expression(&self) -> bool {
1190        self.peek_kind()
1191            .map(|k| k.can_start_expression())
1192            .unwrap_or(false)
1193    }
1194}
1195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    /// Lexes and parses `source`, returning the program or the first error.
    fn parse(source: &str) -> Result<Program> {
        let tokens = Lexer::new(source).tokenize()?;
        Parser::new(tokens).parse()
    }

    /// Parses `source` (panicking on failure), asserts that it produced
    /// `expected` rules, and hands the program back for further checks.
    #[track_caller]
    fn parse_rules(source: &str, expected: usize) -> Program {
        let program = parse(source).unwrap();
        assert_eq!(program.rules.len(), expected);
        program
    }

    // ----- Rules and patterns -----

    #[test]
    fn test_simple_print() {
        parse_rules(r#"{ print "hello" }"#, 1);
    }

    #[test]
    fn test_begin_end() {
        let program = parse_rules(r#"BEGIN { x = 1 } END { print x }"#, 2);
        assert!(matches!(program.rules[0].pattern, Some(Pattern::Begin)));
        assert!(matches!(program.rules[1].pattern, Some(Pattern::End)));
    }

    #[test]
    fn test_regex_pattern() {
        let program = parse_rules(r#"/foo/ { print }"#, 1);
        assert!(matches!(
            &program.rules[0].pattern,
            Some(Pattern::Regex(r)) if r == "foo"
        ));
    }

    #[test]
    fn test_arithmetic() {
        parse_rules(r#"{ x = 1 + 2 * 3 }"#, 1);
    }

    #[test]
    fn test_function_def() {
        let program = parse(r#"function add(a, b) { return a + b }"#).unwrap();
        assert_eq!(program.functions.len(), 1);

        let function = &program.functions[0];
        assert_eq!(function.name, "add");
        assert_eq!(function.params, vec!["a", "b"]);
    }

    // ----- Statements -----

    #[test]
    fn test_if_else() {
        parse_rules(r#"{ if (x) print 1; else print 2 }"#, 1);
    }

    #[test]
    fn test_while_loop() {
        parse_rules(r#"{ while (x < 10) x++ }"#, 1);
    }

    #[test]
    fn test_for_loop() {
        parse_rules(r#"{ for (i=0; i<10; i++) print i }"#, 1);
    }

    #[test]
    fn test_for_in_loop() {
        parse_rules(r#"{ for (k in a) print k }"#, 1);
    }

    #[test]
    fn test_do_while() {
        parse_rules(r#"{ do { x++ } while (x < 10) }"#, 1);
    }

    #[test]
    fn test_delete() {
        parse_rules(r#"{ delete a[1] }"#, 1);
    }

    #[test]
    fn test_delete_array() {
        parse_rules(r#"{ delete a }"#, 1);
    }

    #[test]
    fn test_break_continue() {
        parse_rules(r#"{ break; continue }"#, 1);
    }

    #[test]
    fn test_next_nextfile() {
        parse_rules(r#"{ next } { nextfile }"#, 2);
    }

    #[test]
    fn test_exit() {
        parse_rules(r#"{ exit 0 }"#, 1);
    }

    #[test]
    fn test_return() {
        let program = parse(r#"function f() { return 42 }"#).unwrap();
        assert_eq!(program.functions.len(), 1);
    }

    // ----- Input / output -----

    #[test]
    fn test_printf() {
        parse_rules(r#"{ printf "%d", x }"#, 1);
    }

    #[test]
    fn test_getline() {
        parse_rules(r#"{ getline x < "file" }"#, 1);
    }

    #[test]
    fn test_pipe_getline() {
        parse_rules(r#"{ "cmd" | getline x }"#, 1);
    }

    #[test]
    fn test_output_redirect() {
        parse_rules(r#"{ print "x" > "file" }"#, 1);
    }

    #[test]
    fn test_output_append() {
        parse_rules(r#"{ print "x" >> "file" }"#, 1);
    }

    #[test]
    fn test_output_pipe() {
        parse_rules(r#"{ print "x" | "cmd" }"#, 1);
    }

    // ----- Expressions -----

    #[test]
    fn test_ternary() {
        parse_rules(r#"{ x = a ? b : c }"#, 1);
    }

    #[test]
    fn test_logical_and_or() {
        parse_rules(r#"{ x = a && b || c }"#, 1);
    }

    #[test]
    fn test_array_in() {
        parse_rules(r#"{ x = (1 in a) }"#, 1);
    }

    #[test]
    fn test_regex_match() {
        parse_rules(r#"{ x = ($0 ~ /foo/) }"#, 1);
    }

    #[test]
    fn test_concatenation() {
        parse_rules(r#"{ x = a b c }"#, 1);
    }

    #[test]
    fn test_field_access() {
        parse_rules(r#"{ print $1, $NF, $(2+1) }"#, 1);
    }

    #[test]
    fn test_array_multi_index() {
        parse_rules(r#"{ a[1,2,3] = x }"#, 1);
    }

    #[test]
    fn test_function_call() {
        parse_rules(r#"{ x = substr(s, 1, 5) }"#, 1);
    }

    #[test]
    fn test_pre_increment() {
        parse_rules(r#"{ ++x; --y }"#, 1);
    }

    #[test]
    fn test_post_increment() {
        parse_rules(r#"{ x++; y-- }"#, 1);
    }

    #[test]
    fn test_compound_assign() {
        parse_rules(r#"{ x += 1; y -= 1; z *= 2; w /= 2; v %= 3; p ^= 2 }"#, 1);
    }

    // ----- More patterns and blocks -----

    #[test]
    fn test_range_pattern() {
        let program = parse_rules(r#"/start/,/end/ { print }"#, 1);
        assert!(matches!(
            &program.rules[0].pattern,
            Some(Pattern::Range { .. })
        ));
    }

    #[test]
    fn test_expression_pattern() {
        parse_rules(r#"NR > 5 { print }"#, 1);
    }

    #[test]
    fn test_beginfile_endfile() {
        let program = parse_rules(r#"BEGINFILE { x = 1 } ENDFILE { print }"#, 2);
        assert!(matches!(program.rules[0].pattern, Some(Pattern::BeginFile)));
        assert!(matches!(program.rules[1].pattern, Some(Pattern::EndFile)));
    }

    #[test]
    fn test_empty_statement() {
        parse_rules(r#"{ ; ; ; }"#, 1);
    }

    #[test]
    fn test_block_statement() {
        parse_rules(r#"{ { { x = 1 } } }"#, 1);
    }

    #[test]
    fn test_multiple_rules() {
        parse_rules(r#"BEGIN { } { } END { }"#, 3);
    }

    // ----- Operators -----

    #[test]
    fn test_parenthesized_expression() {
        parse_rules(r#"{ x = (1 + 2) * 3 }"#, 1);
    }

    #[test]
    fn test_unary_ops() {
        parse_rules(r#"{ x = -a + +b }"#, 1);
    }

    #[test]
    fn test_not_operator() {
        parse_rules(r#"{ x = !a }"#, 1);
    }

    #[test]
    fn test_comparison_ops() {
        parse_rules(
            r#"{ x = a < b && b <= c && c > d && d >= e && e == f && f != g }"#,
            1,
        );
    }

    #[test]
    fn test_exponentiation() {
        // `^` is right associative; this only checks that the chain parses.
        parse_rules(r#"{ x = 2^3^4 }"#, 1);
    }
}