1use crate::{
2 Lexer, Program,
3 ast::{Action, Expression, Rule, Statement},
4 token::{Token, TokenKind},
5};
6
7#[derive(Debug)]
8pub struct Parser<'a> {
9 lexer: Lexer<'a>,
10 current_token: Token<'a>,
11}
12
13impl<'a> Parser<'a> {
14 pub fn new(mut lexer: Lexer<'a>) -> Self {
15 lexer.set_allow_regex(true);
17 let current_token = lexer.next_token();
18 lexer.set_allow_regex(false);
19
20 Parser {
21 lexer,
22 current_token,
23 }
24 }
25
26 fn next_token(&mut self) {
27 self.next_token_with_regex(false);
28 }
29
30 fn next_token_with_regex(&mut self, allow_regex: bool) {
31 self.lexer.set_allow_regex(allow_regex);
32 self.current_token = self.lexer.next_token();
33 self.lexer.set_allow_regex(false);
34 }
35
36 fn is_eof(&self) -> bool {
37 self.current_token.kind == TokenKind::Eof
38 }
39
40 fn parse_next_rule(&mut self) -> Option<Rule<'a>> {
41 match &self.current_token.kind {
42 TokenKind::Begin => {
43 self.next_token();
44 match self.parse_action() {
45 Rule::Action(action) => Some(Rule::Begin(action)),
46 _ => panic!("Expected action after BEGIN"),
47 }
48 }
49 TokenKind::NewLine => {
50 self.next_token_with_regex(true);
51 self.parse_next_rule()
52 }
53 TokenKind::Eof => None,
54 TokenKind::LeftCurlyBrace => Some(self.parse_action()),
55 TokenKind::End => {
56 self.next_token();
57 match self.parse_action() {
58 Rule::Action(action) => Some(Rule::End(action)),
59 _ => panic!("Expected action after END"),
60 }
61 }
62 TokenKind::Regex => {
63 let pattern = Some(Expression::Regex(self.current_token.literal));
64 self.next_token();
65 if self.current_token.kind == TokenKind::LeftCurlyBrace {
66 match self.parse_action() {
67 Rule::Action(action) => Some(Rule::PatternAction {
68 pattern,
69 action: Some(action),
70 }),
71 _ => panic!("Expected action after regex pattern"),
72 }
73 } else {
74 Some(Rule::PatternAction {
75 pattern,
76 action: None,
77 })
78 }
79 }
80 _ => panic!(
81 "parse_next_rule not yet implemented, found token: {:?}",
82 self.current_token
83 ),
84 }
85 }
86
87 fn parse_action(&mut self) -> Rule<'a> {
88 self.next_token(); let pattern = None;
91
92 let mut statements = Vec::new();
93 while self.current_token.kind == TokenKind::NewLine {
94 self.next_token();
95 }
96
97 if self.current_token.kind == TokenKind::Print {
98 let print_statement = self.parse_print_function();
99 statements.push(print_statement);
100 }
101
102 while self.current_token.kind != TokenKind::RightCurlyBrace
103 && self.current_token.kind != TokenKind::Eof
104 {
105 self.next_token();
106 }
107
108 if pattern.is_some() {
109 Rule::PatternAction {
110 pattern,
111 action: Some(Action { statements }),
112 }
113 } else {
114 Rule::Action(Action { statements })
115 }
116 }
117
118 fn parse_print_function(&mut self) -> Statement<'a> {
119 let mut expressions = Vec::new();
120 self.next_token();
121
122 while self.current_token.kind != TokenKind::RightCurlyBrace
123 && self.current_token.kind != TokenKind::Eof
124 {
125 let expression = self.parse_expression();
126 expressions.push(expression);
127 }
128
129 Statement::Print(expressions)
130 }
131
132 fn parse_expression(&mut self) -> Expression<'a> {
133 let mut left = self.parse_primary_expression();
134
135 while matches!(
136 self.current_token.kind,
137 TokenKind::Plus
138 | TokenKind::Minus
139 | TokenKind::Asterisk
140 | TokenKind::Division
141 | TokenKind::Percent
142 | TokenKind::Caret
143 ) {
144 let operator = self.current_token.clone();
145 self.next_token();
146 let right = self.parse_primary_expression();
147
148 left = Expression::Infix {
149 left: Box::new(left),
150 operator,
151 right: Box::new(right),
152 };
153 }
154
155 left
156 }
157
158 fn parse_primary_expression(&mut self) -> Expression<'a> {
159 match self.current_token.kind {
160 TokenKind::String => {
161 let expression = Expression::String(self.current_token.literal);
162 self.next_token();
163 expression
164 }
165 TokenKind::Number => {
166 let expression = if let Ok(value) = self.current_token.literal.parse::<f64>() {
167 Expression::Number(value)
168 } else {
169 todo!()
170 };
171 self.next_token();
172 expression
173 }
174 TokenKind::LeftParen => {
175 self.next_token();
176 let expression = self.parse_expression();
177 if self.current_token.kind == TokenKind::RightParen {
178 self.next_token();
179 }
180 expression
181 }
182 _ => {
183 todo!()
184 }
185 }
186 }
187
188 pub fn parse_program(&mut self) -> Program<'_> {
189 let mut program = Program::new();
190
191 while !self.is_eof() {
192 match self.parse_next_rule() {
193 Some(Rule::Begin(action)) => program.add_begin_block(Rule::Begin(action)),
194 Some(Rule::End(action)) => program.add_end_block(Rule::End(action)),
195 Some(rule) => program.add_rule(rule),
196 None => {}
197 }
198 self.next_token_with_regex(true);
199 }
200
201 program
202 }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and checks that it yields exactly one rule whose
    /// textual rendering equals `rendered`.
    fn assert_single_rule(source: &str, rendered: &str) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();

        assert_eq!(program.len(), 1);
        assert_eq!(rendered, program.to_string());
    }

    /// Parses `source`, takes the first statement of the first BEGIN block
    /// (a `print` statement) and passes its expression list to `check`.
    fn with_first_begin_print(source: &str, check: impl FnOnce(&[Expression<'_>])) {
        let mut parser = Parser::new(Lexer::new(source));

        let program = parser.parse_program();
        let mut begin_blocks = program.begin_blocks_iter();
        let rule = begin_blocks.next().expect("expected begin block");

        let statements = match rule {
            Rule::Begin(Action { statements }) => statements,
            _ => panic!("expected begin rule"),
        };

        let Statement::Print(expressions) = &statements[0];
        check(expressions.as_slice());
    }

    #[test]
    fn create_parser() {
        let mut parser = Parser::new(Lexer::new("42 == 42"));
        assert_eq!(parser.current_token.literal, "42");

        parser.next_token();
        assert_eq!(parser.current_token.literal, "==");
    }

    #[test]
    fn parse_empty_program() {
        let mut parser = Parser::new(Lexer::new(""));

        let program = parser.parse_program();

        assert_eq!(program.len(), 0);
    }

    #[test]
    fn parse_action_without_pattern() {
        assert_single_rule("{ print }", "{ print }");
    }

    #[test]
    fn parse_action_with_leading_newlines() {
        assert_single_rule("\n\n{ print }", "{ print }");
    }

    #[test]
    fn parse_begin_block() {
        assert_single_rule("BEGIN { print }", "BEGIN { print }");
    }

    #[test]
    fn parse_end_block() {
        assert_single_rule("END { print 42 }", "END { print 42 }");
    }

    #[test]
    fn parse_regex_pattern_action() {
        assert_single_rule("/foo/ { print }", "/foo/ { print }");
    }

    #[test]
    fn parse_print_infix_expression() {
        with_first_begin_print("BEGIN { print 1 + 2 }", |exprs| match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert!(matches!(**left, Expression::Number(1.0)));
                assert_eq!(operator.kind, TokenKind::Plus);
                assert!(matches!(**right, Expression::Number(2.0)));
            }
            _ => panic!("expected infix expression"),
        });
    }

    #[test]
    fn parse_print_parenthesized_expression() {
        with_first_begin_print("BEGIN { print (1 + 2) * 3 }", |exprs| match &exprs[0] {
            Expression::Infix {
                left,
                operator,
                right,
            } => {
                assert_eq!(operator.kind, TokenKind::Asterisk);
                assert!(matches!(**right, Expression::Number(3.0)));
                assert!(matches!(**left, Expression::Infix { .. }));
            }
            _ => panic!("expected infix expression"),
        });
    }

    #[test]
    fn parse_print_concatenation() {
        with_first_begin_print(r#"BEGIN { print "Value:" 42 }"#, |exprs| {
            assert_eq!(exprs.len(), 2);
            assert!(matches!(exprs[0], Expression::String("Value:")));
            assert!(matches!(exprs[1], Expression::Number(42.0)));
        });
    }
}