1use crate::error::{ParseError, ParseResult};
4use crate::tokens::{Token, TokenType};
5use crate::lexer::Lexer;
6use crate::arena::{Arena, AstNode, NodeId, TokenId};
7use crate::arena::AddOp;
8use crate::arena::MultExprOp;
9use crate::arena::{EqualityOp, ComparisonOp, UnaryOp};
10use crate::arena::{
11 JmsSelectorNode,
12 OrExpressionNode,
13 AndExpressionNode,
14 EqualityExpressionNode,
15 ComparisonExpressionNode,
16 AddExpressionNode,
17 MultExprNode,
18 UnaryExprNode,
19 PrimaryExprNode,
20 LiteralNode,
21 StringLiteralNode,
22 VariableNode
23};
24
25pub struct Parser {
27 lexer: Lexer,
29 current_token: Token,
31 lookahead: Vec<Token>,
33 arena: Arena,
35 current_token_id: Option<TokenId>,
37 input: String,
39}
40
/// Literal category of a single element inside an `IN (...)` list.
///
/// Used to check that every element of one list belongs to the same category.
/// The enum is fieldless, so it is `Copy` and `Eq` in addition to the
/// comparison and debug traits the parser relies on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InElementType {
    /// Integer literal (decimal without a `.`, hex, or octal).
    Integer,
    /// Floating-point literal, or a decimal literal whose image contains `.`.
    Float,
    /// Quoted string literal.
    StringLit,
}
48
49impl Parser {
50 pub fn new(input: String) -> ParseResult<Self> {
52 let mut lexer = Lexer::new(input.clone());
53 let current_token = lexer.next_token()?;
54
55 Ok(Parser {
56 lexer,
57 current_token,
58 lookahead: Vec::new(),
59 arena: Arena::new(),
60 current_token_id: None,
61 input,
62 })
63 }
64
65 pub fn arena(&self) -> &Arena {
67 &self.arena
68 }
69
70 pub fn arena_mut(&mut self) -> &mut Arena {
72 &mut self.arena
73 }
74
75 pub fn input(&self) -> &str {
77 &self.input
78 }
79
80 pub fn parse(&mut self) -> ParseResult<NodeId> {
82 self.parse_jms_selector()
83 }
84
85 fn parse_jms_selector(&mut self) -> ParseResult<NodeId> {
87 let begin_token = self.alloc_current_token();
88
89 let child = self.parse_or_expression()?;
90
91 self.validate_boolean_root(child)?;
92
93 self.expect_token(TokenType::EOF)?;
94 let end_token = self.current_token_id.unwrap_or(begin_token);
95
96 let mut node = JmsSelectorNode::new(begin_token, end_token);
97 node.children.push(child);
98
99 let node_id = self.arena.alloc_node(AstNode::JmsSelector(node));
100 self.set_parent(child, node_id);
101 Ok(node_id)
102 }
103
104 fn parse_or_expression(&mut self) -> ParseResult<NodeId> {
106 let begin_token = self.alloc_current_token();
107 let mut children = Vec::new();
108
109 let first = self.parse_and_expression()?;
110 children.push(first);
111
112 while self.current_token.token_type == TokenType::OR {
113 self.consume_token()?;
114 let child = self.parse_and_expression()?;
115 children.push(child);
116 }
117
118 let end_token = self.current_token_id.unwrap_or(begin_token);
119 let mut node = OrExpressionNode::new(begin_token, end_token);
120 node.children = children.clone();
121
122 let node_id = self.arena.alloc_node(AstNode::OrExpression(node));
123 for child in children {
124 self.set_parent(child, node_id);
125 }
126 Ok(node_id)
127 }
128
129 fn parse_and_expression(&mut self) -> ParseResult<NodeId> {
131 let begin_token = self.alloc_current_token();
132 let mut children = Vec::new();
133
134 let first = self.parse_equality_expression()?;
135 children.push(first);
136
137 while self.current_token.token_type == TokenType::AND {
138 self.consume_token()?;
139 let child = self.parse_equality_expression()?;
140 children.push(child);
141 }
142
143 let end_token = self.current_token_id.unwrap_or(begin_token);
144 let mut node = AndExpressionNode::new(begin_token, end_token);
145 node.children = children.clone();
146
147 let node_id = self.arena.alloc_node(AstNode::AndExpression(node));
148 for child in children {
149 self.set_parent(child, node_id);
150 }
151 Ok(node_id)
152 }
153
154 fn parse_equality_expression(&mut self) -> ParseResult<NodeId> {
156 let begin_token = self.alloc_current_token();
157 let mut children: Vec<NodeId> = Vec::new();
158 let mut operators: Vec<EqualityOp> = Vec::new();
159
160 {
161 let child = self.parse_comparison_expression()?;
162 children.push(child);
163 }
164 loop {
165 if self.current_token.token_type == TokenType::EQ
166 {
167 operators.push(EqualityOp::Equal);
168 self.expect_token(TokenType::EQ)?;
169 {
170 let child = self.parse_comparison_expression()?;
171 children.push(child);
172 }
173 }
174 else if self.current_token.token_type == TokenType::NE
175 {
176 operators.push(EqualityOp::NotEqual);
177 self.expect_token(TokenType::NE)?;
178 {
179 let child = self.parse_comparison_expression()?;
180 children.push(child);
181 }
182 }
183 else if
184 self.current_token.token_type == TokenType::IS
185 && self.lookahead_type(1) == Some(TokenType::NULL)
186 {
187 operators.push(EqualityOp::IsNull);
188 self.expect_token(TokenType::IS)?;
189 self.expect_token(TokenType::NULL)?;
190 }
191 else if self.current_token.token_type == TokenType::IS
192 {
193 operators.push(EqualityOp::IsNotNull);
194 self.expect_token(TokenType::IS)?;
195 self.expect_token(TokenType::NOT)?;
196 self.expect_token(TokenType::NULL)?;
197 }
198 else {
199 break;
200 }
201 }
202
203 let end_token = self.current_token_id.unwrap_or(begin_token);
204 let mut node = EqualityExpressionNode::new(begin_token, end_token);
205 node.children = children.clone();
206 node.operators = operators;
207 let node_id = self.arena.alloc_node(AstNode::EqualityExpression(node));
208 for child_id in children {
209 self.set_parent(child_id, node_id);
210 }
211 Ok(node_id)
212 }
213
214 fn parse_comparison_expression(&mut self) -> ParseResult<NodeId> {
216 let begin_token = self.alloc_current_token();
217 let mut children: Vec<NodeId> = Vec::new();
218 let mut operators: Vec<ComparisonOp> = Vec::new();
219
220 {
221 let child = self.parse_add_expression()?;
222 children.push(child);
223 }
224 loop {
225 if self.current_token.token_type == TokenType::GT
226 {
227 operators.push(ComparisonOp::GreaterThan);
228 self.expect_token(TokenType::GT)?;
229 {
230 let child = self.parse_add_expression()?;
231 children.push(child);
232 }
233 }
234 else if self.current_token.token_type == TokenType::GE
235 {
236 operators.push(ComparisonOp::GreaterThanEqual);
237 self.expect_token(TokenType::GE)?;
238 {
239 let child = self.parse_add_expression()?;
240 children.push(child);
241 }
242 }
243 else if self.current_token.token_type == TokenType::LT
244 {
245 operators.push(ComparisonOp::LessThan);
246 self.expect_token(TokenType::LT)?;
247 {
248 let child = self.parse_add_expression()?;
249 children.push(child);
250 }
251 }
252 else if self.current_token.token_type == TokenType::LE
253 {
254 operators.push(ComparisonOp::LessThanEqual);
255 self.expect_token(TokenType::LE)?;
256 {
257 let child = self.parse_add_expression()?;
258 children.push(child);
259 }
260 }
261 else if self.current_token.token_type == TokenType::LIKE
262 {
263 self.expect_token(TokenType::LIKE)?;
264 {
265 let child = self.parse_string_literal()?;
266 children.push(child);
267 }
268 if self.current_token.token_type == TokenType::ESCAPE
269 {
270 operators.push(ComparisonOp::LikeEscape);
271 self.expect_token(TokenType::ESCAPE)?;
272 {
273 let child = self.parse_string_literal()?;
274 children.push(child);
275 }
276 } else {
277 operators.push(ComparisonOp::Like);
278 }
279 }
280 else if
281 self.current_token.token_type == TokenType::NOT
282 && self.lookahead_type(1) == Some(TokenType::LIKE)
283 {
284 self.expect_token(TokenType::NOT)?;
285 self.expect_token(TokenType::LIKE)?;
286 {
287 let child = self.parse_string_literal()?;
288 children.push(child);
289 }
290 if self.current_token.token_type == TokenType::ESCAPE
291 {
292 operators.push(ComparisonOp::NotLikeEscape);
293 self.expect_token(TokenType::ESCAPE)?;
294 {
295 let child = self.parse_string_literal()?;
296 children.push(child);
297 }
298 } else {
299 operators.push(ComparisonOp::NotLike);
300 }
301 }
302 else if self.current_token.token_type == TokenType::BETWEEN
303 {
304 operators.push(ComparisonOp::Between);
305 self.expect_token(TokenType::BETWEEN)?;
306 {
307 let child = self.parse_between_bound()?;
308 children.push(child);
309 }
310 self.expect_token(TokenType::AND)?;
311 let low_id = *children.last().unwrap();
312 {
313 let child = self.parse_between_bound()?;
314 children.push(child);
315 }
316 let high_id = *children.last().unwrap();
317 self.validate_between_bounds(low_id, high_id)?;
318 }
319 else if
320 self.current_token.token_type == TokenType::NOT
321 && self.lookahead_type(1) == Some(TokenType::BETWEEN)
322 {
323 operators.push(ComparisonOp::NotBetween);
324 self.expect_token(TokenType::NOT)?;
325 self.expect_token(TokenType::BETWEEN)?;
326 {
327 let child = self.parse_between_bound()?;
328 children.push(child);
329 }
330 self.expect_token(TokenType::AND)?;
331 let low_id = *children.last().unwrap();
332 {
333 let child = self.parse_between_bound()?;
334 children.push(child);
335 }
336 let high_id = *children.last().unwrap();
337 self.validate_between_bounds(low_id, high_id)?;
338 }
339 else if self.current_token.token_type == TokenType::IN
340 {
341 operators.push(ComparisonOp::In);
342 self.expect_token(TokenType::IN)?;
343 self.expect_token(TokenType::LPAREN)?;
344 let first_type = self.classify_current_token_for_in()?;
345 {
346 let child = self.parse_in_element()?;
347 children.push(child);
348 }
349 while self.current_token.token_type == TokenType::COMMA
350 {
351 self.expect_token(TokenType::COMMA)?;
352 let elem_type = self.classify_current_token_for_in()?;
353 self.check_in_type_consistency(&first_type, &elem_type)?;
354 {
355 let child = self.parse_in_element()?;
356 children.push(child);
357 }
358 }
359 self.expect_token(TokenType::RPAREN)?;
360 }
361 else if
362 self.current_token.token_type == TokenType::NOT
363 && self.lookahead_type(1) == Some(TokenType::IN)
364 && self.lookahead_type(2) == Some(TokenType::LPAREN)
365 {
366 operators.push(ComparisonOp::NotIn);
367 self.expect_token(TokenType::NOT)?;
368 self.expect_token(TokenType::IN)?;
369 self.expect_token(TokenType::LPAREN)?;
370 let first_type = self.classify_current_token_for_in()?;
371 {
372 let child = self.parse_in_element()?;
373 children.push(child);
374 }
375 while self.current_token.token_type == TokenType::COMMA
376 {
377 self.expect_token(TokenType::COMMA)?;
378 let elem_type = self.classify_current_token_for_in()?;
379 self.check_in_type_consistency(&first_type, &elem_type)?;
380 {
381 let child = self.parse_in_element()?;
382 children.push(child);
383 }
384 }
385 self.expect_token(TokenType::RPAREN)?;
386 }
387 else {
388 break;
389 }
390 }
391
392 let end_token = self.current_token_id.unwrap_or(begin_token);
393 let mut node = ComparisonExpressionNode::new(begin_token, end_token);
394 node.children = children.clone();
395 node.operators = operators;
396 let node_id = self.arena.alloc_node(AstNode::ComparisonExpression(node));
397 for child_id in children {
398 self.set_parent(child_id, node_id);
399 }
400 Ok(node_id)
401 }
402
403 fn parse_add_expression(&mut self) -> ParseResult<NodeId> {
405 let begin_token = self.alloc_current_token();
406 let mut children = Vec::new();
407 let mut operators = Vec::new();
408
409 let first = self.parse_mult_expr()?;
410 children.push(first);
411
412 while self.current_token.token_type == TokenType::PLUS
413 || self.current_token.token_type == TokenType::MINUS
414 {
415 let op = match self.current_token.token_type {
416 TokenType::PLUS => AddOp::Plus,
417 TokenType::MINUS => AddOp::Minus,
418 _ => return Err(ParseError::at_position(
419 format!("Expected '+' or '-', found {:?} '{}'",
420 self.current_token.token_type, self.current_token.image),
421 self.current_token.begin_offset,
422 )),
423 };
424 operators.push(op);
425
426 self.consume_token()?;
427 let child = self.parse_mult_expr()?;
428 children.push(child);
429 }
430
431 let end_token = self.current_token_id.unwrap_or(begin_token);
432 let mut node = AddExpressionNode::new(begin_token, end_token);
433 node.children = children.clone();
434 node.operators = operators;
435
436 let node_id = self.arena.alloc_node(AstNode::AddExpression(node));
437 for child in children {
438 self.set_parent(child, node_id);
439 }
440 Ok(node_id)
441 }
442
443 fn parse_mult_expr(&mut self) -> ParseResult<NodeId> {
445 let begin_token = self.alloc_current_token();
446 let mut children = Vec::new();
447 let mut operators = Vec::new();
448
449 let first = self.parse_unary_expr()?;
450 children.push(first);
451
452 while self.current_token.token_type == TokenType::STAR
453 || self.current_token.token_type == TokenType::SLASH
454 || self.current_token.token_type == TokenType::PERCENT
455 {
456 let op = match self.current_token.token_type {
457 TokenType::STAR => MultExprOp::Star,
458 TokenType::SLASH => MultExprOp::Slash,
459 TokenType::PERCENT => MultExprOp::Percent,
460 _ => return Err(ParseError::at_position(
461 format!("Expected '*', '/' or '%', found {:?} '{}'",
462 self.current_token.token_type, self.current_token.image),
463 self.current_token.begin_offset,
464 )),
465 };
466 operators.push(op);
467
468 self.consume_token()?;
469 let child = self.parse_unary_expr()?;
470 children.push(child);
471 }
472
473 let end_token = self.current_token_id.unwrap_or(begin_token);
474 let mut node = MultExprNode::new(begin_token, end_token);
475 node.children = children.clone();
476 node.operators = operators;
477
478 let node_id = self.arena.alloc_node(AstNode::MultExpr(node));
479 for child in children {
480 self.set_parent(child, node_id);
481 }
482 Ok(node_id)
483 }
484
485 fn parse_unary_expr(&mut self) -> ParseResult<NodeId> {
487 let begin_token = self.alloc_current_token();
488 let mut children: Vec<NodeId> = Vec::new();
489 let mut operator: Option<UnaryOp> = None;
490
491 if
492 self.current_token.token_type == TokenType::PLUS
493 {
494 operator = Some(UnaryOp::Plus);
495 self.expect_token(TokenType::PLUS)?;
496 {
497 let child = self.parse_unary_expr()?;
498 children.push(child);
499 }
500 }
501 else if self.current_token.token_type == TokenType::MINUS
502 {
503 operator = Some(UnaryOp::Negate);
504 self.expect_token(TokenType::MINUS)?;
505 {
506 let child = self.parse_unary_expr()?;
507 children.push(child);
508 }
509 }
510 else if self.current_token.token_type == TokenType::NOT
511 {
512 operator = Some(UnaryOp::Not);
513 self.expect_token(TokenType::NOT)?;
514 {
515 let child = self.parse_unary_expr()?;
516 children.push(child);
517 }
518 }
519 else if self.current_token.token_type == TokenType::TRUE
520 || self.current_token.token_type == TokenType::FALSE
521 || self.current_token.token_type == TokenType::NULL
522 || self.current_token.token_type == TokenType::LPAREN
523 || self.current_token.token_type == TokenType::DECIMAL_LITERAL
524 || self.current_token.token_type == TokenType::HEX_LITERAL
525 || self.current_token.token_type == TokenType::OCTAL_LITERAL
526 || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
527 || self.current_token.token_type == TokenType::STRING_LITERAL
528 || self.current_token.token_type == TokenType::ID
529 {
530 {
531 let child = self.parse_primary_expr()?;
532 children.push(child);
533 }
534 }
535 else {
536 return Err(ParseError::at_position(
537 format!(
538 "Expected expression, found {:?} '{}'",
539 self.current_token.token_type, self.current_token.image
540 ),
541 self.current_token.begin_offset,
542 ));
543 }
544
545 let end_token = self.current_token_id.unwrap_or(begin_token);
546 let mut node = UnaryExprNode::new(begin_token, end_token);
547 node.children = children.clone();
548 node.operator = operator;
549 let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
550 for child_id in children {
551 self.set_parent(child_id, node_id);
552 }
553 Ok(node_id)
554 }
555
556 fn parse_primary_expr(&mut self) -> ParseResult<NodeId> {
558 let begin_token = self.alloc_current_token();
559 let mut children = Vec::new();
560
561 if self.current_token.token_type == TokenType::TRUE
562 || self.current_token.token_type == TokenType::FALSE
563 || self.current_token.token_type == TokenType::NULL
564 || self.current_token.token_type == TokenType::DECIMAL_LITERAL
565 || self.current_token.token_type == TokenType::HEX_LITERAL
566 || self.current_token.token_type == TokenType::OCTAL_LITERAL
567 || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
568 || self.current_token.token_type == TokenType::STRING_LITERAL
569 {
570 let inner = self.parse_literal()?;
571 children.push(inner);
572 }
573 else if self.current_token.token_type == TokenType::ID
574 {
575 let inner = self.parse_variable()?;
576 children.push(inner);
577 }
578 else if self.current_token.token_type == TokenType::LPAREN {
579 self.consume_token()?;
580 let inner = self.parse_or_expression()?;
581 children.push(inner);
582 self.expect_token(TokenType::RPAREN)?;
583 }
584 else {
585 return Err(ParseError::at_position(
586 format!(
587 "Expected expression, found {:?} '{}'",
588 self.current_token.token_type, self.current_token.image
589 ),
590 self.current_token.begin_offset,
591 ));
592 }
593
594 let end_token = self.current_token_id.unwrap_or(begin_token);
595 let mut node = PrimaryExprNode::new(begin_token, end_token);
596 node.children = children.clone();
597
598 let node_id = self.arena.alloc_node(AstNode::PrimaryExpr(node));
599 for child in children {
600 self.set_parent(child, node_id);
601 }
602 Ok(node_id)
603 }
604
605 fn parse_literal(&mut self) -> ParseResult<NodeId> {
607 let begin_token = self.alloc_current_token();
608 let mut children = Vec::new();
609
610 if self.current_token.token_type == TokenType::STRING_LITERAL
611 {
612 let inner = self.parse_string_literal()?;
613 children.push(inner);
614 }
615 else if self.current_token.token_type == TokenType::DECIMAL_LITERAL
616 || self.current_token.token_type == TokenType::HEX_LITERAL
617 || self.current_token.token_type == TokenType::OCTAL_LITERAL
618 || self.current_token.token_type == TokenType::FLOATING_POINT_LITERAL
619 || self.current_token.token_type == TokenType::TRUE
620 || self.current_token.token_type == TokenType::FALSE
621 || self.current_token.token_type == TokenType::NULL
622 {
623 self.consume_token()?;
624 }
625 else {
626 return Err(ParseError::at_position(
627 format!(
628 "Expected expression, found {:?} '{}'",
629 self.current_token.token_type, self.current_token.image
630 ),
631 self.current_token.begin_offset,
632 ));
633 }
634
635 let end_token = self.current_token_id.unwrap_or(begin_token);
636 let mut node = LiteralNode::new(begin_token, end_token);
637 node.children = children.clone();
638
639 let node_id = self.arena.alloc_node(AstNode::Literal(node));
640 for child in children {
641 self.set_parent(child, node_id);
642 }
643 Ok(node_id)
644 }
645
646 fn parse_string_literal(&mut self) -> ParseResult<NodeId> {
648 let begin_token = self.alloc_current_token();
649 let children: Vec<NodeId> = Vec::new();
650
651 self.expect_token(TokenType::STRING_LITERAL)?;
652
653 let end_token = self.current_token_id.unwrap_or(begin_token);
654 let mut node = StringLiteralNode::new(begin_token, end_token);
655 node.children = children.clone();
656 let node_id = self.arena.alloc_node(AstNode::StringLiteral(node));
657 for child_id in children {
658 self.set_parent(child_id, node_id);
659 }
660 Ok(node_id)
661 }
662
663 fn parse_variable(&mut self) -> ParseResult<NodeId> {
665 let begin_token = self.alloc_current_token();
666 let children: Vec<NodeId> = Vec::new();
667
668 self.expect_token(TokenType::ID)?;
669
670 let end_token = self.current_token_id.unwrap_or(begin_token);
671 let mut node = VariableNode::new(begin_token, end_token);
672 node.children = children.clone();
673 let node_id = self.arena.alloc_node(AstNode::Variable(node));
674 for child_id in children {
675 self.set_parent(child_id, node_id);
676 }
677 Ok(node_id)
678 }
679
680 fn parse_between_bound(&mut self) -> ParseResult<NodeId> {
684 if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
686 let begin_token = self.alloc_current_token();
687 let operator = if self.current_token.token_type == TokenType::MINUS {
688 UnaryOp::Negate
689 } else {
690 UnaryOp::Plus
691 };
692 self.consume_token()?;
693 match self.current_token.token_type {
695 TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
696 | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL => {}
697 _ => {
698 return Err(ParseError::at_position(
699 format!(
700 "Expected numeric literal after '{}' in BETWEEN bound, found {:?} '{}'",
701 if operator == UnaryOp::Negate { "-" } else { "+" },
702 self.current_token.token_type, self.current_token.image
703 ),
704 self.current_token.begin_offset,
705 ));
706 }
707 }
708 let child = self.parse_primary_expr()?;
709 let end_token = self.current_token_id.unwrap_or(begin_token);
710 let mut node = UnaryExprNode::new(begin_token, end_token);
711 node.children.push(child);
712 node.operator = Some(operator);
713 let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
714 self.set_parent(child, node_id);
715 return Ok(node_id);
716 }
717 match self.current_token.token_type {
718 TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
719 | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL
720 | TokenType::STRING_LITERAL => {
721 self.parse_primary_expr()
722 }
723 TokenType::TRUE | TokenType::FALSE => {
724 Err(ParseError::at_position(
725 "BETWEEN bounds cannot be boolean values".to_string(),
726 self.current_token.begin_offset,
727 ))
728 }
729 TokenType::NULL => {
730 Err(ParseError::at_position(
731 "NULL is not allowed in BETWEEN bounds".to_string(),
732 self.current_token.begin_offset,
733 ))
734 }
735 TokenType::ID => {
736 Err(ParseError::at_position(
737 "BETWEEN bounds must be literal values, not variables".to_string(),
738 self.current_token.begin_offset,
739 ))
740 }
741 _ => {
742 Err(ParseError::at_position(
743 format!(
744 "BETWEEN bounds must be literal values (numeric or string), found {:?} '{}'",
745 self.current_token.token_type, self.current_token.image
746 ),
747 self.current_token.begin_offset,
748 ))
749 }
750 }
751 }
752
753 fn get_literal_image(&self, node_id: NodeId) -> String {
756 match self.arena.get_node(node_id) {
757 AstNode::UnaryExpr(n) => {
758 if !n.children.is_empty() {
759 let inner = self.get_literal_image(n.children[0]);
760 match n.operator {
761 Some(UnaryOp::Negate) => format!("-{}", inner),
762 Some(UnaryOp::Plus) => inner,
763 _ => inner,
764 }
765 } else {
766 String::new()
767 }
768 }
769 AstNode::PrimaryExpr(n) => {
770 if n.children.is_empty() {
771 self.arena.get_token(n.begin_token).image.clone()
772 } else {
773 self.get_literal_image(n.children[0])
774 }
775 }
776 AstNode::Literal(n) => {
777 if n.children.is_empty() {
778 self.arena.get_token(n.begin_token).image.clone()
779 } else {
780 self.get_literal_image(n.children[0])
781 }
782 }
783 AstNode::StringLiteral(n) => {
784 self.arena.get_token(n.begin_token).image.clone()
785 }
786 _ => String::new(),
787 }
788 }
789
790 fn get_literal_token_type(&self, node_id: NodeId) -> TokenType {
792 match self.arena.get_node(node_id) {
793 AstNode::UnaryExpr(n) => {
794 if !n.children.is_empty() {
795 self.get_literal_token_type(n.children[0])
796 } else {
797 TokenType::INVALID
798 }
799 }
800 AstNode::PrimaryExpr(n) => {
801 if n.children.is_empty() {
802 self.arena.get_token(n.begin_token).token_type
803 } else {
804 self.get_literal_token_type(n.children[0])
805 }
806 }
807 AstNode::Literal(n) => {
808 if n.children.is_empty() {
809 self.arena.get_token(n.begin_token).token_type
810 } else {
811 self.get_literal_token_type(n.children[0])
812 }
813 }
814 AstNode::StringLiteral(_) => TokenType::STRING_LITERAL,
815 _ => TokenType::INVALID,
816 }
817 }
818
819 fn validate_between_bounds(&self, low_id: NodeId, high_id: NodeId) -> ParseResult<()> {
821 let low_image = self.get_literal_image(low_id);
822 let high_image = self.get_literal_image(high_id);
823 let low_type = self.get_literal_token_type(low_id);
824 let high_type = self.get_literal_token_type(high_id);
825
826 let low_is_numeric = matches!(low_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
827 let high_is_numeric = matches!(high_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
828 let low_is_string = low_type == TokenType::STRING_LITERAL;
829 let high_is_string = high_type == TokenType::STRING_LITERAL;
830
831 if low_is_numeric && high_is_string || low_is_string && high_is_numeric {
832 let low_kind = if low_is_string { "string" } else { "integer" };
833 let high_kind = if high_is_string { "string" } else { "integer" };
834 return Err(ParseError::new(format!(
835 "BETWEEN bounds must be the same type (both numeric or both string): found {} ('{}') and {} ('{}')",
836 low_kind, low_image, high_kind, high_image
837 )));
838 }
839
840 if low_is_numeric && high_is_numeric {
841 let low_val = Self::parse_numeric_literal(&low_image).map_err(|_| {
842 ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", low_image))
843 })?;
844 let high_val = Self::parse_numeric_literal(&high_image).map_err(|_| {
845 ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", high_image))
846 })?;
847 if low_val > high_val {
848 return Err(ParseError::new(format!(
849 "BETWEEN lower bound ({}) must not exceed upper bound ({})",
850 low_image, high_image
851 )));
852 }
853 } else if low_is_string && high_is_string {
854 let low_inner = &low_image[1..low_image.len() - 1];
856 let high_inner = &high_image[1..high_image.len() - 1];
857 if low_inner > high_inner {
858 return Err(ParseError::new(format!(
859 "BETWEEN lower bound ({}) must not exceed upper bound ({})",
860 low_image, high_image
861 )));
862 }
863 }
864
865 Ok(())
866 }
867
868 fn parse_numeric_literal(image: &str) -> Result<f64, String> {
870 let image = image.strip_suffix('L').or_else(|| image.strip_suffix('l')).unwrap_or(image);
871 if let Some(hex) = image.strip_prefix("0x").or_else(|| image.strip_prefix("0X")) {
872 i64::from_str_radix(hex, 16)
873 .map(|i| i as f64)
874 .map_err(|e| e.to_string())
875 } else if image.starts_with('0') && image.len() > 1
876 && image[1..].chars().all(|c| ('0'..='7').contains(&c))
877 {
878 let oct = &image[1..];
879 i64::from_str_radix(oct, 8)
880 .map(|i| i as f64)
881 .map_err(|e| e.to_string())
882 } else {
883 image.parse::<f64>().map_err(|e| e.to_string())
884 }
885 }
886
887 fn classify_current_token_for_in(&mut self) -> ParseResult<InElementType> {
889 if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
891 let next_type = self.lookahead_type(1);
892 return match next_type {
893 Some(TokenType::FLOATING_POINT_LITERAL) => Ok(InElementType::Float),
894 Some(TokenType::DECIMAL_LITERAL) => {
895 let next_image = self.lookahead(1).map(|t| t.image.clone()).unwrap_or_default();
896 if next_image.contains('.') {
897 Ok(InElementType::Float)
898 } else {
899 Ok(InElementType::Integer)
900 }
901 }
902 Some(TokenType::HEX_LITERAL) | Some(TokenType::OCTAL_LITERAL) => Ok(InElementType::Integer),
903 _ => Err(ParseError::at_position(
904 format!(
905 "Expected numeric literal after '{}', found {:?}",
906 self.current_token.image,
907 next_type
908 ),
909 self.current_token.begin_offset,
910 )),
911 };
912 }
913 match self.current_token.token_type {
914 TokenType::STRING_LITERAL => Ok(InElementType::StringLit),
915 TokenType::FLOATING_POINT_LITERAL => Ok(InElementType::Float),
916 TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL => Ok(InElementType::Integer),
917 TokenType::DECIMAL_LITERAL => {
918 if self.current_token.image.contains('.') {
919 Ok(InElementType::Float)
920 } else {
921 Ok(InElementType::Integer)
922 }
923 }
924 TokenType::TRUE | TokenType::FALSE => {
925 Err(ParseError::at_position(
926 "Boolean is not allowed in IN list elements".to_string(),
927 self.current_token.begin_offset,
928 ))
929 }
930 TokenType::NULL => {
931 Err(ParseError::at_position(
932 "NULL is not allowed in IN list elements".to_string(),
933 self.current_token.begin_offset,
934 ))
935 }
936 _ => {
937 Err(ParseError::at_position(
938 format!(
939 "IN list elements must be literal values (string, integer, or float), found {:?} '{}'",
940 self.current_token.token_type, self.current_token.image
941 ),
942 self.current_token.begin_offset,
943 ))
944 }
945 }
946 }
947
948 fn check_in_type_consistency(&self, first: &InElementType, current: &InElementType) -> ParseResult<()> {
950 let compatible = match (first, current) {
951 (InElementType::StringLit, InElementType::StringLit) => true,
952 (InElementType::Integer, InElementType::Integer) => true,
953 (InElementType::Float, InElementType::Float) => true,
954 (InElementType::Integer, InElementType::Float)
955 | (InElementType::Float, InElementType::Integer) => false,
956 _ => false,
957 };
958 if !compatible {
959 let type_name = |t: &InElementType| match t {
960 InElementType::Integer => "integer",
961 InElementType::Float => "float",
962 InElementType::StringLit => "string",
963 };
964 Err(ParseError::at_position(
965 format!(
966 "IN list elements must all be the same type: first element is {}, but found {} '{}'",
967 type_name(first), type_name(current), self.current_token.image
968 ),
969 self.current_token.begin_offset,
970 ))
971 } else {
972 Ok(())
973 }
974 }
975
976 fn parse_in_element(&mut self) -> ParseResult<NodeId> {
978 if matches!(self.current_token.token_type, TokenType::MINUS | TokenType::PLUS) {
980 let begin_token = self.alloc_current_token();
981 let operator = if self.current_token.token_type == TokenType::MINUS {
982 UnaryOp::Negate
983 } else {
984 UnaryOp::Plus
985 };
986 self.consume_token()?;
987 let child = self.parse_primary_expr()?;
988 let end_token = self.current_token_id.unwrap_or(begin_token);
989 let mut node = UnaryExprNode::new(begin_token, end_token);
990 node.children.push(child);
991 node.operator = Some(operator);
992 let node_id = self.arena.alloc_node(AstNode::UnaryExpr(node));
993 self.set_parent(child, node_id);
994 return Ok(node_id);
995 }
996 match self.current_token.token_type {
997 TokenType::STRING_LITERAL => self.parse_string_literal(),
998 TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL
999 | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL => {
1000 self.parse_primary_expr()
1001 }
1002 _ => {
1003 Err(ParseError::at_position(
1004 format!(
1005 "Expected literal value in IN list, found {:?} '{}'",
1006 self.current_token.token_type, self.current_token.image
1007 ),
1008 self.current_token.begin_offset,
1009 ))
1010 }
1011 }
1012 }
1013
1014 fn validate_boolean_root(&self, node_id: NodeId) -> ParseResult<()> {
1018 if self.is_boolean_expression(node_id) {
1019 Ok(())
1020 } else {
1021 Err(ParseError::new(
1022 "Expression must be boolean (comparison, logical, or boolean literal)".to_string(),
1023 ))
1024 }
1025 }
1026
1027 fn is_boolean_expression(&self, node_id: NodeId) -> bool {
1030 match self.arena.get_node(node_id) {
1031 AstNode::OrExpression(n) => {
1032 if n.children.len() > 1 {
1033 return true; }
1035 if n.children.len() == 1 {
1036 return self.is_boolean_expression(n.children[0]);
1037 }
1038 false
1039 }
1040 AstNode::AndExpression(n) => {
1041 if n.children.len() > 1 {
1042 return true; }
1044 if n.children.len() == 1 {
1045 return self.is_boolean_expression(n.children[0]);
1046 }
1047 false
1048 }
1049 AstNode::EqualityExpression(n) => {
1050 if !n.operators.is_empty() {
1051 return true; }
1053 if n.children.len() == 1 {
1054 return self.is_boolean_expression(n.children[0]);
1055 }
1056 false
1057 }
1058 AstNode::ComparisonExpression(n) => {
1059 if !n.operators.is_empty() {
1060 return true; }
1062 if n.children.len() == 1 {
1063 return self.is_boolean_expression(n.children[0]);
1064 }
1065 false
1066 }
1067 AstNode::AddExpression(n) => {
1068 if n.children.len() == 1 && n.operators.is_empty() {
1070 return self.is_boolean_expression(n.children[0]);
1071 }
1072 false
1073 }
1074 AstNode::MultExpr(n) => {
1075 if n.children.len() == 1 && n.operators.is_empty() {
1076 return self.is_boolean_expression(n.children[0]);
1077 }
1078 false
1079 }
1080 AstNode::UnaryExpr(n) => {
1081 if n.operator == Some(UnaryOp::Not) {
1082 return true; }
1084 if n.children.len() == 1 && n.operator.is_none() {
1085 return self.is_boolean_expression(n.children[0]);
1086 }
1087 false }
1089 AstNode::PrimaryExpr(n) => {
1090 if n.children.len() == 1 {
1091 return self.is_boolean_expression(n.children[0]);
1092 }
1093 false
1094 }
1095 AstNode::Variable(_) => true, AstNode::Literal(n) => {
1097 if n.children.is_empty() {
1098 let token = self.arena.get_token(n.begin_token);
1099 matches!(token.token_type, TokenType::TRUE | TokenType::FALSE)
1100 } else {
1101 false }
1103 }
1104 _ => false,
1105 }
1106 }
1107
1108 fn set_parent(&mut self, child_id: NodeId, parent_id: NodeId) {
1112 match self.arena.get_node_mut(child_id) {
1113 AstNode::JmsSelector(node) => node.parent = Some(parent_id),
1114 AstNode::OrExpression(node) => node.parent = Some(parent_id),
1115 AstNode::AndExpression(node) => node.parent = Some(parent_id),
1116 AstNode::EqualityExpression(node) => node.parent = Some(parent_id),
1117 AstNode::ComparisonExpression(node) => node.parent = Some(parent_id),
1118 AstNode::AddExpression(node) => node.parent = Some(parent_id),
1119 AstNode::MultExpr(node) => node.parent = Some(parent_id),
1120 AstNode::UnaryExpr(node) => node.parent = Some(parent_id),
1121 AstNode::PrimaryExpr(node) => node.parent = Some(parent_id),
1122 AstNode::Literal(node) => node.parent = Some(parent_id),
1123 AstNode::StringLiteral(node) => node.parent = Some(parent_id),
1124 AstNode::Variable(node) => node.parent = Some(parent_id),
1125 }
1126 }
1127
1128 #[allow(dead_code)]
1130 fn current_token_matches(&self, types: &[TokenType]) -> bool {
1131 types.contains(&self.current_token.token_type)
1132 }
1133
1134 fn consume_token(&mut self) -> ParseResult<Token> {
1136 let old_token = self.current_token.clone();
1137 self.current_token = if !self.lookahead.is_empty() {
1138 self.lookahead.remove(0)
1139 } else {
1140 self.lexer.next_token()?
1141 };
1142 self.current_token_id = Some(self.arena.alloc_token(self.current_token.clone()));
1143 Ok(old_token)
1144 }
1145
1146 fn expect_token(&mut self, expected: TokenType) -> ParseResult<Token> {
1148 if self.current_token.token_type == expected {
1149 self.consume_token()
1150 } else {
1151 Err(ParseError::at_position(
1152 format!(
1153 "Expected {:?}, found {:?} '{}'",
1154 expected, self.current_token.token_type, self.current_token.image
1155 ),
1156 self.current_token.begin_offset
1157 ))
1158 }
1159 }
1160
1161 fn alloc_current_token(&mut self) -> TokenId {
1163 let token_id = self.arena.alloc_token(self.current_token.clone());
1164 self.current_token_id = Some(token_id);
1165 token_id
1166 }
1167
1168 #[allow(dead_code)]
1170 fn lookahead(&mut self, n: usize) -> ParseResult<&Token> {
1171 if n == 0 {
1172 return Ok(&self.current_token);
1173 }
1174
1175 while self.lookahead.len() < n {
1177 let token = self.lexer.next_token()?;
1178 self.lookahead.push(token);
1179 }
1180
1181 Ok(&self.lookahead[n - 1])
1182 }
1183
1184 #[allow(dead_code)]
1187 fn lookahead_type(&mut self, n: usize) -> Option<TokenType> {
1188 if n == 0 {
1189 return Some(self.current_token.token_type);
1190 }
1191 self.lookahead(n).ok().map(|t| t.token_type)
1192 }
1193}