use crate::error::{ParseError, ParseResult};
use crate::tokens::{Token, TokenType};
use crate::lexer::Lexer;
use crate::arena::{Arena, AstNode, NodeId, TokenId};
use crate::arena::AddOp;
use crate::arena::MultExprOp;
use crate::arena::{EqualityOp, ComparisonOp, UnaryOp};
use crate::arena::{
JmsSelectorNode,
OrExpressionNode,
AndExpressionNode,
EqualityExpressionNode,
ComparisonExpressionNode,
AddExpressionNode,
MultExprNode,
UnaryExprNode,
PrimaryExprNode,
LiteralNode,
StringLiteralNode,
VariableNode
};
/// Recursive-descent parser that turns selector text into an arena-backed AST.
///
/// Tokens are pulled from the lexer on demand; nodes and tokens created while
/// parsing are stored in the owned `Arena` and referred to by id.
pub struct Parser {
// Token source; read from whenever the parser advances or peeks ahead.
lexer: Lexer,
// The token currently being examined (not yet consumed).
current_token: Token,
// Tokens already read from the lexer by `lookahead` but not yet made current.
lookahead: Vec<Token>,
// Storage for every AST node and token allocated during parsing.
arena: Arena,
// Id of the token most recently allocated in the arena, if any.
current_token_id: Option<TokenId>,
// The original input text, exposed through `input()`.
input: String,
}
/// Literal category of one `IN (...)` list element, used to check that all
/// elements of a list share the same category.
#[derive(Debug, Clone, PartialEq)]
enum InElementType {
// Decimal (without a '.'), hex or octal literal.
Integer,
// Floating-point literal, or a decimal literal whose image contains '.'.
Float,
// String literal.
StringLit,
}
impl Parser {
/// Creates a parser over `input` and primes it with the first token.
///
/// # Errors
/// Propagates the lexer error if the first token cannot be read.
pub fn new(input: String) -> ParseResult<Self> {
    // The lexer gets its own copy; the original text is kept for `input()`.
    let mut token_source = Lexer::new(input.clone());
    let first_token = token_source.next_token()?;
    let parser = Parser {
        lexer: token_source,
        current_token: first_token,
        lookahead: Vec::new(),
        arena: Arena::new(),
        current_token_id: None,
        input,
    };
    Ok(parser)
}
/// Returns a shared reference to the arena holding the parsed nodes and tokens.
pub fn arena(&self) -> &Arena {
&self.arena
}
/// Returns a mutable reference to the arena holding the parsed nodes and tokens.
pub fn arena_mut(&mut self) -> &mut Arena {
&mut self.arena
}
/// Returns the input text this parser was constructed with.
pub fn input(&self) -> &str {
&self.input
}
/// Parses the whole input and returns the id of the root `JmsSelector` node.
///
/// # Errors
/// Returns whatever error `parse_jms_selector` reports.
pub fn parse(&mut self) -> ParseResult<NodeId> {
self.parse_jms_selector()
}
/// Parses the top-level rule: one OR-expression that must be boolean and
/// must be followed by `EOF`.
///
/// # Errors
/// Fails if the expression does not parse, if `validate_boolean_root`
/// rejects it, or if the token after it is not `EOF`.
fn parse_jms_selector(&mut self) -> ParseResult<NodeId> {
    let first_tok = self.alloc_current_token();
    let root_expr = self.parse_or_expression()?;
    // The boolean check runs before the EOF check, so it wins when both fail.
    self.validate_boolean_root(root_expr)?;
    self.expect_token(TokenType::EOF)?;
    let last_tok = self.current_token_id.unwrap_or(first_tok);

    let mut selector = JmsSelectorNode::new(first_tok, last_tok);
    selector.children.push(root_expr);
    let selector_id = self.arena.alloc_node(AstNode::JmsSelector(selector));
    self.set_parent(root_expr, selector_id);
    Ok(selector_id)
}
/// Parses `and_expr (OR and_expr)*` into a single `OrExpression` node.
///
/// A node is produced even when no `OR` is present; it then has one child.
fn parse_or_expression(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let mut operands = vec![self.parse_and_expression()?];
    loop {
        if self.current_token.token_type != TokenType::OR {
            break;
        }
        self.consume_token()?;
        operands.push(self.parse_and_expression()?);
    }
    let end = self.current_token_id.unwrap_or(start);

    let mut or_node = OrExpressionNode::new(start, end);
    or_node.children = operands.clone();
    let or_id = self.arena.alloc_node(AstNode::OrExpression(or_node));
    // Back-link every operand now that the parent id is known.
    operands
        .into_iter()
        .for_each(|operand| self.set_parent(operand, or_id));
    Ok(or_id)
}
/// Parses `equality_expr (AND equality_expr)*` into a single `AndExpression` node.
///
/// A node is produced even when no `AND` is present; it then has one child.
fn parse_and_expression(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let mut operands = vec![self.parse_equality_expression()?];
    loop {
        if self.current_token.token_type != TokenType::AND {
            break;
        }
        self.consume_token()?;
        operands.push(self.parse_equality_expression()?);
    }
    let end = self.current_token_id.unwrap_or(start);

    let mut and_node = AndExpressionNode::new(start, end);
    and_node.children = operands.clone();
    let and_id = self.arena.alloc_node(AstNode::AndExpression(and_node));
    // Back-link every operand now that the parent id is known.
    operands
        .into_iter()
        .for_each(|operand| self.set_parent(operand, and_id));
    Ok(and_id)
}
/// Parses a comparison expression followed by any number of equality tails:
/// `= expr`, `<> expr` (token `NE`), `IS NULL` or `IS NOT NULL`.
///
/// One operator is recorded per tail; the `IS [NOT] NULL` forms add an
/// operator but no additional child.
fn parse_equality_expression(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let mut operands: Vec<NodeId> = vec![self.parse_comparison_expression()?];
    let mut ops: Vec<EqualityOp> = Vec::new();

    loop {
        let kind = self.current_token.token_type;
        if kind == TokenType::EQ || kind == TokenType::NE {
            ops.push(if kind == TokenType::EQ {
                EqualityOp::Equal
            } else {
                EqualityOp::NotEqual
            });
            self.expect_token(kind)?;
            operands.push(self.parse_comparison_expression()?);
        } else if kind == TokenType::IS {
            // Peek one token to tell `IS NULL` from `IS NOT NULL`.
            if self.lookahead_type(1) == Some(TokenType::NULL) {
                ops.push(EqualityOp::IsNull);
                self.expect_token(TokenType::IS)?;
                self.expect_token(TokenType::NULL)?;
            } else {
                ops.push(EqualityOp::IsNotNull);
                self.expect_token(TokenType::IS)?;
                self.expect_token(TokenType::NOT)?;
                self.expect_token(TokenType::NULL)?;
            }
        } else {
            break;
        }
    }

    let end = self.current_token_id.unwrap_or(start);
    let mut equality = EqualityExpressionNode::new(start, end);
    equality.children = operands.clone();
    equality.operators = ops;
    let equality_id = self.arena.alloc_node(AstNode::EqualityExpression(equality));
    for operand in operands {
        self.set_parent(operand, equality_id);
    }
    Ok(equality_id)
}
/// Parses an additive expression followed by any number of comparison tails:
/// `>`, `>=`, `<`, `<=`, `[NOT] LIKE pattern [ESCAPE char]`,
/// `[NOT] BETWEEN low AND high`, and `[NOT] IN (elem, ...)`.
///
/// Operands are appended to the node's children in source order and one
/// operator is recorded per tail. The `NOT` forms are only recognised when
/// the following token(s) match (`NOT IN` additionally requires `(`);
/// otherwise the loop ends and `NOT` is left for the caller.
///
/// # Errors
/// Propagates any error from operand parsing, token expectation, BETWEEN
/// bound validation or IN-list type checking.
fn parse_comparison_expression(&mut self) -> ParseResult<NodeId> {
    let begin_token = self.alloc_current_token();
    let mut children: Vec<NodeId> = Vec::new();
    let mut operators: Vec<ComparisonOp> = Vec::new();
    children.push(self.parse_add_expression()?);

    loop {
        let kind = self.current_token.token_type;
        let relational = match kind {
            TokenType::GT => Some(ComparisonOp::GreaterThan),
            TokenType::GE => Some(ComparisonOp::GreaterThanEqual),
            TokenType::LT => Some(ComparisonOp::LessThan),
            TokenType::LE => Some(ComparisonOp::LessThanEqual),
            _ => None,
        };

        if let Some(op) = relational {
            operators.push(op);
            self.expect_token(kind)?;
            children.push(self.parse_add_expression()?);
        } else if kind == TokenType::LIKE {
            self.expect_token(TokenType::LIKE)?;
            let op = self.parse_like_operands(
                &mut children,
                ComparisonOp::Like,
                ComparisonOp::LikeEscape,
            )?;
            operators.push(op);
        } else if kind == TokenType::NOT && self.lookahead_type(1) == Some(TokenType::LIKE) {
            self.expect_token(TokenType::NOT)?;
            self.expect_token(TokenType::LIKE)?;
            let op = self.parse_like_operands(
                &mut children,
                ComparisonOp::NotLike,
                ComparisonOp::NotLikeEscape,
            )?;
            operators.push(op);
        } else if kind == TokenType::BETWEEN {
            operators.push(ComparisonOp::Between);
            self.expect_token(TokenType::BETWEEN)?;
            self.parse_between_operands(&mut children)?;
        } else if kind == TokenType::NOT && self.lookahead_type(1) == Some(TokenType::BETWEEN) {
            operators.push(ComparisonOp::NotBetween);
            self.expect_token(TokenType::NOT)?;
            self.expect_token(TokenType::BETWEEN)?;
            self.parse_between_operands(&mut children)?;
        } else if kind == TokenType::IN {
            operators.push(ComparisonOp::In);
            self.expect_token(TokenType::IN)?;
            self.parse_in_list_operands(&mut children)?;
        } else if kind == TokenType::NOT
            && self.lookahead_type(1) == Some(TokenType::IN)
            && self.lookahead_type(2) == Some(TokenType::LPAREN)
        {
            operators.push(ComparisonOp::NotIn);
            self.expect_token(TokenType::NOT)?;
            self.expect_token(TokenType::IN)?;
            self.parse_in_list_operands(&mut children)?;
        } else {
            break;
        }
    }

    let end_token = self.current_token_id.unwrap_or(begin_token);
    let mut node = ComparisonExpressionNode::new(begin_token, end_token);
    node.children = children.clone();
    node.operators = operators;
    let node_id = self.arena.alloc_node(AstNode::ComparisonExpression(node));
    for child_id in children {
        self.set_parent(child_id, node_id);
    }
    Ok(node_id)
}

/// Parses what follows `[NOT] LIKE`: the pattern string and an optional
/// `ESCAPE` string, appending both to `children`.
///
/// Returns `escaped` when an `ESCAPE` clause was present, `plain` otherwise.
fn parse_like_operands(
    &mut self,
    children: &mut Vec<NodeId>,
    plain: ComparisonOp,
    escaped: ComparisonOp,
) -> ParseResult<ComparisonOp> {
    children.push(self.parse_string_literal()?);
    if self.current_token.token_type != TokenType::ESCAPE {
        return Ok(plain);
    }
    self.expect_token(TokenType::ESCAPE)?;
    children.push(self.parse_string_literal()?);
    Ok(escaped)
}

/// Parses what follows `[NOT] BETWEEN`: `low AND high`, appending both bounds
/// to `children` and validating them against each other.
fn parse_between_operands(&mut self, children: &mut Vec<NodeId>) -> ParseResult<()> {
    let low_id = self.parse_between_bound()?;
    children.push(low_id);
    self.expect_token(TokenType::AND)?;
    let high_id = self.parse_between_bound()?;
    children.push(high_id);
    self.validate_between_bounds(low_id, high_id)
}

/// Parses what follows `[NOT] IN`: a parenthesised, comma-separated list of
/// literals, appending each element to `children`.
///
/// Every element after the first must classify to the same `InElementType`
/// as the first one.
fn parse_in_list_operands(&mut self, children: &mut Vec<NodeId>) -> ParseResult<()> {
    self.expect_token(TokenType::LPAREN)?;
    let first_type = self.classify_current_token_for_in()?;
    children.push(self.parse_in_element()?);
    while self.current_token.token_type == TokenType::COMMA {
        self.expect_token(TokenType::COMMA)?;
        // Classify before parsing so the type error is reported at the element.
        let elem_type = self.classify_current_token_for_in()?;
        self.check_in_type_consistency(&first_type, &elem_type)?;
        children.push(self.parse_in_element()?);
    }
    self.expect_token(TokenType::RPAREN)?;
    Ok(())
}
/// Parses `mult_expr (('+' | '-') mult_expr)*` into an `AddExpression` node.
///
/// `operators[i]` is the operator between `children[i]` and `children[i + 1]`.
/// A node is produced even when there is no operator.
fn parse_add_expression(&mut self) -> ParseResult<NodeId> {
    let begin_token = self.alloc_current_token();
    let mut children = vec![self.parse_mult_expr()?];
    let mut operators = Vec::new();

    // A single match both decides whether to continue and picks the operator,
    // so no unreachable error arm is needed.
    loop {
        let op = match self.current_token.token_type {
            TokenType::PLUS => AddOp::Plus,
            TokenType::MINUS => AddOp::Minus,
            _ => break,
        };
        operators.push(op);
        self.consume_token()?;
        children.push(self.parse_mult_expr()?);
    }

    let end_token = self.current_token_id.unwrap_or(begin_token);
    let mut node = AddExpressionNode::new(begin_token, end_token);
    node.children = children.clone();
    node.operators = operators;
    let node_id = self.arena.alloc_node(AstNode::AddExpression(node));
    for child in children {
        self.set_parent(child, node_id);
    }
    Ok(node_id)
}
/// Parses `unary_expr (('*' | '/' | '%') unary_expr)*` into a `MultExpr` node.
///
/// `operators[i]` is the operator between `children[i]` and `children[i + 1]`.
/// A node is produced even when there is no operator.
fn parse_mult_expr(&mut self) -> ParseResult<NodeId> {
    let begin_token = self.alloc_current_token();
    let mut children = vec![self.parse_unary_expr()?];
    let mut operators = Vec::new();

    // A single match both decides whether to continue and picks the operator,
    // so no unreachable error arm is needed.
    loop {
        let op = match self.current_token.token_type {
            TokenType::STAR => MultExprOp::Star,
            TokenType::SLASH => MultExprOp::Slash,
            TokenType::PERCENT => MultExprOp::Percent,
            _ => break,
        };
        operators.push(op);
        self.consume_token()?;
        children.push(self.parse_unary_expr()?);
    }

    let end_token = self.current_token_id.unwrap_or(begin_token);
    let mut node = MultExprNode::new(begin_token, end_token);
    node.children = children.clone();
    node.operators = operators;
    let node_id = self.arena.alloc_node(AstNode::MultExpr(node));
    for child in children {
        self.set_parent(child, node_id);
    }
    Ok(node_id)
}
/// Parses a unary expression: `+ unary`, `- unary`, `NOT unary`, or a
/// primary expression with no prefix operator.
///
/// The resulting `UnaryExpr` node always has exactly one child; `operator`
/// is `None` when no prefix was present.
///
/// # Errors
/// Fails with "Expected expression" when the current token can start
/// neither a prefix operator nor a primary expression.
fn parse_unary_expr(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let kind = self.current_token.token_type;
    let prefix = match kind {
        TokenType::PLUS => Some(UnaryOp::Plus),
        TokenType::MINUS => Some(UnaryOp::Negate),
        TokenType::NOT => Some(UnaryOp::Not),
        _ => None,
    };

    let operand = if prefix.is_some() {
        // Consume the prefix token, then recurse for the operand.
        self.expect_token(kind)?;
        self.parse_unary_expr()?
    } else if matches!(
        kind,
        TokenType::TRUE
            | TokenType::FALSE
            | TokenType::NULL
            | TokenType::LPAREN
            | TokenType::DECIMAL_LITERAL
            | TokenType::HEX_LITERAL
            | TokenType::OCTAL_LITERAL
            | TokenType::FLOATING_POINT_LITERAL
            | TokenType::STRING_LITERAL
            | TokenType::ID
    ) {
        self.parse_primary_expr()?
    } else {
        let message = format!(
            "Expected expression, found {:?} '{}'",
            self.current_token.token_type, self.current_token.image
        );
        return Err(ParseError::at_position(
            message,
            self.current_token.begin_offset,
        ));
    };

    let end = self.current_token_id.unwrap_or(start);
    let mut unary = UnaryExprNode::new(start, end);
    unary.children = vec![operand];
    unary.operator = prefix;
    let unary_id = self.arena.alloc_node(AstNode::UnaryExpr(unary));
    self.set_parent(operand, unary_id);
    Ok(unary_id)
}
/// Parses a primary expression: a literal, a variable (`ID`), or a
/// parenthesised OR-expression.
///
/// The resulting `PrimaryExpr` node wraps exactly one child.
///
/// # Errors
/// Fails with "Expected expression" for any other token, or propagates the
/// error from the nested parse / missing `)`.
fn parse_primary_expr(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let inner = match self.current_token.token_type {
        TokenType::TRUE
        | TokenType::FALSE
        | TokenType::NULL
        | TokenType::DECIMAL_LITERAL
        | TokenType::HEX_LITERAL
        | TokenType::OCTAL_LITERAL
        | TokenType::FLOATING_POINT_LITERAL
        | TokenType::STRING_LITERAL => self.parse_literal()?,
        TokenType::ID => self.parse_variable()?,
        TokenType::LPAREN => {
            self.consume_token()?;
            let nested = self.parse_or_expression()?;
            self.expect_token(TokenType::RPAREN)?;
            nested
        }
        _ => {
            let message = format!(
                "Expected expression, found {:?} '{}'",
                self.current_token.token_type, self.current_token.image
            );
            return Err(ParseError::at_position(
                message,
                self.current_token.begin_offset,
            ));
        }
    };

    let end = self.current_token_id.unwrap_or(start);
    let mut primary = PrimaryExprNode::new(start, end);
    primary.children = vec![inner];
    let primary_id = self.arena.alloc_node(AstNode::PrimaryExpr(primary));
    self.set_parent(inner, primary_id);
    Ok(primary_id)
}
/// Parses one literal token into a `Literal` node.
///
/// String literals are delegated to `parse_string_literal` and become the
/// node's only child; numeric, boolean and `NULL` literals are consumed
/// directly and leave the node childless.
///
/// # Errors
/// Fails with "Expected expression" when the current token is not a literal.
fn parse_literal(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    let string_child = match self.current_token.token_type {
        TokenType::STRING_LITERAL => Some(self.parse_string_literal()?),
        TokenType::DECIMAL_LITERAL
        | TokenType::HEX_LITERAL
        | TokenType::OCTAL_LITERAL
        | TokenType::FLOATING_POINT_LITERAL
        | TokenType::TRUE
        | TokenType::FALSE
        | TokenType::NULL => {
            self.consume_token()?;
            None
        }
        _ => {
            let message = format!(
                "Expected expression, found {:?} '{}'",
                self.current_token.token_type, self.current_token.image
            );
            return Err(ParseError::at_position(
                message,
                self.current_token.begin_offset,
            ));
        }
    };

    let end = self.current_token_id.unwrap_or(start);
    let mut literal = LiteralNode::new(start, end);
    literal.children = string_child.into_iter().collect();
    let literal_id = self.arena.alloc_node(AstNode::Literal(literal));
    if let Some(child) = string_child {
        self.set_parent(child, literal_id);
    }
    Ok(literal_id)
}
/// Consumes one `STRING_LITERAL` token and wraps it in a childless
/// `StringLiteral` node.
///
/// # Errors
/// Fails if the current token is not a `STRING_LITERAL`.
fn parse_string_literal(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    self.expect_token(TokenType::STRING_LITERAL)?;
    let end = self.current_token_id.unwrap_or(start);

    let mut literal = StringLiteralNode::new(start, end);
    // Leaf node: the child list is explicitly set to empty.
    literal.children = Vec::new();
    Ok(self.arena.alloc_node(AstNode::StringLiteral(literal)))
}
/// Consumes one `ID` token and wraps it in a childless `Variable` node.
///
/// # Errors
/// Fails if the current token is not an `ID`.
fn parse_variable(&mut self) -> ParseResult<NodeId> {
    let start = self.alloc_current_token();
    self.expect_token(TokenType::ID)?;
    let end = self.current_token_id.unwrap_or(start);

    let mut variable = VariableNode::new(start, end);
    // Leaf node: the child list is explicitly set to empty.
    variable.children = Vec::new();
    Ok(self.arena.alloc_node(AstNode::Variable(variable)))
}
/// Parses one bound of a `BETWEEN` clause.
///
/// Accepted forms are a numeric or string literal, or a `+`/`-` sign
/// immediately followed by a numeric literal. A signed bound is returned as
/// a `UnaryExpr` wrapping the primary expression; an unsigned bound is the
/// primary expression itself.
///
/// # Errors
/// Rejects booleans, `NULL`, variables and any other token with a
/// BETWEEN-specific message.
fn parse_between_bound(&mut self) -> ParseResult<NodeId> {
    let sign = match self.current_token.token_type {
        TokenType::MINUS => Some(UnaryOp::Negate),
        TokenType::PLUS => Some(UnaryOp::Plus),
        _ => None,
    };

    if let Some(operator) = sign {
        let start = self.alloc_current_token();
        self.consume_token()?;

        // Only a numeric literal may follow the sign.
        let numeric_follows = matches!(
            self.current_token.token_type,
            TokenType::DECIMAL_LITERAL
                | TokenType::HEX_LITERAL
                | TokenType::OCTAL_LITERAL
                | TokenType::FLOATING_POINT_LITERAL
        );
        if !numeric_follows {
            let message = format!(
                "Expected numeric literal after '{}' in BETWEEN bound, found {:?} '{}'",
                if operator == UnaryOp::Negate { "-" } else { "+" },
                self.current_token.token_type,
                self.current_token.image
            );
            return Err(ParseError::at_position(
                message,
                self.current_token.begin_offset,
            ));
        }

        let operand = self.parse_primary_expr()?;
        let end = self.current_token_id.unwrap_or(start);
        let mut signed = UnaryExprNode::new(start, end);
        signed.children.push(operand);
        signed.operator = Some(operator);
        let signed_id = self.arena.alloc_node(AstNode::UnaryExpr(signed));
        self.set_parent(operand, signed_id);
        return Ok(signed_id);
    }

    let offset = self.current_token.begin_offset;
    let message = match self.current_token.token_type {
        TokenType::DECIMAL_LITERAL
        | TokenType::HEX_LITERAL
        | TokenType::OCTAL_LITERAL
        | TokenType::FLOATING_POINT_LITERAL
        | TokenType::STRING_LITERAL => return self.parse_primary_expr(),
        TokenType::TRUE | TokenType::FALSE => {
            "BETWEEN bounds cannot be boolean values".to_string()
        }
        TokenType::NULL => "NULL is not allowed in BETWEEN bounds".to_string(),
        TokenType::ID => "BETWEEN bounds must be literal values, not variables".to_string(),
        _ => format!(
            "BETWEEN bounds must be literal values (numeric or string), found {:?} '{}'",
            self.current_token.token_type, self.current_token.image
        ),
    };
    Err(ParseError::at_position(message, offset))
}
/// Returns the source text of the literal reachable from `node_id`.
///
/// Walks through `UnaryExpr`, `PrimaryExpr` and `Literal` wrappers down to
/// the token image. A negating unary prefixes the image with `-`; any other
/// unary operator leaves it unchanged. Node kinds that cannot hold a literal
/// yield an empty string.
fn get_literal_image(&self, node_id: NodeId) -> String {
    match self.arena.get_node(node_id) {
        AstNode::UnaryExpr(unary) => match unary.children.first() {
            None => String::new(),
            Some(&operand) => {
                let inner = self.get_literal_image(operand);
                if unary.operator == Some(UnaryOp::Negate) {
                    format!("-{}", inner)
                } else {
                    inner
                }
            }
        },
        AstNode::PrimaryExpr(primary) => match primary.children.first() {
            Some(&child) => self.get_literal_image(child),
            None => self.arena.get_token(primary.begin_token).image.clone(),
        },
        AstNode::Literal(literal) => match literal.children.first() {
            Some(&child) => self.get_literal_image(child),
            None => self.arena.get_token(literal.begin_token).image.clone(),
        },
        AstNode::StringLiteral(string) => self.arena.get_token(string.begin_token).image.clone(),
        _ => String::new(),
    }
}
/// Returns the token type of the literal reachable from `node_id`.
///
/// Walks through `UnaryExpr`, `PrimaryExpr` and `Literal` wrappers down to
/// the underlying token. Yields `TokenType::INVALID` for a childless unary
/// node or for any node kind that cannot hold a literal.
fn get_literal_token_type(&self, node_id: NodeId) -> TokenType {
    match self.arena.get_node(node_id) {
        AstNode::UnaryExpr(unary) => match unary.children.first() {
            Some(&operand) => self.get_literal_token_type(operand),
            None => TokenType::INVALID,
        },
        AstNode::PrimaryExpr(primary) => match primary.children.first() {
            Some(&child) => self.get_literal_token_type(child),
            None => self.arena.get_token(primary.begin_token).token_type,
        },
        AstNode::Literal(literal) => match literal.children.first() {
            Some(&child) => self.get_literal_token_type(child),
            None => self.arena.get_token(literal.begin_token).token_type,
        },
        AstNode::StringLiteral(_) => TokenType::STRING_LITERAL,
        _ => TokenType::INVALID,
    }
}
fn validate_between_bounds(&self, low_id: NodeId, high_id: NodeId) -> ParseResult<()> {
let low_image = self.get_literal_image(low_id);
let high_image = self.get_literal_image(high_id);
let low_type = self.get_literal_token_type(low_id);
let high_type = self.get_literal_token_type(high_id);
let low_is_numeric = matches!(low_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
let high_is_numeric = matches!(high_type, TokenType::DECIMAL_LITERAL | TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL | TokenType::FLOATING_POINT_LITERAL);
let low_is_string = low_type == TokenType::STRING_LITERAL;
let high_is_string = high_type == TokenType::STRING_LITERAL;
if low_is_numeric && high_is_string || low_is_string && high_is_numeric {
let low_kind = if low_is_string { "string" } else { "integer" };
let high_kind = if high_is_string { "string" } else { "integer" };
return Err(ParseError::new(format!(
"BETWEEN bounds must be the same type (both numeric or both string): found {} ('{}') and {} ('{}')",
low_kind, low_image, high_kind, high_image
)));
}
if low_is_numeric && high_is_numeric {
let low_val = Self::parse_numeric_literal(&low_image).map_err(|_| {
ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", low_image))
})?;
let high_val = Self::parse_numeric_literal(&high_image).map_err(|_| {
ParseError::new(format!("Invalid numeric literal in BETWEEN: '{}'", high_image))
})?;
if low_val > high_val {
return Err(ParseError::new(format!(
"BETWEEN lower bound ({}) must not exceed upper bound ({})",
low_image, high_image
)));
}
} else if low_is_string && high_is_string {
let low_inner = &low_image[1..low_image.len() - 1];
let high_inner = &high_image[1..high_image.len() - 1];
if low_inner > high_inner {
return Err(ParseError::new(format!(
"BETWEEN lower bound ({}) must not exceed upper bound ({})",
low_image, high_image
)));
}
}
Ok(())
}
/// Converts the text of a numeric literal to `f64`.
///
/// Accepted forms, each optionally preceded by a single `+` or `-` and
/// (for integers) followed by an `L`/`l` suffix:
/// * hexadecimal: `0x1F`, `0X1f`
/// * octal: a leading `0` followed only by digits `0`-`7`, e.g. `017`
/// * anything `f64::from_str` accepts, e.g. `42`, `2.5`, `7E3`
///
/// The sign is split off before the radix is detected, so `-0x10` yields
/// `-16.0` and `-017` yields `-15.0`.
///
/// # Errors
/// Returns the underlying parse error message, or a message about a repeated
/// sign, when the text is not a valid literal.
fn parse_numeric_literal(image: &str) -> Result<f64, String> {
    // Peel off at most one sign; a second one is an error rather than being
    // passed on to the digit parsers.
    let (negative, unsigned) = match image.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, image.strip_prefix('+').unwrap_or(image)),
    };
    if unsigned.starts_with('+') || unsigned.starts_with('-') {
        return Err(format!("invalid numeric literal '{}': repeated sign", image));
    }

    let digits = unsigned
        .strip_suffix('L')
        .or_else(|| unsigned.strip_suffix('l'))
        .unwrap_or(unsigned);

    let parsed: Result<f64, String> = if let Some(hex) = digits
        .strip_prefix("0x")
        .or_else(|| digits.strip_prefix("0X"))
    {
        i64::from_str_radix(hex, 16)
            .map(|i| i as f64)
            .map_err(|e| e.to_string())
    } else if digits.len() > 1
        && digits.starts_with('0')
        && digits[1..].chars().all(|c| ('0'..='7').contains(&c))
    {
        i64::from_str_radix(&digits[1..], 8)
            .map(|i| i as f64)
            .map_err(|e| e.to_string())
    } else {
        digits.parse::<f64>().map_err(|e| e.to_string())
    };

    let magnitude = parsed?;
    Ok(if negative { -magnitude } else { magnitude })
}
/// Classifies the upcoming `IN`-list element without consuming it.
///
/// For a leading `+`/`-` the decision is made from the next token (peeked
/// via lookahead); otherwise from the current token. Decimal literals count
/// as `Float` when their image contains a `.`, else as `Integer`.
///
/// # Errors
/// Rejects booleans, `NULL`, a sign not followed by a numeric literal, and
/// any other non-literal token.
fn classify_current_token_for_in(&mut self) -> ParseResult<InElementType> {
    // A decimal literal is an integer unless its text carries a decimal point.
    let decimal_kind = |image: &str| {
        if image.contains('.') {
            InElementType::Float
        } else {
            InElementType::Integer
        }
    };

    let kind = self.current_token.token_type;
    if kind == TokenType::MINUS || kind == TokenType::PLUS {
        let next_type = self.lookahead_type(1);
        let classified = match next_type {
            Some(TokenType::FLOATING_POINT_LITERAL) => Ok(InElementType::Float),
            Some(TokenType::DECIMAL_LITERAL) => {
                let next_image = self.lookahead(1).map(|t| t.image.clone()).unwrap_or_default();
                Ok(decimal_kind(&next_image))
            }
            Some(TokenType::HEX_LITERAL) | Some(TokenType::OCTAL_LITERAL) => {
                Ok(InElementType::Integer)
            }
            _ => {
                let message = format!(
                    "Expected numeric literal after '{}', found {:?}",
                    self.current_token.image, next_type
                );
                Err(ParseError::at_position(
                    message,
                    self.current_token.begin_offset,
                ))
            }
        };
        return classified;
    }

    let offset = self.current_token.begin_offset;
    let message = match kind {
        TokenType::STRING_LITERAL => return Ok(InElementType::StringLit),
        TokenType::FLOATING_POINT_LITERAL => return Ok(InElementType::Float),
        TokenType::HEX_LITERAL | TokenType::OCTAL_LITERAL => return Ok(InElementType::Integer),
        TokenType::DECIMAL_LITERAL => return Ok(decimal_kind(&self.current_token.image)),
        TokenType::TRUE | TokenType::FALSE => {
            "Boolean is not allowed in IN list elements".to_string()
        }
        TokenType::NULL => "NULL is not allowed in IN list elements".to_string(),
        _ => format!(
            "IN list elements must be literal values (string, integer, or float), found {:?} '{}'",
            self.current_token.token_type, self.current_token.image
        ),
    };
    Err(ParseError::at_position(message, offset))
}
/// Ensures an `IN`-list element has the same category as the first element.
///
/// Only identical categories are compatible; integers and floats do not mix.
///
/// # Errors
/// Returns an error positioned at the current token, naming both categories.
fn check_in_type_consistency(&self, first: &InElementType, current: &InElementType) -> ParseResult<()> {
    /// Human-readable name of an element category for the error message.
    fn label(kind: &InElementType) -> &'static str {
        match kind {
            InElementType::Integer => "integer",
            InElementType::Float => "float",
            InElementType::StringLit => "string",
        }
    }

    if first == current {
        return Ok(());
    }
    let message = format!(
        "IN list elements must all be the same type: first element is {}, but found {} '{}'",
        label(first),
        label(current),
        self.current_token.image
    );
    Err(ParseError::at_position(
        message,
        self.current_token.begin_offset,
    ))
}
/// Parses one element of an `IN (...)` list.
///
/// A leading `+`/`-` produces a `UnaryExpr` around the following primary
/// expression. Otherwise a string literal is parsed directly and a numeric
/// literal is parsed as a primary expression.
///
/// # Errors
/// Fails with "Expected literal value in IN list" for any other token.
fn parse_in_element(&mut self) -> ParseResult<NodeId> {
    let sign = match self.current_token.token_type {
        TokenType::MINUS => Some(UnaryOp::Negate),
        TokenType::PLUS => Some(UnaryOp::Plus),
        _ => None,
    };

    if let Some(operator) = sign {
        let start = self.alloc_current_token();
        self.consume_token()?;
        let operand = self.parse_primary_expr()?;
        let end = self.current_token_id.unwrap_or(start);

        let mut signed = UnaryExprNode::new(start, end);
        signed.children.push(operand);
        signed.operator = Some(operator);
        let signed_id = self.arena.alloc_node(AstNode::UnaryExpr(signed));
        self.set_parent(operand, signed_id);
        return Ok(signed_id);
    }

    match self.current_token.token_type {
        TokenType::STRING_LITERAL => self.parse_string_literal(),
        TokenType::DECIMAL_LITERAL
        | TokenType::HEX_LITERAL
        | TokenType::OCTAL_LITERAL
        | TokenType::FLOATING_POINT_LITERAL => self.parse_primary_expr(),
        _ => {
            let message = format!(
                "Expected literal value in IN list, found {:?} '{}'",
                self.current_token.token_type, self.current_token.image
            );
            Err(ParseError::at_position(
                message,
                self.current_token.begin_offset,
            ))
        }
    }
}
/// Requires the expression rooted at `node_id` to be boolean according to
/// `is_boolean_expression`.
///
/// # Errors
/// Returns a position-less `ParseError` when the expression is not boolean.
fn validate_boolean_root(&self, node_id: NodeId) -> ParseResult<()> {
    if !self.is_boolean_expression(node_id) {
        return Err(ParseError::new(
            "Expression must be boolean (comparison, logical, or boolean literal)".to_string(),
        ));
    }
    Ok(())
}
/// Decides structurally whether the subtree at `node_id` is boolean-valued.
///
/// * OR / AND nodes with more than one operand are boolean.
/// * Equality / comparison nodes with at least one operator are boolean.
/// * A `NOT` unary is boolean.
/// * Variables are accepted as boolean.
/// * A childless literal is boolean only when its token is `TRUE` or `FALSE`.
/// * Single-child pass-through nodes defer to their child; arithmetic with
///   operators, `+`/`-` unaries and everything else are not boolean.
fn is_boolean_expression(&self, node_id: NodeId) -> bool {
    match self.arena.get_node(node_id) {
        AstNode::OrExpression(n) => {
            n.children.len() > 1
                || (n.children.len() == 1 && self.is_boolean_expression(n.children[0]))
        }
        AstNode::AndExpression(n) => {
            n.children.len() > 1
                || (n.children.len() == 1 && self.is_boolean_expression(n.children[0]))
        }
        AstNode::EqualityExpression(n) => {
            !n.operators.is_empty()
                || (n.children.len() == 1 && self.is_boolean_expression(n.children[0]))
        }
        AstNode::ComparisonExpression(n) => {
            !n.operators.is_empty()
                || (n.children.len() == 1 && self.is_boolean_expression(n.children[0]))
        }
        AstNode::AddExpression(n) => {
            n.children.len() == 1
                && n.operators.is_empty()
                && self.is_boolean_expression(n.children[0])
        }
        AstNode::MultExpr(n) => {
            n.children.len() == 1
                && n.operators.is_empty()
                && self.is_boolean_expression(n.children[0])
        }
        AstNode::UnaryExpr(n) => {
            n.operator == Some(UnaryOp::Not)
                || (n.children.len() == 1
                    && n.operator.is_none()
                    && self.is_boolean_expression(n.children[0]))
        }
        AstNode::PrimaryExpr(n) => {
            n.children.len() == 1 && self.is_boolean_expression(n.children[0])
        }
        AstNode::Variable(_) => true,
        AstNode::Literal(n) => {
            // A literal with a child wraps a string literal, which is never boolean.
            n.children.is_empty()
                && matches!(
                    self.arena.get_token(n.begin_token).token_type,
                    TokenType::TRUE | TokenType::FALSE
                )
        }
        _ => false,
    }
}
/// Records `parent_id` as the parent of the node `child_id`, whatever its kind.
fn set_parent(&mut self, child_id: NodeId, parent_id: NodeId) {
    // Select the `parent` field of the concrete node type, then write once.
    let parent_slot = match self.arena.get_node_mut(child_id) {
        AstNode::JmsSelector(node) => &mut node.parent,
        AstNode::OrExpression(node) => &mut node.parent,
        AstNode::AndExpression(node) => &mut node.parent,
        AstNode::EqualityExpression(node) => &mut node.parent,
        AstNode::ComparisonExpression(node) => &mut node.parent,
        AstNode::AddExpression(node) => &mut node.parent,
        AstNode::MultExpr(node) => &mut node.parent,
        AstNode::UnaryExpr(node) => &mut node.parent,
        AstNode::PrimaryExpr(node) => &mut node.parent,
        AstNode::Literal(node) => &mut node.parent,
        AstNode::StringLiteral(node) => &mut node.parent,
        AstNode::Variable(node) => &mut node.parent,
    };
    *parent_slot = Some(parent_id);
}
/// Returns `true` when the current token's type is one of `types`.
#[allow(dead_code)]
fn current_token_matches(&self, types: &[TokenType]) -> bool {
    let current = &self.current_token.token_type;
    types.iter().any(|candidate| candidate == current)
}
/// Advances to the next token and returns the one that was current.
///
/// The next token comes from the lookahead buffer when it is non-empty,
/// otherwise from the lexer. The new current token is allocated in the
/// arena and its id stored in `current_token_id`.
///
/// # Errors
/// Propagates the lexer error; in that case the parser state is unchanged.
fn consume_token(&mut self) -> ParseResult<Token> {
    let upcoming = if self.lookahead.is_empty() {
        self.lexer.next_token()?
    } else {
        self.lookahead.remove(0)
    };
    let previous = std::mem::replace(&mut self.current_token, upcoming);
    let new_id = self.arena.alloc_token(self.current_token.clone());
    self.current_token_id = Some(new_id);
    Ok(previous)
}
/// Consumes the current token if it has type `expected` and returns it.
///
/// # Errors
/// Returns an "Expected ..., found ..." error positioned at the current
/// token when the type does not match; nothing is consumed in that case.
fn expect_token(&mut self, expected: TokenType) -> ParseResult<Token> {
    if self.current_token.token_type != expected {
        let message = format!(
            "Expected {:?}, found {:?} '{}'",
            expected, self.current_token.token_type, self.current_token.image
        );
        return Err(ParseError::at_position(
            message,
            self.current_token.begin_offset,
        ));
    }
    self.consume_token()
}
/// Stores a copy of the current token in the arena, remembers its id in
/// `current_token_id`, and returns that id.
fn alloc_current_token(&mut self) -> TokenId {
    let snapshot = self.current_token.clone();
    let id = self.arena.alloc_token(snapshot);
    self.current_token_id = Some(id);
    id
}
/// Peeks `n` tokens ahead without consuming anything.
///
/// `n == 0` is the current token; `n == 1` is the token after it, and so
/// on. Tokens read from the lexer are kept in the lookahead buffer so later
/// calls and `consume_token` see them again.
///
/// # Errors
/// Propagates the lexer error if a token needed to reach `n` cannot be read.
#[allow(dead_code)]
fn lookahead(&mut self, n: usize) -> ParseResult<&Token> {
    match n.checked_sub(1) {
        None => Ok(&self.current_token),
        Some(index) => {
            // Fill the buffer until it holds the requested position.
            while self.lookahead.len() <= index {
                let token = self.lexer.next_token()?;
                self.lookahead.push(token);
            }
            Ok(&self.lookahead[index])
        }
    }
}
/// Returns the type of the token `n` positions ahead (`0` = current token).
///
/// A lexer error while peeking is swallowed and reported as `None`.
#[allow(dead_code)]
fn lookahead_type(&mut self, n: usize) -> Option<TokenType> {
    match n {
        0 => Some(self.current_token.token_type),
        _ => self.lookahead(n).ok().map(|token| token.token_type),
    }
}
}