use std::collections::HashMap;
use pest::iterators::{Pair, Pairs};
use uni_common::Value;
use super::{ParseError, Rule};
use crate::ast::*;
/// Reports whether the next pair in `inner` has rule `rule`, without consuming it.
///
/// Returns `false` when the iterator is exhausted.
fn peek_is(inner: &mut std::iter::Peekable<Pairs<Rule>>, rule: Rule) -> bool {
    match inner.peek() {
        Some(upcoming) => upcoming.as_rule() == rule,
        None => false,
    }
}
/// Strips one pair of surrounding backticks from `s`.
///
/// The backticks are removed only when `s` both starts and ends with one (and
/// they are distinct characters); any other input is returned unchanged.
pub(crate) fn normalize_identifier(s: &str) -> String {
    let unquoted = match s.strip_prefix('`') {
        Some(rest) => rest.strip_suffix('`'),
        None => None,
    };
    match unquoted {
        Some(name) => name.to_owned(),
        None => s.to_owned(),
    }
}
/// Builds the expression held in the second inner pair of `pair`.
///
/// The first inner pair is discarded (presumably the WHERE keyword token).
/// Panics if `pair` has fewer than two inner pairs.
fn extract_where_expr(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut children = pair.into_inner();
    children.next();
    let predicate = children.next().unwrap();
    build_expression(predicate)
}
/// Consumes the next pair if it is an `identifier` and returns its normalized text.
///
/// Leaves the iterator untouched and returns `None` otherwise.
fn consume_identifier(inner: &mut std::iter::Peekable<Pairs<Rule>>) -> Option<String> {
    inner
        .next_if(|candidate| candidate.as_rule() == Rule::identifier)
        .map(|ident| normalize_identifier(ident.as_str()))
}
/// Wraps `pattern` and an optional predicate into a single-statement query
/// consisting of one non-optional MATCH clause.
fn match_query(pattern: Pattern, where_clause: Option<Expr>) -> Query {
    let match_clause = MatchClause {
        optional: false,
        pattern,
        where_clause,
    };
    let statement = Statement {
        clauses: vec![Clause::Match(match_clause)],
    };
    Query::Single(statement)
}
/// Builds the top-level [`Query`] from the parser output.
///
/// The first pair must be a `query` rule; its first child is built as a union
/// query. If the following child is a `time_travel_clause`, the result is
/// wrapped in `Query::TimeTravel`.
///
/// # Panics
/// Panics if `pairs` is empty or the `query` pair has no children.
pub fn build_query(pairs: Pairs<Rule>) -> Result<Query, ParseError> {
    let root = pairs.into_iter().next().unwrap();
    debug_assert_eq!(root.as_rule(), Rule::query);
    let mut children = root.into_inner();
    let query = build_union_query(children.next().unwrap())?;
    match children.next() {
        Some(tt_pair) if tt_pair.as_rule() == Rule::time_travel_clause => {
            let spec = build_time_travel_spec(tt_pair)?;
            Ok(Query::TimeTravel {
                query: Box::new(query),
                spec,
            })
        }
        _ => Ok(query),
    }
}
/// Builds a [`TimeTravelSpec`] from a `time_travel_clause` pair.
///
/// The first child selects the variant (`VERSION` or `TIMESTAMP_KW`); the first
/// subsequent `string` child supplies the value.
///
/// # Errors
/// Fails when no string literal follows the keyword, when the literal cannot be
/// built, or when the keyword is neither of the two supported rules.
fn build_time_travel_spec(pair: Pair<Rule>) -> Result<TimeTravelSpec, ParseError> {
    let mut children = pair.into_inner();
    let keyword_rule = children.next().unwrap().as_rule();
    let Some(literal) = children.find(|child| child.as_rule() == Rule::string) else {
        return Err(ParseError::new(
            "Expected string literal in time_travel_clause".into(),
        ));
    };
    let value = build_string_literal(literal)?;
    let spec = match keyword_rule {
        Rule::VERSION => TimeTravelSpec::Version(value),
        Rule::TIMESTAMP_KW => TimeTravelSpec::Timestamp(value),
        other => {
            return Err(ParseError::new(format!(
                "Unexpected keyword in time_travel_clause: {:?}",
                other
            )));
        }
    };
    Ok(spec)
}
/// Builds a left-nested chain of `Query::Union` nodes.
///
/// Children alternate between single queries and union operators. An operator
/// pair with more than one inner pair sets the `all` flag.
fn build_union_query(pair: Pair<Rule>) -> Result<Query, ParseError> {
    let mut parts = pair.into_inner();
    let mut combined = build_single_query(parts.next().unwrap())?;
    loop {
        let Some(union_op) = parts.next() else {
            break;
        };
        // Two or more tokens inside the operator pair mark the `all` form.
        let all = union_op.into_inner().nth(1).is_some();
        let rhs = build_single_query(parts.next().unwrap())?;
        combined = Query::Union {
            left: Box::new(combined),
            right: Box::new(rhs),
            all,
        };
    }
    Ok(combined)
}
/// Builds one query: a plain statement, a schema command, or either of those
/// wrapped in `Query::Explain`.
///
/// A `single_query` pair is unwrapped to its first child; any other pair is
/// dispatched on directly.
fn build_single_query(pair: Pair<Rule>) -> Result<Query, ParseError> {
    let node = match pair.as_rule() {
        Rule::single_query => pair.into_inner().next().unwrap(),
        _ => pair,
    };
    match node.as_rule() {
        Rule::statement => Ok(Query::Single(build_statement(node)?)),
        Rule::schema_command => Ok(Query::Schema(Box::new(build_schema_command(node)?))),
        Rule::explain_query => {
            // The first child is skipped; the second is the explained target.
            let target = node.into_inner().nth(1).unwrap();
            let explained = match target.as_rule() {
                Rule::statement => Query::Single(build_statement(target)?),
                Rule::schema_command => Query::Schema(Box::new(build_schema_command(target)?)),
                _ => unreachable!("Unexpected explain inner rule: {:?}", target.as_rule()),
            };
            Ok(Query::Explain(Box::new(explained)))
        }
        _ => unreachable!("Unexpected single_query rule: {:?}", node.as_rule()),
    }
}
pub(crate) fn build_statement(pair: Pair<Rule>) -> Result<Statement, ParseError> {
let clauses = pair
.into_inner()
.map(build_clause)
.collect::<Result<Vec<_>, _>>()?;
Ok(Statement { clauses })
}
/// Dispatches the first inner pair of `pair` to the matching clause builder.
///
/// # Panics
/// Panics if `pair` has no inner pair or the inner rule is not a known clause.
pub(crate) fn build_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
    let clause = pair.into_inner().next().unwrap();
    let rule = clause.as_rule();
    match rule {
        // Reading clauses.
        Rule::match_clause => build_match_clause(clause),
        Rule::unwind_clause => build_unwind_clause(clause),
        Rule::call_clause => build_call_clause(clause),
        // Projection clauses.
        Rule::return_clause => build_return_clause(clause),
        Rule::with_clause => build_with_clause(clause),
        Rule::with_recursive_clause => build_with_recursive_clause(clause),
        // Updating clauses.
        Rule::create_clause => build_create_clause(clause),
        Rule::merge_clause => build_merge_clause(clause),
        Rule::set_clause => build_set_clause(clause),
        Rule::remove_clause => build_remove_clause(clause),
        Rule::delete_clause => build_delete_clause(clause),
        _ => unreachable!("Unexpected clause: {:?}", rule),
    }
}
fn build_match_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner().peekable();
let optional = consume_if_present(&mut inner, Rule::OPTIONAL);
inner.next();
let pattern = build_pattern(inner.next().unwrap())?;
let where_clause = inner.next().map(extract_where_expr).transpose()?;
Ok(Clause::Match(MatchClause {
optional,
pattern,
where_clause,
}))
}
/// Builds a `Clause::Create` from the second inner pair (the pattern) of `pair`.
///
/// The first inner pair is skipped (presumably the CREATE keyword).
fn build_create_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
    let pattern_pair = pair.into_inner().nth(1).unwrap();
    build_pattern(pattern_pair).map(|pattern| Clause::Create(CreateClause { pattern }))
}
/// Optional trailing modifiers gathered by `parse_projection_modifiers`.
///
/// Each field stays `None` unless the corresponding clause was encountered.
struct ProjectionModifiers {
/// Sort items built from an `order_clause`.
order_by: Option<Vec<SortItem>>,
/// Expression built from a `skip_clause`.
skip: Option<Expr>,
/// Expression built from a `limit_clause`.
limit: Option<Expr>,
/// Predicate built from a `where_clause`.
where_clause: Option<Expr>,
}
/// Collects ORDER BY / SKIP / LIMIT / WHERE modifiers from the remaining pairs.
///
/// Pairs with any other rule are ignored. When a modifier occurs more than
/// once, the last occurrence wins.
fn parse_projection_modifiers(pairs: Pairs<Rule>) -> Result<ProjectionModifiers, ParseError> {
    let mut mods = ProjectionModifiers {
        order_by: None,
        skip: None,
        limit: None,
        where_clause: None,
    };
    for modifier in pairs {
        let rule = modifier.as_rule();
        if rule == Rule::order_clause {
            // Third inner pair holds the sort items.
            let sort_items = modifier.into_inner().nth(2).unwrap();
            mods.order_by = Some(build_sort_items(sort_items)?);
        } else if rule == Rule::skip_clause {
            let amount = modifier.into_inner().nth(1).unwrap();
            mods.skip = Some(build_expression(amount)?);
        } else if rule == Rule::limit_clause {
            let amount = modifier.into_inner().nth(1).unwrap();
            mods.limit = Some(build_expression(amount)?);
        } else if rule == Rule::where_clause {
            mods.where_clause = Some(extract_where_expr(modifier)?);
        }
    }
    Ok(mods)
}
/// Consumes a leading `DISTINCT` token if present and reports whether it was there.
fn consume_distinct(inner: &mut Pairs<Rule>) -> bool {
    let found = matches!(inner.peek(), Some(next) if next.as_rule() == Rule::DISTINCT);
    if found {
        inner.next();
    }
    found
}
fn build_return_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next();
let distinct = consume_distinct(&mut inner);
let items = build_return_items(inner.next().unwrap())?;
let mods = parse_projection_modifiers(inner)?;
Ok(Clause::Return(ReturnClause {
distinct,
items,
order_by: mods.order_by,
skip: mods.skip,
limit: mods.limit,
}))
}
fn build_with_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next();
let distinct = consume_distinct(&mut inner);
let items = build_return_items(inner.next().unwrap())?;
let mods = parse_projection_modifiers(inner)?;
Ok(Clause::With(WithClause {
distinct,
items,
order_by: mods.order_by,
skip: mods.skip,
limit: mods.limit,
where_clause: mods.where_clause,
}))
}
/// Builds a `Clause::WithRecursive`.
///
/// Positional layout: two skipped tokens, the name, one skipped token, then
/// the union query. `items` is always empty.
fn build_with_recursive_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
    let mut tokens = pair.into_inner();
    // Third token is the name (raw text, not normalized).
    let name = tokens.nth(2).unwrap().as_str().to_string();
    // Skip one more token, then take the union query.
    let union_pair = tokens.nth(1).unwrap();
    let query = build_union_query(union_pair)?;
    Ok(Clause::WithRecursive(WithRecursiveClause {
        name,
        query: Box::new(query),
        items: Vec::new(),
    }))
}
/// Builds a `Clause::Unwind`.
///
/// Positional layout: skipped token, expression, skipped token, variable name
/// (taken as raw text).
fn build_unwind_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
    let mut tokens = pair.into_inner();
    let expr = build_expression(tokens.nth(1).unwrap())?;
    let variable = tokens.nth(1).unwrap().as_str().to_string();
    Ok(Clause::Unwind(UnwindClause { expr, variable }))
}
/// Builds a `Clause::Delete`.
///
/// An optional leading `DETACH` token sets the `detach` flag; one further
/// token is skipped before the expression list of items to delete.
fn build_delete_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    let detach = consume_if_present(&mut tokens, Rule::DETACH);
    let targets = tokens.nth(1).unwrap();
    let items = build_expression_list(targets)?;
    Ok(Clause::Delete(DeleteClause { detach, items }))
}
fn build_set_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next(); let items = inner.map(build_set_item).collect::<Result<Vec<_>, _>>()?;
Ok(Clause::Set(SetClause { items }))
}
/// Folds a `property_expr` pair into nested `Expr::Property` nodes rooted at a
/// variable.
///
/// If the first token's text is `(`, the variable name is taken from the token
/// after it; tokens whose text is `)` are ignored. Every remaining token
/// becomes one property access, using its raw text as the key.
fn build_property_expr_chain(pair: Pair<Rule>) -> Expr {
    let mut tokens = pair.into_inner();
    let head = tokens.next().unwrap();
    let base_name = match head.as_str() {
        "(" => tokens.next().unwrap().as_str().to_string(),
        name => name.to_string(),
    };
    let mut chain = Expr::Variable(base_name);
    for segment in tokens {
        if segment.as_str() == ")" {
            continue;
        }
        chain = Expr::Property(Box::new(chain), segment.as_str().to_string());
    }
    chain
}
/// Builds one SET item.
///
/// * `property_expr` target: one token is skipped, then the value expression
///   is read, producing `SetItem::Property`.
/// * `identifier` target: the following token selects `=` (`SetItem::Variable`),
///   `+=` (`SetItem::VariablePlus`) or a label list (`SetItem::Labels`, with
///   each label normalized).
///
/// # Panics
/// Panics on any other token shape.
fn build_set_item(pair: Pair<Rule>) -> Result<SetItem, ParseError> {
    let mut parts = pair.into_inner();
    let target = parts.next().unwrap();
    match target.as_rule() {
        Rule::property_expr => {
            let expr = build_property_expr_chain(target);
            // Skip the operator token, then read the assigned value.
            let value = build_expression(parts.nth(1).unwrap())?;
            Ok(SetItem::Property { expr, value })
        }
        Rule::identifier => {
            let operator = parts.next().unwrap();
            let variable = target.as_str().to_string();
            match operator.as_rule() {
                Rule::eq => {
                    let value = build_expression(parts.next().unwrap())?;
                    Ok(SetItem::Variable { variable, value })
                }
                Rule::plus_eq => {
                    let value = build_expression(parts.next().unwrap())?;
                    Ok(SetItem::VariablePlus { variable, value })
                }
                Rule::node_labels => {
                    let labels = operator
                        .into_inner()
                        .map(|label| normalize_identifier(label.as_str()))
                        .collect();
                    Ok(SetItem::Labels { variable, labels })
                }
                _ => unreachable!(),
            }
        }
        _ => unreachable!(),
    }
}
fn build_remove_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next(); let items = inner
.map(build_remove_item)
.collect::<Result<Vec<_>, _>>()?;
Ok(Clause::Remove(RemoveClause { items }))
}
fn build_remove_item(pair: Pair<Rule>) -> Result<RemoveItem, ParseError> {
let mut inner = pair.into_inner();
let first = inner.next().unwrap();
match first.as_rule() {
Rule::property_expr => Ok(RemoveItem::Property(build_property_expr_chain(first))),
Rule::identifier => {
let id = first.as_str().to_string();
let labels_pair = inner.next().unwrap();
let labels = labels_pair
.into_inner()
.map(|l| l.as_str().to_string())
.collect();
Ok(RemoveItem::Labels {
variable: id,
labels,
})
}
_ => unreachable!(),
}
}
fn build_merge_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next();
let pattern_path = build_path_pattern(inner.next().unwrap())?;
let pattern = Pattern {
paths: vec![pattern_path],
};
let mut on_match = vec![];
let mut on_create = vec![];
for action in inner {
let mut action_inner = action.into_inner();
action_inner.next(); let kind = action_inner.next().unwrap();
let set_pair = action_inner.next().unwrap();
let items = set_pair
.into_inner()
.skip(1)
.map(build_set_item)
.collect::<Result<Vec<_>, _>>()?;
match kind.as_rule() {
Rule::MATCH => on_match.extend(items),
Rule::CREATE => on_create.extend(items),
_ => unreachable!(),
}
}
Ok(Clause::Merge(MergeClause {
pattern,
on_match,
on_create,
}))
}
/// Builds a `Clause::Call` from a `call_clause` pair.
///
/// The first inner pair is skipped (presumably the CALL keyword). The second
/// decides the form:
/// * `statement` → `CallKind::Subquery` with no yield items and no WHERE;
/// * `qualified_name` → `CallKind::Procedure`, with arguments taken from an
///   `expression_list` pair and yield items / WHERE taken from a
///   `yield_clause` pair, when those are present.
///
/// # Errors
/// Returns a `ParseError` when the target is neither a `statement` nor a
/// `qualified_name`, or when a nested builder fails.
fn build_call_clause(pair: Pair<Rule>) -> Result<Clause, ParseError> {
let mut inner = pair.into_inner();
inner.next();
let target = inner.next().unwrap();
match target.as_rule() {
Rule::statement => {
let stmt = build_statement(target)?;
Ok(Clause::Call(CallClause {
kind: CallKind::Subquery(Box::new(Query::Single(stmt))),
yield_items: vec![],
where_clause: None,
}))
}
Rule::qualified_name => {
// The procedure name is kept as raw source text.
let procedure = target.as_str().to_string();
let mut args = vec![];
let mut yield_items = vec![];
let mut where_clause = None;
for p in inner {
match p.as_rule() {
Rule::expression_list => args = build_expression_list(p)?,
Rule::yield_clause => {
let mut y_inner = p.into_inner();
// Skip the first token of the yield clause, then read the yield items pair.
y_inner.next(); let yield_items_pair = y_inner.next().unwrap();
let mut yi_inner = yield_items_pair.into_inner();
let first = yi_inner.next().unwrap();
if first.as_rule() == Rule::star {
// A star is represented as a single item named "*".
yield_items = vec![YieldItem {
name: "*".to_string(),
alias: None,
}];
} else {
// Re-attach the already-consumed first item before building all of them.
yield_items = std::iter::once(first)
.chain(yi_inner)
.map(build_yield_item)
.collect::<Result<_, _>>()?;
}
// Any pair after the yield items is treated as the WHERE clause.
if let Some(where_pair) = y_inner.next() {
where_clause = Some(extract_where_expr(where_pair)?);
}
}
// Pairs with any other rule are ignored.
_ => {}
}
}
Ok(Clause::Call(CallClause {
kind: CallKind::Procedure {
procedure,
arguments: args,
},
yield_items,
where_clause,
}))
}
_ => Err(ParseError::new(format!(
"Expected statement or qualified_name in CALL clause, got {:?}",
target.as_rule()
))),
}
}
/// Builds one YIELD item: a normalized name and an optional normalized alias.
///
/// When a second token exists it is discarded (presumably the AS keyword) and
/// the token after it is the alias.
fn build_yield_item(pair: Pair<Rule>) -> Result<YieldItem, ParseError> {
    let mut parts = pair.into_inner();
    let name = normalize_identifier(parts.next().unwrap().as_str());
    let alias = match parts.next() {
        Some(_separator) => Some(normalize_identifier(parts.next().unwrap().as_str())),
        None => None,
    };
    Ok(YieldItem { name, alias })
}
/// Converts any expression-level pair into an [`Expr`] by dispatching on its rule.
///
/// `expression` and `comprehension_source` are transparent wrappers around
/// their first child. OR / XOR / AND / power levels are left-associative chains
/// with a fixed operator; the remaining levels have dedicated builders.
///
/// # Panics
/// Panics when handed a rule that is not an expression rule.
pub fn build_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let rule = pair.as_rule();

    // Levels whose operator is fixed by the rule itself.
    let fixed_op = match rule {
        Rule::or_expression => Some(BinaryOp::Or),
        Rule::xor_expression => Some(BinaryOp::Xor),
        Rule::and_expression => Some(BinaryOp::And),
        Rule::power_expression => Some(BinaryOp::Pow),
        _ => None,
    };
    if let Some(op) = fixed_op {
        return build_binary_left_assoc(pair, op);
    }

    match rule {
        Rule::expression | Rule::comprehension_source => {
            let wrapped = pair.into_inner().next().unwrap();
            build_expression(wrapped)
        }
        Rule::not_expression => build_not_expression(pair),
        Rule::comparison_expression => build_comparison_expression(pair),
        Rule::additive_expression => build_additive_expression(pair),
        Rule::multiplicative_expression => build_multiplicative_expression(pair),
        Rule::unary_expression => build_unary_expression(pair),
        Rule::postfix_expression => build_postfix_expression(pair),
        Rule::primary_expression => build_primary_expression(pair),
        Rule::literal => build_literal(pair),
        Rule::identifier => Ok(Expr::Variable(pair.as_str().to_string())),
        // Drop the first byte of the parameter token (its sigil).
        Rule::parameter => Ok(Expr::Parameter(pair.as_str()[1..].to_string())),
        _ => unreachable!("Unexpected expression rule: {:?}", rule),
    }
}
/// Builds a left-associative binary chain in which every operator token maps
/// to the same `op`.
fn build_binary_left_assoc(pair: Pair<Rule>, op: BinaryOp) -> Result<Expr, ParseError> {
    let constant_op = move |_rule: Rule| -> BinaryOp { op };
    build_binary_left_assoc_dynamic(pair, constant_op)
}
/// Builds a left-associative binary chain from alternating operand / operator
/// pairs, using `map_op` to translate each operator rule into a [`BinaryOp`].
///
/// A pair with a single operand yields that operand unchanged.
fn build_binary_left_assoc_dynamic(
    pair: Pair<Rule>,
    map_op: impl Fn(Rule) -> BinaryOp,
) -> Result<Expr, ParseError> {
    let mut tokens = pair.into_inner();
    let mut acc = build_expression(tokens.next().unwrap())?;
    loop {
        let Some(op_pair) = tokens.next() else {
            return Ok(acc);
        };
        let op = map_op(op_pair.as_rule());
        let rhs = build_expression(tokens.next().unwrap())?;
        acc = Expr::BinaryOp {
            left: Box::new(acc),
            op,
            right: Box::new(rhs),
        };
    }
}
fn build_additive_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
build_binary_left_assoc_dynamic(pair, |rule| match rule {
Rule::plus => BinaryOp::Add,
Rule::minus => BinaryOp::Sub,
_ => unreachable!(),
})
}
fn build_multiplicative_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
build_binary_left_assoc_dynamic(pair, |rule| match rule {
Rule::star => BinaryOp::Mul,
Rule::slash => BinaryOp::Div,
Rule::percent => BinaryOp::Mod,
_ => unreachable!(),
})
}
/// Builds a NOT expression: counts the leading `NOT` tokens, builds the
/// operand, and wraps it in one `UnaryOp::Not` per token.
fn build_not_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    let mut negations = 0;
    while tokens.next_if(|t| t.as_rule() == Rule::NOT).is_some() {
        negations += 1;
    }
    let operand = build_expression(tokens.next().unwrap())?;
    let wrapped = (0..negations).fold(operand, |inner, _| Expr::UnaryOp {
        op: UnaryOp::Not,
        expr: Box::new(inner),
    });
    Ok(wrapped)
}
/// Builds a comparison expression, including chained comparisons and
/// predicate tails.
///
/// The first inner pair is the leading operand; every following pair is a
/// "tail". A tail whose first token is a comparison operator (`eq`, `not_eq`,
/// `lt`, `gt`, `lt_eq`, `gt_eq`, `approx_eq`) appends an operator and a new
/// operand. Any other tail is applied to the most recent operand through
/// `apply_tail_to_expr`.
///
/// With N comparison operators the result is the conjunction of the adjacent
/// comparisons: `a < b < c` becomes `(a < b) AND (b < c)`.
///
/// # Errors
/// Returns an `InvalidPredicateChain` error when a non-label predicate tail
/// directly follows another predicate tail that did not produce a
/// `LabelCheck`, and propagates errors from nested builders.
fn build_comparison_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
let mut inner = pair.into_inner();
let first = build_expression(inner.next().unwrap())?;
let tails: Vec<Pair<Rule>> = inner.collect();
// No tails: the expression is just its single operand.
if tails.is_empty() {
return Ok(first);
}
let mut operands = vec![first];
let mut ops = vec![];
// True when the previous tail was a predicate that did not yield a LabelCheck.
let mut last_was_predicate = false;
for tail in tails {
let mut tail_inner = tail.into_inner();
let op_pair = tail_inner.next().unwrap();
let rule = op_pair.as_rule();
// Chainable comparison operators map to a BinaryOp; everything else is a predicate tail.
let chain_op = match rule {
Rule::eq => Some(BinaryOp::Eq),
Rule::not_eq => Some(BinaryOp::NotEq),
Rule::lt => Some(BinaryOp::Lt),
Rule::gt => Some(BinaryOp::Gt),
Rule::lt_eq => Some(BinaryOp::LtEq),
Rule::gt_eq => Some(BinaryOp::GtEq),
Rule::approx_eq => Some(BinaryOp::ApproxEq),
_ => None,
};
if let Some(op) = chain_op {
let rhs = build_expression(tail_inner.next().unwrap())?;
ops.push(op);
operands.push(rhs);
last_was_predicate = false;
} else {
let is_label = rule == Rule::identifier_or_keyword;
if last_was_predicate && !is_label {
return Err(ParseError::new(
"InvalidPredicateChain: cannot stack multiple predicates \
without a comparison operator between them"
.to_string(),
));
}
// Replace the most recent operand with the predicate applied to it.
let last_idx = operands.len() - 1;
let last = operands.remove(last_idx);
let modified = apply_tail_to_expr(last, rule, op_pair, tail_inner)?;
let produced_label = matches!(&modified, Expr::LabelCheck { .. });
operands.push(modified);
// Label checks may be followed by further tails; other predicates may not.
last_was_predicate = !produced_label;
}
}
// Only predicate tails were seen: the single (modified) operand is the result.
if ops.is_empty() {
return Ok(operands.pop().unwrap());
}
let mut final_expr = Expr::BinaryOp {
left: Box::new(operands[0].clone()),
op: ops[0],
right: Box::new(operands[1].clone()),
};
// Each further operator compares operands[i] with operands[i + 1] and is AND-ed on.
for i in 1..ops.len() {
let next_cmp = Expr::BinaryOp {
left: Box::new(operands[i].clone()),
op: ops[i],
right: Box::new(operands[i + 1].clone()),
};
final_expr = Expr::BinaryOp {
left: Box::new(final_expr),
op: BinaryOp::And,
right: Box::new(next_cmp),
};
}
Ok(final_expr)
}
/// Attaches `label` to `expr` as a label check.
///
/// An existing `Expr::LabelCheck` gets the label appended to its list; any
/// other expression is wrapped in a new `LabelCheck` holding just `label`.
fn accumulate_label(expr: Expr, label: String) -> Expr {
    let (target, mut labels) = match expr {
        Expr::LabelCheck { expr, labels } => (expr, labels),
        other => (Box::new(other), Vec::new()),
    };
    labels.push(label);
    Expr::LabelCheck {
        expr: target,
        labels,
    }
}
/// Applies one non-comparison predicate tail to `left`.
///
/// * `rule` is the rule of the tail's first token and selects the predicate.
/// * `op_pair` is that first token; it is only read for the
///   `identifier_or_keyword` (label) case.
/// * `tail_inner` yields the tokens that follow the first token.
///
/// # Panics
/// Panics when `rule` is not one of the handled predicate rules or when an
/// expected token is missing.
fn apply_tail_to_expr(
left: Expr,
rule: Rule,
op_pair: Pair<Rule>,
mut tail_inner: Pairs<Rule>,
) -> Result<Expr, ParseError> {
match rule {
Rule::IS => {
// The token after IS decides between the null / unique / label forms.
let next = tail_inner.next().unwrap();
match next.as_rule() {
Rule::NULL => Ok(Expr::IsNull(Box::new(left))),
Rule::NOT => Ok(Expr::IsNotNull(Box::new(left))),
Rule::UNIQUE => Ok(Expr::IsUnique(Box::new(left))),
_ => {
// The label text comes from the token after `next`, not from `next` itself.
// NOTE(review): presumably `next` is a separator token such as a colon; confirm against the grammar.
let label = tail_inner.next().unwrap().as_str().to_string();
Ok(accumulate_label(left, label))
}
}
}
Rule::IN => Ok(Expr::In {
expr: Box::new(left),
list: Box::new(build_expression(tail_inner.next().unwrap())?),
}),
Rule::CONTAINS => Ok(Expr::BinaryOp {
left: Box::new(left),
op: BinaryOp::Contains,
right: Box::new(build_expression(tail_inner.next().unwrap())?),
}),
Rule::STARTS => {
// Skip one token (presumably WITH) before the operand.
tail_inner.next(); Ok(Expr::BinaryOp {
left: Box::new(left),
op: BinaryOp::StartsWith,
right: Box::new(build_expression(tail_inner.next().unwrap())?),
})
}
Rule::ENDS => {
// Skip one token (presumably WITH) before the operand.
tail_inner.next(); Ok(Expr::BinaryOp {
left: Box::new(left),
op: BinaryOp::EndsWith,
right: Box::new(build_expression(tail_inner.next().unwrap())?),
})
}
Rule::regex_match => Ok(Expr::BinaryOp {
left: Box::new(left),
op: BinaryOp::Regex,
right: Box::new(build_expression(tail_inner.next().unwrap())?),
}),
Rule::VALID_AT => {
// Timestamp expression, then up to two optional string literals.
let timestamp = build_expression(tail_inner.next().unwrap())?;
let start_prop = tail_inner.next().map(build_string_literal).transpose()?;
let end_prop = tail_inner.next().map(build_string_literal).transpose()?;
Ok(Expr::ValidAt {
entity: Box::new(left),
timestamp: Box::new(timestamp),
start_prop,
end_prop,
})
}
// A bare identifier tail is a label; unlike the IS form, it is normalized.
Rule::identifier_or_keyword => Ok(accumulate_label(
left,
normalize_identifier(op_pair.as_str()),
)),
_ => unreachable!("Unexpected non-chainable rule: {:?}", rule),
}
}
/// Negates `expr`.
///
/// Integer literals are folded in place with wrapping negation (so
/// `i64::MIN` stays `i64::MIN`); everything else is wrapped in `UnaryOp::Neg`.
fn negate_expression(expr: Expr) -> Expr {
    match expr {
        Expr::Literal(CypherLiteral::Integer(value)) => {
            Expr::Literal(CypherLiteral::Integer(value.wrapping_neg()))
        }
        other => Expr::UnaryOp {
            op: UnaryOp::Neg,
            expr: Box::new(other),
        },
    }
}
/// Builds a unary expression with any number of leading `+` / `-` signs.
///
/// `+` signs are dropped; each `-` applies [`negate_expression`] once.
///
/// # Errors
/// An `i64::MIN` integer literal is only representable when it carries an odd
/// number of minus signs. With an even number (including zero) the literal
/// denotes a positive value that does not fit, and `IntegerOverflow` is
/// returned.
fn build_unary_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    let mut minus_count = 0u32;
    while let Some(sign) =
        tokens.next_if(|p| matches!(p.as_rule(), Rule::minus | Rule::plus))
    {
        if sign.as_rule() == Rule::minus {
            minus_count += 1;
        }
    }
    let operand = build_expression(tokens.next().unwrap())?;
    // Zero iterations when there are no minus signs, leaving the operand as is.
    let expr = (0..minus_count).fold(operand, |acc, _| negate_expression(acc));
    let is_min_literal = matches!(expr, Expr::Literal(CypherLiteral::Integer(i64::MIN)));
    if is_min_literal && minus_count % 2 == 0 {
        return Err(ParseError::new(
            "IntegerOverflow: value too large".to_string(),
        ));
    }
    Ok(expr)
}
fn build_postfix_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
let mut inner = pair.into_inner();
let base = build_expression(inner.next().unwrap())?;
inner.try_fold(base, apply_postfix_suffix)
}
/// Builds an `Expr::FunctionCall` whose name is the dotted name extracted from
/// `base`.
///
/// # Errors
/// Fails with "Invalid call base" when `extract_dotted_name` yields nothing
/// for `base`.
fn make_function_call(
    base: &Expr,
    args: Vec<Expr>,
    distinct: bool,
    window_spec: Option<WindowSpec>,
) -> Result<Expr, ParseError> {
    match extract_dotted_name(base) {
        Some(name) => Ok(Expr::FunctionCall {
            name,
            args,
            distinct,
            window_spec,
        }),
        None => Err(ParseError::new(format!("Invalid call base: {base:?}"))),
    }
}
/// Applies one postfix suffix to `base`.
///
/// The shape of the suffix is decided by its inner tokens:
/// * no tokens → function call with no arguments;
/// * leading `DISTINCT` → sets the `distinct` flag for a function call;
/// * `identifier_or_keyword` → property access (name normalized);
/// * `expression` alone → array index; `expression` followed by `dot_dot`
///   → slice with a start and optional end;
/// * leading `dot_dot` → slice with no start and optional end;
/// * `expression_list` → function call with arguments and optional window spec;
/// * `map_projection_items` → map projection;
/// * `window_spec` → function call with no arguments and a window spec.
///
/// # Panics
/// Panics on any other token shape.
fn apply_postfix_suffix(base: Expr, suffix: Pair<Rule>) -> Result<Expr, ParseError> {
let mut inner = suffix.into_inner();
// An empty suffix is a call with no arguments.
let Some(first) = inner.next() else {
return make_function_call(&base, vec![], false, None);
};
// Strip a leading DISTINCT and remember it.
let (distinct, first) = if first.as_rule() == Rule::DISTINCT {
(true, inner.next())
} else {
(false, Some(first))
};
// DISTINCT with nothing after it is still a call with no arguments.
let Some(first) = first else {
return make_function_call(&base, vec![], distinct, None);
};
match first.as_rule() {
Rule::identifier_or_keyword => Ok(Expr::Property(
Box::new(base),
normalize_identifier(first.as_str()),
)),
Rule::expression => {
// A following token must be `dot_dot`, making this a slice; otherwise it is an index.
if let Some(second) = inner.next() {
if second.as_rule() == Rule::dot_dot {
let end = inner.next().map(build_expression).transpose()?;
Ok(Expr::ArraySlice {
array: Box::new(base),
start: Some(Box::new(build_expression(first)?)),
end: end.map(Box::new),
})
} else {
unreachable!()
}
} else {
Ok(Expr::ArrayIndex {
array: Box::new(base),
index: Box::new(build_expression(first)?),
})
}
}
Rule::dot_dot => {
// Slice with an open start.
let end = inner.next().map(build_expression).transpose()?;
Ok(Expr::ArraySlice {
array: Box::new(base),
start: None,
end: end.map(Box::new),
})
}
Rule::expression_list => {
let args = build_expression_list(first)?;
let window_spec = inner.next().map(build_window_spec).transpose()?;
make_function_call(&base, args, distinct, window_spec)
}
Rule::map_projection_items => {
let items = build_map_projection_items(first)?;
Ok(Expr::MapProjection {
base: Box::new(base),
items,
})
}
Rule::window_spec => {
let window_spec = Some(build_window_spec(first)?);
make_function_call(&base, vec![], distinct, window_spec)
}
_ => unreachable!(),
}
}
/// Builds a primary expression by dispatching on the rule of its first inner
/// pair.
///
/// Handles literals, names, parameters, CASE, lists, maps, parenthesized
/// expressions, COUNT / COLLECT / EXISTS subqueries, quantifiers, REDUCE,
/// pattern predicates, and the keyword-named `COUNT(...)` / `EXISTS(...)`
/// function calls.
///
/// # Panics
/// Panics when the first inner pair has an unexpected rule or an expected
/// token is missing.
fn build_primary_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
let mut inner = pair.into_inner();
let first = inner.next().unwrap();
match first.as_rule() {
Rule::literal => build_literal(first),
// A function name is represented as a variable here; a later call suffix
// turns it into a function call (see `make_function_call`).
Rule::function_name => {
Ok(Expr::Variable(first.as_str().to_string()))
}
Rule::identifier => Ok(Expr::Variable(first.as_str().to_string())),
// Drop the first byte of the parameter token (its sigil).
Rule::parameter => Ok(Expr::Parameter(first.as_str()[1..].to_string())),
Rule::case_expression => build_case_expression(first),
Rule::list_expression => build_list_expression(first),
Rule::map_literal => build_map_literal(first),
Rule::expression => build_expression(first),
Rule::count_subquery => {
// The second inner pair is either a full statement or a bare pattern.
let inner = first.into_inner().nth(1).unwrap();
let query = match inner.as_rule() {
Rule::statement => Query::Single(build_statement(inner)?),
Rule::pattern => match_query(build_pattern(inner)?, None),
_ => unreachable!("Unexpected rule in count_subquery: {:?}", inner.as_rule()),
};
Ok(Expr::CountSubquery(Box::new(query)))
}
Rule::collect_subquery => {
let stmt = build_statement(first.into_inner().nth(1).unwrap())?;
Ok(Expr::CollectSubquery(Box::new(Query::Single(stmt))))
}
Rule::exists_expression => {
// The second inner pair wraps either a statement or a pattern with optional WHERE.
let content = first.into_inner().nth(1).unwrap();
let mut content_inner = content.into_inner();
let first_item = content_inner.next().unwrap();
let query = match first_item.as_rule() {
Rule::statement => Query::Single(build_statement(first_item)?),
Rule::pattern => {
let pattern = build_pattern(first_item)?;
// Only a `where_clause` pair directly after the pattern is used.
let where_clause = content_inner
.next()
.filter(|p| p.as_rule() == Rule::where_clause)
.map(extract_where_expr)
.transpose()?;
match_query(pattern, where_clause)
}
_ => unreachable!(
"Unexpected rule in exists_subquery_content: {:?}",
first_item.as_rule()
),
};
Ok(Expr::Exists {
query: Box::new(query),
from_pattern_predicate: false,
})
}
Rule::quantifier_expression => build_quantifier_expression(first),
Rule::reduce_expression => build_reduce_expression(first),
Rule::pattern_expression => {
// A bare pattern used as a predicate becomes an EXISTS over a single anonymous path.
let elements = first
.into_inner()
.map(build_pattern_child)
.collect::<Result<Vec<_>, _>>()?;
let pattern = Pattern {
paths: vec![PathPattern {
variable: None,
elements,
shortest_path_mode: None,
}],
};
Ok(Expr::Exists {
query: Box::new(match_query(pattern, None)),
from_pattern_predicate: true,
})
}
Rule::COUNT => {
// Keyword-named COUNT call: optional DISTINCT, arguments (or `*`), optional window spec.
let mut distinct = false;
let mut args = vec![];
let mut window_spec = None;
for p in inner {
match p.as_rule() {
Rule::DISTINCT => distinct = true,
Rule::count_args => {
let ca_inner = p.into_inner().next().unwrap();
if ca_inner.as_rule() == Rule::star {
args.push(Expr::Wildcard);
} else {
args = build_expression_list(ca_inner)?;
}
}
Rule::window_spec => window_spec = Some(build_window_spec(p)?),
_ => {}
}
}
// The function name is the keyword exactly as written in the source.
Ok(Expr::FunctionCall {
name: first.as_str().to_string(),
args,
distinct,
window_spec,
})
}
Rule::EXISTS => {
// Keyword-named EXISTS call: the next pair is its argument list.
let list = inner.next().unwrap();
Ok(Expr::FunctionCall {
name: first.as_str().to_string(),
args: build_expression_list(list)?,
distinct: false,
window_spec: None,
})
}
_ => unreachable!("Unexpected primary: {:?}", first.as_rule()),
}
}
/// Resolves escape sequences in the body of a quoted string literal.
///
/// `s` is the literal's content without its surrounding quotes and
/// `quote_char` is the quote character that delimited it.
///
/// Handled sequences: `\\`, `\'`, `\"`, `\t`, `\n`, `\r`, `\b`, `\f`,
/// `\uXXXX` (4 hex digits) and `\UXXXXXX` (6 hex digits). An unrecognized
/// escape is kept verbatim, backslash included. A doubled `quote_char`
/// collapses to a single one.
///
/// # Errors
/// Returns an `InvalidUnicodeLiteral` error when the input ends right after a
/// backslash, when a unicode escape is too short, contains a non-hex
/// character, or names a value that is not a valid `char`.
fn unescape_string(s: &str, quote_char: char) -> Result<String, ParseError> {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(ch) = chars.next() {
        if ch == '\\' {
            match chars.next() {
                Some('\\') => result.push('\\'),
                Some('\'') => result.push('\''),
                Some('"') => result.push('"'),
                Some('t') => result.push('\t'),
                Some('n') => result.push('\n'),
                Some('r') => result.push('\r'),
                Some('b') => result.push('\u{0008}'),
                Some('f') => result.push('\u{000C}'),
                Some(esc @ ('u' | 'U')) => {
                    // NOTE(review): `\U` consumes 6 hex digits here; Cypher references
                    // commonly describe `\U` with 8 digits — confirm the intended width.
                    let expected_len = if esc == 'u' { 4 } else { 6 };
                    let hex: String = chars.by_ref().take(expected_len).collect();
                    // Count characters, not bytes, so that a multi-byte character is
                    // reported as bad hex rather than as a wrong-length escape.
                    if hex.chars().count() != expected_len {
                        return Err(ParseError::new(format!(
                            "InvalidUnicodeLiteral: Invalid \\{esc} escape: \\{esc}{hex}"
                        )));
                    }
                    // `from_str_radix` accepts a leading '+', so `\u+041` would
                    // otherwise be silently accepted. Require hex digits only.
                    if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
                        return Err(ParseError::new(format!(
                            "InvalidUnicodeLiteral: Invalid hex in \\{esc}{hex}"
                        )));
                    }
                    let code = u32::from_str_radix(&hex, 16).map_err(|_| {
                        ParseError::new(format!(
                            "InvalidUnicodeLiteral: Invalid hex in \\{esc}{hex}"
                        ))
                    })?;
                    // Surrogates and out-of-range values are not valid `char`s.
                    result.push(char::from_u32(code).ok_or_else(|| {
                        ParseError::new(format!(
                            "InvalidUnicodeLiteral: Invalid unicode \\{esc}{hex}"
                        ))
                    })?);
                }
                Some(other) => {
                    // Unknown escape: keep it exactly as written.
                    result.push('\\');
                    result.push(other);
                }
                None => {
                    return Err(ParseError::new(
                        "InvalidUnicodeLiteral: Unexpected end after backslash".to_string(),
                    ));
                }
            }
        } else if ch == quote_char {
            // A doubled quote character stands for one literal quote.
            if chars.as_str().starts_with(quote_char) {
                chars.next();
            }
            result.push(quote_char);
        } else {
            result.push(ch);
        }
    }
    Ok(result)
}
/// Parses the digits of an integer literal in the given `radix`.
///
/// The magnitude `i64::MAX + 1` is accepted and returned as `i64::MIN`, so
/// that a literal preceded by a unary minus can represent the minimum value;
/// the caller is responsible for rejecting it when no minus sign applies.
///
/// # Errors
/// * `IntegerOverflow` when the magnitude exceeds `i64::MAX + 1`, including
///   values too large for `u64` (previously these were reported as a generic
///   "Invalid integer" error, inconsistent with every other overflow).
/// * `Invalid integer: …` for any other parse failure (empty input, bad digit).
fn parse_integer_safe(s: &str, radix: u32) -> Result<i64, ParseError> {
    if let Ok(val) = i64::from_str_radix(s, radix) {
        return Ok(val);
    }
    let overflow = || ParseError::new("IntegerOverflow: value too large".to_string());
    match u64::from_str_radix(s, radix) {
        Ok(magnitude) if magnitude == i64::MAX as u64 + 1 => Ok(i64::MIN),
        Ok(_) => Err(overflow()),
        Err(e) if matches!(e.kind(), std::num::IntErrorKind::PosOverflow) => Err(overflow()),
        Err(e) => Err(ParseError::new(format!("Invalid integer: {e}"))),
    }
}
/// Builds an `Expr::Literal` from the first inner pair of a `literal` pair.
///
/// * Integers: underscores are removed; `0x`/`0X` selects base 16, `0o`/`0O`
///   base 8, otherwise base 10.
/// * Floats: underscores are removed; a value that parses to infinity is
///   rejected with `FloatingPointOverflow`.
/// * `infinity`: negative when the token text starts with `-`.
/// * Strings: the first character is the quote character; the content between
///   the first and last byte is unescaped.
///
/// # Panics
/// Panics on an unexpected inner rule.
fn build_literal(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let token = pair.into_inner().next().unwrap();
    let text = token.as_str();
    let literal = match token.as_rule() {
        Rule::NULL => CypherLiteral::Null,
        Rule::TRUE => CypherLiteral::Bool(true),
        Rule::FALSE => CypherLiteral::Bool(false),
        Rule::nan => CypherLiteral::Float(f64::NAN),
        Rule::infinity => {
            let value = if text.starts_with('-') {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
            CypherLiteral::Float(value)
        }
        Rule::string => {
            let quote_char = text.chars().next().unwrap();
            let content = &text[1..text.len() - 1];
            CypherLiteral::String(unescape_string(content, quote_char)?)
        }
        Rule::float => {
            let cleaned = text.replace('_', "");
            let value = match cleaned.parse::<f64>() {
                Ok(v) => v,
                Err(e) => return Err(ParseError::new(format!("Invalid float: {e}"))),
            };
            if value.is_infinite() {
                return Err(ParseError::new(
                    "FloatingPointOverflow: value too large".to_string(),
                ));
            }
            CypherLiteral::Float(value)
        }
        Rule::integer => {
            let cleaned = text.replace('_', "");
            let hex_digits = cleaned
                .strip_prefix("0x")
                .or_else(|| cleaned.strip_prefix("0X"));
            let oct_digits = cleaned
                .strip_prefix("0o")
                .or_else(|| cleaned.strip_prefix("0O"));
            // Hex prefix takes precedence over octal, which takes precedence over decimal.
            let (digits, radix) = match (hex_digits, oct_digits) {
                (Some(hex), _) => (hex, 16),
                (None, Some(oct)) => (oct, 8),
                (None, None) => (cleaned.as_str(), 10),
            };
            CypherLiteral::Integer(parse_integer_safe(digits, radix)?)
        }
        _ => unreachable!(),
    };
    Ok(Expr::Literal(literal))
}
/// Builds a list-shaped expression: a list literal, a list comprehension, or
/// a pattern comprehension.
///
/// * No inner pairs → empty `Expr::List`.
/// * `pattern_comprehension_body` → `Expr::PatternComprehension`.
/// * `list_comprehension_body` → `Expr::ListComprehension`; when no mapping
///   expression is given, the iteration variable itself is used.
/// * `expression` → `Expr::List` of all element expressions.
///
/// # Panics
/// Panics on an unexpected first rule or a missing expected token.
fn build_list_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
let mut inner = pair.into_inner();
let Some(first) = inner.next() else {
return Ok(Expr::List(vec![]));
};
match first.as_rule() {
Rule::pattern_comprehension_body => {
let mut p_inner = first.into_inner().peekable();
// Optional path variable, then the path pattern.
let variable = consume_identifier(&mut p_inner);
let path = build_path_pattern(p_inner.next().unwrap())?;
let pattern = Pattern { paths: vec![path] };
let where_clause = if peek_is(&mut p_inner, Rule::WHERE) {
p_inner.next();
Some(build_expression(p_inner.next().unwrap())?)
} else {
None
};
// Skip one token (presumably the pipe) before the mapping expression.
p_inner.next(); let map_expr = build_expression(p_inner.next().unwrap())?;
Ok(Expr::PatternComprehension {
path_variable: variable,
pattern,
where_clause: where_clause.map(Box::new),
map_expr: Box::new(map_expr),
})
}
Rule::list_comprehension_body => {
let mut l_inner = first.into_inner();
// Iteration variable is kept as raw text.
let var = l_inner.next().unwrap().as_str().to_string();
// Skip one token (presumably IN) before the source list.
l_inner.next(); let list = build_expression(l_inner.next().unwrap())?;
let mut where_clause = None;
let mut map_expr = None;
// After the list: optional WHERE predicate, then optional pipe + mapping expression.
if let Some(next) = l_inner.next() {
if next.as_rule() == Rule::WHERE {
where_clause = Some(build_expression(l_inner.next().unwrap())?);
if let Some(pipe_or_expr) = l_inner.next()
&& pipe_or_expr.as_rule() == Rule::pipe
{
map_expr = Some(build_expression(l_inner.next().unwrap())?);
}
} else if next.as_rule() == Rule::pipe {
map_expr = Some(build_expression(l_inner.next().unwrap())?);
}
}
// Without a mapping expression the comprehension yields the variable itself.
let map_expr = map_expr.unwrap_or_else(|| Expr::Variable(var.clone()));
Ok(Expr::ListComprehension {
variable: var,
list: Box::new(list),
where_clause: where_clause.map(Box::new),
map_expr: Box::new(map_expr),
})
}
Rule::expression => {
// Re-attach the already-consumed first element before building all of them.
let exprs: Vec<_> = std::iter::once(first)
.chain(inner)
.map(build_expression)
.collect::<Result<_, _>>()?;
Ok(Expr::List(exprs))
}
_ => unreachable!("Unexpected list rule: {:?}", first.as_rule()),
}
}
/// Builds an `Expr::Case`.
///
/// Skips the first token, reads an optional subject expression, then collects
/// `when_clause` pairs as `(condition, result)` tuples and an `else_clause`
/// as the fallback. Inside a `when_clause`, tokens 2 and 4 are the condition
/// and result; inside an `else_clause`, token 2 is the expression. Other
/// pairs are ignored.
fn build_case_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut parts = pair.into_inner();
    parts.next();
    let has_subject = matches!(parts.peek(), Some(p) if p.as_rule() == Rule::expression);
    let expr = if has_subject {
        Some(Box::new(build_expression(parts.next().unwrap())?))
    } else {
        None
    };
    let mut when_then = Vec::new();
    let mut else_expr = None;
    for branch in parts {
        let rule = branch.as_rule();
        if rule == Rule::when_clause {
            let mut branch_tokens = branch.into_inner();
            let condition = build_expression(branch_tokens.nth(1).unwrap())?;
            let outcome = build_expression(branch_tokens.nth(1).unwrap())?;
            when_then.push((condition, outcome));
        } else if rule == Rule::else_clause {
            let fallback = build_expression(branch.into_inner().nth(1).unwrap())?;
            else_expr = Some(Box::new(fallback));
        }
    }
    Ok(Expr::Case {
        expr,
        when_then,
        else_expr,
    })
}
/// Builds an `Expr::Quantifier` (ALL / ANY / SINGLE / NONE).
///
/// Positional layout: quantifier keyword, variable (raw text), skipped token,
/// list expression, skipped token, predicate expression.
fn build_quantifier_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut parts = pair.into_inner();
    let quantifier = match parts.next().unwrap().as_rule() {
        Rule::ALL => Quantifier::All,
        Rule::ANY_KW => Quantifier::Any,
        Rule::SINGLE => Quantifier::Single,
        Rule::NONE => Quantifier::None,
        _ => unreachable!(),
    };
    let variable = parts.next().unwrap().as_str().to_string();
    let list = Box::new(build_expression(parts.nth(1).unwrap())?);
    let predicate = Box::new(build_expression(parts.nth(1).unwrap())?);
    Ok(Expr::Quantifier {
        quantifier,
        variable,
        list,
        predicate,
    })
}
/// Builds an `Expr::Reduce`.
///
/// Positional layout: skipped token, accumulator name, skipped token, initial
/// value, iteration variable, skipped token, list, skipped token, body
/// expression. Names are taken as raw text.
fn build_reduce_expression(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut parts = pair.into_inner();
    let accumulator = parts.nth(1).unwrap().as_str().to_string();
    let init = Box::new(build_expression(parts.nth(1).unwrap())?);
    let variable = parts.next().unwrap().as_str().to_string();
    let list = Box::new(build_expression(parts.nth(1).unwrap())?);
    let expr = Box::new(build_expression(parts.nth(1).unwrap())?);
    Ok(Expr::Reduce {
        accumulator,
        init,
        variable,
        list,
        expr,
    })
}
/// Builds an `Expr::Map` from a map-literal pair.
///
/// Every child is treated as one entry whose first child is the key and whose second
/// child is the value expression. A `string` key has its first and last byte (the
/// surrounding quotes) removed and is otherwise kept verbatim; any other key is used
/// exactly as written.
///
/// # Errors
/// Propagates any error returned by `build_expression` for a value.
///
/// # Panics
/// Panics if an entry has fewer than two children.
fn build_map_literal(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let mut entries = Vec::new();
    for entry in pair.into_inner() {
        let mut parts = entry.into_inner();
        let key_pair = parts.next().unwrap();
        let raw = key_pair.as_str();
        let key = if key_pair.as_rule() == Rule::string {
            // Strip only the enclosing quote characters.
            raw[1..raw.len() - 1].to_string()
        } else {
            raw.to_string()
        };
        let value = build_expression(parts.next().unwrap())?;
        entries.push((key, value));
    }
    Ok(Expr::Map(entries))
}
pub(crate) fn build_pattern(pair: Pair<Rule>) -> Result<Pattern, ParseError> {
let paths = pair
.into_inner()
.map(build_path_pattern)
.collect::<Result<Vec<_>, _>>()?;
Ok(Pattern { paths })
}
/// Converts a `node_pattern` or `relationship_pattern` pair into a `PatternElement`.
///
/// # Errors
/// Propagates errors from `build_node_pattern` / `build_relationship_pattern`.
///
/// # Panics
/// Panics if `child` is any other rule.
fn build_pattern_child(child: Pair<Rule>) -> Result<PatternElement, ParseError> {
    let rule = child.as_rule();
    if rule == Rule::node_pattern {
        return Ok(PatternElement::Node(build_node_pattern(child)?));
    }
    if rule == Rule::relationship_pattern {
        return Ok(PatternElement::Relationship(build_relationship_pattern(
            child,
        )?));
    }
    unreachable!("Unexpected pattern child: {:?}", rule)
}
/// Appends the pattern elements described by `elem` to `elements`.
///
/// Pairs that are not `pattern_element` are silently ignored. A `pattern_element`
/// either starts with a `parenthesized_pattern` (pushed as a single element; any
/// further children are not examined) or with a `node_pattern`, in which case that
/// node and every following `node_pattern` / `relationship_pattern` child are pushed
/// in order.
///
/// # Errors
/// Propagates errors from the element builders.
///
/// # Panics
/// Panics if the `pattern_element` is empty or starts with an unexpected rule.
fn process_pattern_element(
    elem: Pair<Rule>,
    elements: &mut Vec<PatternElement>,
) -> Result<(), ParseError> {
    if elem.as_rule() == Rule::pattern_element {
        let mut children = elem.into_inner();
        let head = children.next().unwrap();
        match head.as_rule() {
            Rule::parenthesized_pattern => {
                let grouped = build_parenthesized_pattern(head)?;
                elements.push(grouped);
            }
            Rule::node_pattern => {
                elements.push(build_pattern_child(head)?);
                let chain = children.filter(|c| {
                    matches!(
                        c.as_rule(),
                        Rule::node_pattern | Rule::relationship_pattern
                    )
                });
                for link in chain {
                    elements.push(build_pattern_child(link)?);
                }
            }
            other => unreachable!("Unexpected rule in pattern_element: {:?}", other),
        }
    }
    Ok(())
}
/// Builds a `PathPattern` from a path-pattern pair.
///
/// An optional leading identifier becomes the path variable. If the next child is a
/// `shortest_path_pattern`, the mode is chosen from its text (case-insensitively:
/// text starting with `allshortest` selects `AllShortest`, anything else `Shortest`)
/// and only that child's own children are processed. Otherwise that child and all
/// remaining children are processed as pattern elements.
///
/// # Errors
/// Propagates errors from `process_pattern_element`.
///
/// # Panics
/// Panics if no child follows the optional identifier.
fn build_path_pattern(pair: Pair<Rule>) -> Result<PathPattern, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    let variable = consume_identifier(&mut tokens);
    let mut elements = Vec::new();

    let head = tokens.next().unwrap();
    let shortest_path_mode = if head.as_rule() == Rule::shortest_path_pattern {
        let mode = if head.as_str().to_lowercase().starts_with("allshortest") {
            ShortestPathMode::AllShortest
        } else {
            ShortestPathMode::Shortest
        };
        head.into_inner()
            .try_for_each(|elem| process_pattern_element(elem, &mut elements))?;
        Some(mode)
    } else {
        std::iter::once(head)
            .chain(tokens)
            .try_for_each(|elem| process_pattern_element(elem, &mut elements))?;
        None
    };

    Ok(PathPattern {
        variable,
        elements,
        shortest_path_mode,
    })
}
fn build_parenthesized_pattern(pair: Pair<Rule>) -> Result<PatternElement, ParseError> {
let mut inner = pair.into_inner().peekable();
let variable = consume_identifier(&mut inner);
let pattern_part = inner.next().unwrap();
let elements = pattern_part
.into_inner()
.map(build_pattern_child)
.collect::<Result<Vec<_>, _>>()?;
let where_clause = if peek_is(&mut inner, Rule::WHERE) {
inner.next();
Some(build_expression(inner.next().unwrap())?)
} else {
None
};
let range = inner.next().map(build_path_quantifier).transpose()?;
let path_pattern = PathPattern {
variable,
elements,
shortest_path_mode: None,
};
if where_clause.is_some() {
eprintln!("Warning: WHERE clause in parenthesized pattern is not yet fully supported");
}
Ok(PatternElement::Parenthesized {
pattern: Box::new(path_pattern),
range,
})
}
fn build_path_quantifier(pair: Pair<Rule>) -> Result<Range, ParseError> {
let has_comma = pair.as_str().contains(',');
let mut inner = pair.into_inner().peekable();
let Some(first) = inner.next() else {
return Ok(Range {
min: Some(0),
max: None,
});
};
match first.as_rule() {
Rule::plus => Ok(Range {
min: Some(1),
max: None,
}),
Rule::star => Ok(Range {
min: Some(0),
max: None,
}),
Rule::integer => {
let n: u32 = first.as_str().parse().unwrap();
if !has_comma {
return Ok(Range {
min: Some(n),
max: Some(n),
});
}
let max = inner
.next()
.filter(|p| p.as_rule() == Rule::integer)
.map(|p| p.as_str().parse::<u32>().unwrap());
Ok(Range { min: Some(n), max })
}
_ => {
let max: u32 = first.as_str().parse().unwrap();
Ok(Range {
min: Some(0),
max: Some(max),
})
}
}
}
/// Splits a predicate pair into `(properties, where_clause)`.
///
/// * no children                  -> `(None, None)`
/// * starts with `WHERE`          -> `(None, Some(expr))`, expr built from the next child
/// * starts with `properties`     -> properties built from it; if the following child
///   is `WHERE`, the child after that becomes the where expression
///
/// # Errors
/// Propagates errors from `build_properties` and `build_expression`.
///
/// # Panics
/// Panics on any other leading rule, or if the expression after `WHERE` is missing.
fn parse_predicate(pair: Pair<Rule>) -> Result<(Option<Expr>, Option<Expr>), ParseError> {
    let mut parts = pair.into_inner();
    let head = match parts.next() {
        Some(head) => head,
        None => return Ok((None, None)),
    };

    let rule = head.as_rule();
    if rule == Rule::WHERE {
        let filter = build_expression(parts.next().unwrap())?;
        return Ok((None, Some(filter)));
    }
    if rule != Rule::properties {
        unreachable!("Unexpected rule in predicate: {:?}", rule);
    }

    let props = build_properties(head)?;
    let filter = match parts.next() {
        Some(keyword) if keyword.as_rule() == Rule::WHERE => {
            Some(build_expression(parts.next().unwrap())?)
        }
        _ => None,
    };
    Ok((Some(props), filter))
}
/// Builds a `NodePattern` from a node-pattern pair.
///
/// Children are scanned in order and recognised by rule: `identifier` (variable, text
/// kept as written), `node_labels` (each label normalized), `node_predicate` (sets both
/// properties and where clause via `parse_predicate`) and a bare `properties`. Later
/// children overwrite values set by earlier ones; unknown children are ignored.
///
/// # Errors
/// Propagates errors from `parse_predicate` and `build_properties`.
fn build_node_pattern(pair: Pair<Rule>) -> Result<NodePattern, ParseError> {
    let mut variable = None;
    let mut labels = Vec::new();
    let mut properties = None;
    let mut where_clause = None;

    for part in pair.into_inner() {
        let rule = part.as_rule();
        if rule == Rule::identifier {
            variable = Some(part.as_str().to_owned());
        } else if rule == Rule::node_labels {
            labels = part
                .into_inner()
                .map(|label| normalize_identifier(label.as_str()))
                .collect();
        } else if rule == Rule::node_predicate {
            (properties, where_clause) = parse_predicate(part)?;
        } else if rule == Rule::properties {
            properties = Some(build_properties(part)?);
        }
    }

    Ok(NodePattern {
        variable,
        labels,
        properties,
        where_clause,
    })
}
/// Builds a `RelationshipPattern` from a relationship-pattern pair.
///
/// Arrow children determine the direction: only a left arrow gives `Incoming`, only a
/// right arrow gives `Outgoing`, and every other combination (both, neither, or a
/// bidirectional arrow) gives `Both`. Inside `relationship_detail`, children are
/// recognised by rule: `identifier` (variable, text kept as written),
/// `relationship_types` (identifier-like children, normalized), `range_literal`,
/// `rel_predicate` (sets both properties and where clause) and a bare `properties`.
/// Later children overwrite earlier values; unknown children are ignored.
///
/// # Errors
/// Propagates errors from `build_range`, `parse_predicate` and `build_properties`.
fn build_relationship_pattern(pair: Pair<Rule>) -> Result<RelationshipPattern, ParseError> {
    let (mut points_left, mut points_right) = (false, false);
    let mut variable = None;
    let mut types = Vec::new();
    let mut range = None;
    let mut properties = None;
    let mut where_clause = None;

    for part in pair.into_inner() {
        match part.as_rule() {
            Rule::arrow_left => points_left = true,
            Rule::arrow_right => points_right = true,
            Rule::arrow_bidirectional => {
                points_left = true;
                points_right = true;
            }
            Rule::relationship_detail => {
                for detail in part.into_inner() {
                    let rule = detail.as_rule();
                    if rule == Rule::identifier {
                        variable = Some(detail.as_str().to_owned());
                    } else if rule == Rule::relationship_types {
                        types = detail
                            .into_inner()
                            .filter(|t| {
                                matches!(
                                    t.as_rule(),
                                    Rule::identifier | Rule::identifier_or_keyword
                                )
                            })
                            .map(|t| normalize_identifier(t.as_str()))
                            .collect();
                    } else if rule == Rule::range_literal {
                        range = Some(build_range(detail)?);
                    } else if rule == Rule::rel_predicate {
                        (properties, where_clause) = parse_predicate(detail)?;
                    } else if rule == Rule::properties {
                        properties = Some(build_properties(detail)?);
                    }
                }
            }
            _ => {}
        }
    }

    // Equal flags (both set or both clear) mean no single direction was given.
    let direction = if points_left == points_right {
        Direction::Both
    } else if points_left {
        Direction::Incoming
    } else {
        Direction::Outgoing
    };

    Ok(RelationshipPattern {
        variable,
        types,
        direction,
        range,
        properties,
        where_clause,
    })
}
/// Builds a `ReturnItem::Expr` from a single return-item pair.
///
/// The first child is the expression; its trimmed source text is recorded in
/// `source_text`. If another child follows (presumably the `AS` keyword — confirm
/// against the grammar), the child after that is taken as the alias and normalized.
///
/// # Errors
/// Propagates errors from `build_expression`.
///
/// # Panics
/// Panics if the expression is missing, or if a child follows it with no alias after.
fn build_single_return_item(pair: Pair<Rule>) -> Result<ReturnItem, ParseError> {
    let mut parts = pair.into_inner();
    let expr_pair = parts.next().unwrap();
    let source_text = Some(expr_pair.as_str().trim().to_owned());
    let expr = build_expression(expr_pair)?;

    let alias = match parts.next() {
        Some(_) => Some(normalize_identifier(parts.next().unwrap().as_str())),
        None => None,
    };

    Ok(ReturnItem::Expr {
        expr,
        alias,
        source_text,
    })
}
/// Builds the list of return items from a return-items pair.
///
/// A leading `star` child yields `ReturnItem::All`; every other child (including any
/// that follow the star) is converted with `build_single_return_item`.
///
/// # Errors
/// Returns the first error from `build_single_return_item`.
///
/// # Panics
/// Panics if `pair` has no children.
pub(crate) fn build_return_items(pair: Pair<Rule>) -> Result<Vec<ReturnItem>, ParseError> {
    let mut rest = pair.into_inner();
    let head = rest.next().unwrap();

    let mut items = Vec::new();
    if head.as_rule() == Rule::star {
        items.push(ReturnItem::All);
    } else {
        items.push(build_single_return_item(head)?);
    }
    for item in rest {
        items.push(build_single_return_item(item)?);
    }
    Ok(items)
}
/// Builds the sort items of an ordering list.
///
/// For each child, the first inner pair is the sort expression. The item is ascending
/// when no second inner pair exists or when that pair's rule is `ASC`; any other
/// second pair makes it descending.
///
/// # Errors
/// Returns the first error from `build_expression`.
///
/// # Panics
/// Panics if a sort item has no expression child.
pub(crate) fn build_sort_items(pair: Pair<Rule>) -> Result<Vec<SortItem>, ParseError> {
    let mut items = Vec::new();
    for entry in pair.into_inner() {
        let mut parts = entry.into_inner();
        let expr = build_expression(parts.next().unwrap())?;
        let ascending = match parts.next() {
            Some(direction) => direction.as_rule() == Rule::ASC,
            None => true,
        };
        items.push(SortItem { expr, ascending });
    }
    Ok(items)
}
/// Builds a `WindowSpec` from a window-specification pair.
///
/// For a `partition_clause` or `order_clause` child, the third inner pair (index 2) is
/// the list; the two pairs before it are skipped (presumably keyword tokens such as
/// `PARTITION BY` / `ORDER BY` — confirm against the grammar). Other children are
/// ignored, and a repeated clause overwrites the earlier one.
///
/// # Errors
/// Propagates errors from `build_expression_list` and `build_sort_items`.
///
/// # Panics
/// Panics if a clause has fewer than three inner pairs.
fn build_window_spec(pair: Pair<Rule>) -> Result<WindowSpec, ParseError> {
    let mut partition_by = Vec::new();
    let mut order_by = Vec::new();

    for clause in pair.into_inner() {
        let rule = clause.as_rule();
        let is_partition = rule == Rule::partition_clause;
        if !is_partition && rule != Rule::order_clause {
            continue;
        }
        let list = clause.into_inner().nth(2).unwrap();
        if is_partition {
            partition_by = build_expression_list(list)?;
        } else {
            order_by = build_sort_items(list)?;
        }
    }

    Ok(WindowSpec {
        partition_by,
        order_by,
    })
}
/// Builds the expression for a `properties` pair.
///
/// The single inner pair is either a `map_literal` (delegated to `build_map_literal`)
/// or a `parameter`, whose text minus its first byte (the sigil) becomes the name of
/// an `Expr::Parameter`.
///
/// # Errors
/// Propagates errors from `build_map_literal`.
///
/// # Panics
/// Panics if the inner pair is missing or is any other rule.
pub(crate) fn build_properties(pair: Pair<Rule>) -> Result<Expr, ParseError> {
    let content = pair.into_inner().next().unwrap();
    let rule = content.as_rule();
    if rule == Rule::map_literal {
        return build_map_literal(content);
    }
    if rule == Rule::parameter {
        let name = &content.as_str()[1..];
        return Ok(Expr::Parameter(name.to_string()));
    }
    unreachable!()
}
/// Parses the text of `pair` as a non-negative range bound.
///
/// The text is first parsed as `i64` so that negative values get a dedicated message,
/// then converted to `u32` with a checked conversion.
///
/// # Errors
/// Returns a `ParseError` when the text is not an integer, is negative, or exceeds
/// `u32::MAX`. (Previously values above `u32::MAX` were silently truncated by an `as`
/// cast, e.g. `4294967296` became `0`.)
fn parse_range_bound(pair: &Pair<Rule>) -> Result<u32, ParseError> {
    let text = pair.as_str();
    let val: i64 = text.parse().map_err(|e| {
        ParseError::new(format!(
            "SyntaxError: InvalidRelationshipPattern - Invalid range bound '{}': {}",
            text, e
        ))
    })?;
    if val < 0 {
        return Err(ParseError::new(format!(
            "SyntaxError: InvalidRelationshipPattern - Negative range bound '{}' is not allowed",
            val
        )));
    }
    // Checked narrowing: reject instead of wrapping around.
    u32::try_from(val).map_err(|_| {
        ParseError::new(format!(
            "SyntaxError: InvalidRelationshipPattern - Range bound '{}' is too large (maximum is {})",
            val,
            u32::MAX
        ))
    })
}
/// Builds a `Range` from a range-literal pair.
///
/// The first child is always skipped (presumably the `*` token — confirm against the
/// grammar). The remaining children decide the result:
///
/// * nothing follows                 -> `min = 1`, no `max`
/// * `integer` only                  -> `min == max ==` that value
/// * `integer`, then another child   -> `min` is the integer; `max` is the bound after
///   that child if present, otherwise none
/// * `dot_dot` first                 -> no `min`; `max` is the following bound if present
/// * anything else                   -> neither bound set
///
/// # Errors
/// Propagates errors from `parse_range_bound`.
fn build_range(pair: Pair<Rule>) -> Result<Range, ParseError> {
    let mut parts = pair.into_inner();
    parts.next();

    let Some(head) = parts.next() else {
        return Ok(Range {
            min: Some(1),
            max: None,
        });
    };

    let rule = head.as_rule();
    if rule == Rule::integer {
        let lower = parse_range_bound(&head)?;
        let max = match parts.next() {
            // A lone integer is an exact length.
            None => Some(lower),
            // A separator followed by an optional upper bound.
            Some(_) => match parts.next() {
                Some(upper) => Some(parse_range_bound(&upper)?),
                None => None,
            },
        };
        return Ok(Range {
            min: Some(lower),
            max,
        });
    }

    let max = if rule == Rule::dot_dot {
        match parts.next() {
            Some(upper) => Some(parse_range_bound(&upper)?),
            None => None,
        }
    } else {
        None
    };
    Ok(Range { min: None, max })
}
fn build_expression_list(pair: Pair<Rule>) -> Result<Vec<Expr>, ParseError> {
pair.into_inner().map(build_expression).collect()
}
/// Builds the items of a map projection.
///
/// Each child is classified from its raw text:
///
/// * text starts with `.`  -> `AllProperties` when the first inner pair is `star`,
///   otherwise `Property` named after that pair's text
/// * text contains `:`     -> `LiteralEntry` with the first inner pair as key and the
///   second as value expression
/// * otherwise             -> `Variable` named after the first inner pair's text
///
/// # Errors
/// Returns the first error from `build_expression`.
///
/// # Panics
/// Panics if an item lacks an expected inner pair.
fn build_map_projection_items(pair: Pair<Rule>) -> Result<Vec<MapProjectionItem>, ParseError> {
    let mut items = Vec::new();
    for entry in pair.into_inner() {
        let text = entry.as_str();
        let mut parts = entry.into_inner();
        // Every form begins by consuming its first inner pair.
        let head = parts.next().unwrap();

        let item = if text.starts_with('.') {
            if head.as_rule() == Rule::star {
                MapProjectionItem::AllProperties
            } else {
                MapProjectionItem::Property(head.as_str().to_owned())
            }
        } else if text.contains(':') {
            let value = build_expression(parts.next().unwrap())?;
            MapProjectionItem::LiteralEntry(head.as_str().to_owned(), Box::new(value))
        } else {
            MapProjectionItem::Variable(head.as_str().to_owned())
        };
        items.push(item);
    }
    Ok(items)
}
fn build_schema_command(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
let inner = pair.into_inner().next().unwrap();
match inner.as_rule() {
Rule::create_vector_index => build_create_vector_index(inner),
Rule::create_fulltext_index => build_create_fulltext_index(inner),
Rule::create_scalar_index => build_create_scalar_index(inner),
Rule::create_json_index => build_create_json_index(inner),
Rule::drop_index => build_drop_index(inner),
Rule::create_constraint => build_create_constraint(inner),
Rule::drop_constraint => build_drop_constraint(inner),
Rule::create_label => build_create_label(inner),
Rule::create_edge_type => build_create_edge_type(inner),
Rule::alter_label => build_alter_label(inner),
Rule::alter_edge_type => build_alter_edge_type(inner),
Rule::drop_label => build_drop_label(inner),
Rule::drop_edge_type => build_drop_edge_type(inner),
Rule::show_constraints => build_show_constraints(inner),
Rule::show_indexes => build_show_indexes(inner),
Rule::show_database => Ok(SchemaCommand::ShowDatabase),
Rule::show_config => Ok(SchemaCommand::ShowConfig),
Rule::show_statistics => Ok(SchemaCommand::ShowStatistics),
Rule::vacuum_command => Ok(SchemaCommand::Vacuum),
Rule::checkpoint_command => Ok(SchemaCommand::Checkpoint),
Rule::backup_command => build_backup_command(inner),
Rule::copy_to => build_copy_to(inner),
Rule::copy_from => build_copy_from(inner),
_ => unreachable!("Unexpected schema command: {:?}", inner.as_rule()),
}
}
/// Reads a label binding from `inner` and returns the normalized label name.
///
/// The first token is always consumed. If the token after it is an
/// `identifier_or_keyword`, that second token is consumed and used as the label;
/// otherwise the first token itself is the label.
///
/// # Panics
/// Panics if `inner` is exhausted.
fn parse_label_binding(inner: &mut std::iter::Peekable<Pairs<Rule>>) -> String {
    let head = inner.next().unwrap();
    let label_follows = matches!(
        inner.peek(),
        Some(p) if p.as_rule() == Rule::identifier_or_keyword
    );
    let chosen = if label_follows {
        inner.next().unwrap()
    } else {
        head
    };
    normalize_identifier(chosen.as_str())
}
/// Returns the trimmed text after the last `.` in the pair's text, or the whole trimmed
/// text when it contains no `.`.
fn extract_property_name(pair: Pair<Rule>) -> String {
    let text = pair.as_str().trim();
    // The first segment produced by `rsplit` is the one after the final dot.
    let last_segment = text.rsplit('.').next().unwrap_or(text);
    last_segment.trim().to_string()
}
/// Reads an optional `OPTIONS <map>` tail from `inner`.
///
/// One item is always consumed if available. When it is the `OPTIONS` token, the item
/// after it is converted with `build_map_options`; in every other case an empty map is
/// returned.
///
/// # Errors
/// Propagates errors from `build_map_options`.
///
/// # Panics
/// Panics if `OPTIONS` is the last item.
fn parse_options<'a, I>(inner: &mut I) -> Result<HashMap<String, Value>, ParseError>
where
    I: Iterator<Item = Pair<'a, Rule>>,
{
    match inner.next() {
        Some(token) if token.as_rule() == Rule::OPTIONS => {
            build_map_options(inner.next().unwrap())
        }
        _ => Ok(HashMap::new()),
    }
}
/// Builds `SchemaCommand::CreateVectorIndex` from its pair.
///
/// Children are read positionally: three leading tokens are skipped (presumably the
/// `CREATE VECTOR INDEX` keywords — confirm against the grammar), then the index name,
/// an optional `if_not_exists`, one skipped token, the label binding, one skipped
/// token, the property expression, and an optional options tail.
///
/// # Errors
/// Propagates errors from `parse_options`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_create_vector_index(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..3 {
        tokens.next();
    }
    let name = tokens.next().unwrap().as_str().to_owned();
    let if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);

    tokens.next();
    let label = parse_label_binding(&mut tokens);

    tokens.next();
    let property = extract_property_name(tokens.next().unwrap());
    let options = parse_options(&mut tokens)?;

    let index = CreateVectorIndex {
        name,
        label,
        property,
        options,
        if_not_exists,
    };
    Ok(SchemaCommand::CreateVectorIndex(index))
}
/// Builds `SchemaCommand::CreateFullTextIndex` from its pair.
///
/// Children are read positionally: three leading tokens are skipped (presumably the
/// `CREATE FULLTEXT INDEX` keywords — confirm against the grammar), then the index
/// name, an optional `if_not_exists`, one skipped token, the label binding, one skipped
/// token, an optional `EACH`, the property target, and an optional options tail. A
/// `property_expression_list` target yields one property per child; any other target
/// is used verbatim as a single property.
///
/// # Errors
/// Propagates errors from `parse_options`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_create_fulltext_index(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..3 {
        tokens.next();
    }
    let name = tokens.next().unwrap().as_str().to_owned();
    let if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);

    tokens.next();
    let label = parse_label_binding(&mut tokens);

    tokens.next();
    consume_if_present(&mut tokens, Rule::EACH);

    let target = tokens.next().unwrap();
    let properties = match target.as_rule() {
        Rule::property_expression_list => {
            target.into_inner().map(extract_property_name).collect()
        }
        _ => vec![target.as_str().to_string()],
    };
    let options = parse_options(&mut tokens)?;

    let index = CreateFullTextIndex {
        name,
        label,
        properties,
        options,
        if_not_exists,
    };
    Ok(SchemaCommand::CreateFullTextIndex(index))
}
fn build_create_scalar_index(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
let mut inner = pair.into_inner().peekable();
inner.next(); inner.next();
let if_not_exists = consume_if_present(&mut inner, Rule::if_not_exists);
let name = inner.next().unwrap().as_str().to_string();
inner.next(); let _variable = inner.next().unwrap().as_str().to_string(); let label = normalize_identifier(inner.next().unwrap().as_str()); inner.next();
let expr_list = inner.next().unwrap();
let expressions: Vec<Expr> = expr_list
.into_inner()
.map(build_expression)
.collect::<Result<Vec<_>, _>>()?;
let mut where_clause = None;
let mut options = HashMap::new();
for p in inner {
match p.as_rule() {
Rule::where_clause => where_clause = Some(extract_where_expr(p)?),
Rule::OPTIONS => {
}
Rule::map_literal => {
options = build_map_options(p)?;
}
_ => {}
}
}
Ok(SchemaCommand::CreateScalarIndex(CreateScalarIndex {
name,
label,
expressions,
where_clause,
options,
if_not_exists,
}))
}
/// Builds `SchemaCommand::CreateJsonFtsIndex` from its pair.
///
/// Children are read positionally: four leading tokens are skipped (presumably the
/// command keywords — confirm against the grammar), then the index name, an optional
/// `if_not_exists`, one skipped token, the bound variable (required but unused), the
/// label, one skipped token, the column, and an optional options tail.
///
/// # Errors
/// Propagates errors from `parse_options`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_create_json_index(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..4 {
        tokens.next();
    }
    let name = tokens.next().unwrap().as_str().to_owned();
    let if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);

    tokens.next();
    let _variable = tokens.next().unwrap();
    let label = normalize_identifier(tokens.next().unwrap().as_str());

    tokens.next();
    let column = tokens.next().unwrap().as_str().to_owned();
    let options = parse_options(&mut tokens)?;

    let index = CreateJsonFtsIndex {
        name,
        label,
        column,
        options,
        if_not_exists,
    };
    Ok(SchemaCommand::CreateJsonFtsIndex(index))
}
/// Builds `SchemaCommand::DropIndex` from its pair.
///
/// Two leading tokens are skipped (presumably `DROP INDEX` — confirm against the
/// grammar). An optional `if_exists` child is consumed but not recorded; the next
/// child's text is the index name, taken as written.
///
/// # Panics
/// Panics if the name is missing.
fn build_drop_index(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    tokens.next();
    tokens.next();
    // Consumed so the name is next; the flag itself is not part of `DropIndex`.
    let _if_exists = consume_if_present(&mut tokens, Rule::if_exists);
    let name = tokens.next().unwrap().as_str().to_owned();
    Ok(SchemaCommand::DropIndex(DropIndex { name }))
}
/// Builds `SchemaCommand::CreateConstraint` from its pair.
///
/// Two leading tokens are skipped (presumably `CREATE CONSTRAINT` — confirm against
/// the grammar) and an optional `if_not_exists` is consumed but not recorded. The next
/// child is always consumed: if it is identifier-like it is the constraint name and one
/// more token after it is skipped; otherwise the constraint is unnamed. Then follow
/// the bound variable, the label, one skipped token, and the assertion.
///
/// # Errors
/// Propagates errors from `build_constraint_assertion`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_create_constraint(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    tokens.next();
    tokens.next();
    let _if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);

    let candidate = tokens.next().unwrap();
    let name = match candidate.as_rule() {
        Rule::identifier | Rule::identifier_or_keyword => {
            // A name was given, so the token after it still has to be skipped.
            tokens.next();
            Some(normalize_identifier(candidate.as_str()))
        }
        // `candidate` was already the token that would otherwise be skipped.
        _ => None,
    };

    let var = tokens.next().unwrap().as_str().to_owned();
    let label = normalize_identifier(tokens.next().unwrap().as_str());
    tokens.next();

    let assertion = tokens.next().unwrap();
    let (constraint_type, properties, expression) = build_constraint_assertion(assertion, &var)?;

    let constraint = CreateConstraint {
        name,
        constraint_type,
        label,
        properties,
        expression,
    };
    Ok(SchemaCommand::CreateConstraint(constraint))
}
/// Interprets a constraint assertion and returns `(type, properties, expression)`.
///
/// The first child selects the form:
///
/// * `EXISTS`        -> `Exists`; properties come from the next child
/// * `property_expr` -> properties come from it; after one skipped token and an
///   optional `NODE`, the keyword must be `UNIQUE` (`Unique`) or `KEY` (`NodeKey`)
/// * `expression`    -> `Check` with that expression and no properties
/// * anything else   -> one token is skipped, the next child's children are the
///   property names (taken verbatim), two tokens are skipped, an optional
///   `NODE`/`RELATIONSHIP` is skipped, then `UNIQUE` gives `Unique` and any other
///   keyword gives `NodeKey`
///
/// # Errors
/// Returns "Expected UNIQUE or KEY" in the `property_expr` form, and propagates errors
/// from `extract_property_names_from_expr` and `build_expression`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_constraint_assertion(
    pair: Pair<Rule>,
    var: &str,
) -> Result<(ConstraintType, Vec<String>, Option<Expr>), ParseError> {
    let mut parts = pair.into_inner();
    let head = parts.next().unwrap();
    match head.as_rule() {
        Rule::EXISTS => {
            let target = parts.next().unwrap();
            let props = extract_property_names_from_expr(target, var)?;
            Ok((ConstraintType::Exists, props, None))
        }
        Rule::property_expr => {
            let props = extract_property_names_from_expr(head, var)?;
            // Skip one token, then step over an optional NODE marker.
            let keyword = parts.nth(1).unwrap();
            let keyword = if keyword.as_rule() == Rule::NODE {
                parts.next().unwrap()
            } else {
                keyword
            };
            let constraint_type = match keyword.as_rule() {
                Rule::UNIQUE => ConstraintType::Unique,
                Rule::KEY => ConstraintType::NodeKey,
                _ => return Err(ParseError::new("Expected UNIQUE or KEY".to_string())),
            };
            Ok((constraint_type, props, None))
        }
        Rule::expression => {
            let check = build_expression(head)?;
            Ok((ConstraintType::Check, vec![], Some(check)))
        }
        _ => {
            let prop_list = parts.nth(1).unwrap();
            let properties = prop_list
                .into_inner()
                .map(|p| p.as_str().to_string())
                .collect();
            // Skip two tokens, then step over an optional NODE / RELATIONSHIP marker.
            let keyword = parts.nth(2).unwrap();
            let keyword = if matches!(keyword.as_rule(), Rule::NODE | Rule::RELATIONSHIP) {
                parts.next().unwrap()
            } else {
                keyword
            };
            let constraint_type = match keyword.as_rule() {
                Rule::UNIQUE => ConstraintType::Unique,
                _ => ConstraintType::NodeKey,
            };
            Ok((constraint_type, properties, None))
        }
    }
}
/// Collects the identifier texts found in `pair`, in document order, dropping the
/// first one when it equals `expected_var`.
///
/// The tree is walked depth-first; a pair whose rule is `identifier` or
/// `identifier_or_keyword` contributes its text and is not descended into. This
/// function currently never returns an error.
fn extract_property_names_from_expr(
    pair: Pair<Rule>,
    expected_var: &str,
) -> Result<Vec<String>, ParseError> {
    /// Appends every identifier-like leaf below `node` to `out`.
    fn gather(node: Pair<Rule>, out: &mut Vec<String>) {
        let is_name = matches!(
            node.as_rule(),
            Rule::identifier | Rule::identifier_or_keyword
        );
        if !is_name {
            node.into_inner().for_each(|child| gather(child, out));
            return;
        }
        out.push(node.as_str().to_string());
    }

    let mut names: Vec<String> = Vec::new();
    gather(pair, &mut names);

    // A leading occurrence of the bound variable is a qualifier, not a property.
    if names.first().map(String::as_str) == Some(expected_var) {
        names.remove(0);
    }
    Ok(names)
}
/// Builds `SchemaCommand::DropConstraint` from its pair.
///
/// Two leading tokens are skipped (presumably `DROP CONSTRAINT` — confirm against the
/// grammar). An optional `if_exists` child is consumed but not recorded; the next
/// child's text is the constraint name, taken as written.
///
/// # Panics
/// Panics if the name is missing.
fn build_drop_constraint(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    tokens.next();
    tokens.next();
    // Consumed so the name is next; the flag itself is not part of `DropConstraint`.
    let _if_exists = consume_if_present(&mut tokens, Rule::if_exists);
    let name = tokens.next().unwrap().as_str().to_owned();
    Ok(SchemaCommand::DropConstraint(DropConstraint { name }))
}
/// Builds `SchemaCommand::CreateLabel` from its pair.
///
/// Two leading tokens are skipped (presumably `CREATE LABEL` — confirm against the
/// grammar), then come an optional `if_not_exists`, the label name (normalized) and
/// the property definitions.
///
/// # Errors
/// Propagates errors from `build_property_definitions`.
///
/// # Panics
/// Panics if the name or the property definitions are missing.
fn build_create_label(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    tokens.next();
    tokens.next();
    let if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);
    let name = normalize_identifier(tokens.next().unwrap().as_str());
    let properties = build_property_definitions(tokens.next().unwrap())?;

    let label = CreateLabel {
        name,
        properties,
        if_not_exists,
    };
    Ok(SchemaCommand::CreateLabel(label))
}
/// Builds `SchemaCommand::CreateEdgeType` from its pair.
///
/// Three leading tokens are skipped (presumably `CREATE EDGE TYPE` — confirm against
/// the grammar), then come an optional `if_not_exists` and the type name. The next
/// child is always consumed: if it is `property_definitions` it is converted and one
/// more token is skipped, otherwise it is treated as that skipped token and the type
/// has no properties. Finally the source label, one skipped token, and the
/// destination label are read.
///
/// # Errors
/// Propagates errors from `build_property_definitions`.
///
/// # Panics
/// Panics if an expected child is missing.
fn build_create_edge_type(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..3 {
        tokens.next();
    }
    let if_not_exists = consume_if_present(&mut tokens, Rule::if_not_exists);
    let name = normalize_identifier(tokens.next().unwrap().as_str());

    let head = tokens.next().unwrap();
    let properties = if head.as_rule() == Rule::property_definitions {
        let definitions = build_property_definitions(head)?;
        tokens.next();
        definitions
    } else {
        Vec::new()
    };

    let src_label = normalize_identifier(tokens.next().unwrap().as_str());
    let dst_label = normalize_identifier(tokens.nth(1).unwrap().as_str());

    let edge_type = CreateEdgeType {
        name,
        src_labels: vec![src_label],
        dst_labels: vec![dst_label],
        properties,
        if_not_exists,
    };
    Ok(SchemaCommand::CreateEdgeType(edge_type))
}
/// Builds `SchemaCommand::AlterLabel` from its pair.
///
/// Two leading tokens are skipped (presumably `ALTER LABEL` — confirm against the
/// grammar); the third child is the label name (normalized) and the fourth is the
/// alter action.
///
/// # Errors
/// Propagates errors from `build_alter_action`.
///
/// # Panics
/// Panics if the name or the action is missing.
fn build_alter_label(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut parts = pair.into_inner();
    let name = normalize_identifier(parts.nth(2).unwrap().as_str());
    let action = build_alter_action(parts.next().unwrap())?;
    Ok(SchemaCommand::AlterLabel(AlterLabel { name, action }))
}
/// Builds `SchemaCommand::AlterEdgeType` from its pair.
///
/// Three leading tokens are skipped (presumably `ALTER EDGE TYPE` — confirm against
/// the grammar); the fourth child is the type name (normalized) and the fifth is the
/// alter action.
///
/// # Errors
/// Propagates errors from `build_alter_action`.
///
/// # Panics
/// Panics if the name or the action is missing.
fn build_alter_edge_type(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut parts = pair.into_inner();
    let name = normalize_identifier(parts.nth(3).unwrap().as_str());
    let action = build_alter_action(parts.next().unwrap())?;
    Ok(SchemaCommand::AlterEdgeType(AlterEdgeType { name, action }))
}
/// Builds an `AlterAction` from an alter-action pair.
///
/// The first child selects the action; each operand is preceded by one child that is
/// skipped:
///
/// * `ADD`     -> `AddProperty` from the property definition
/// * `DROP_KW` -> `DropProperty` with the property name as written
/// * `RENAME`  -> `RenameProperty` with the old and new names as written
///
/// # Errors
/// Propagates errors from `build_property_definition`.
///
/// # Panics
/// Panics on any other leading rule or if an operand is missing.
fn build_alter_action(pair: Pair<Rule>) -> Result<AlterAction, ParseError> {
    let mut parts = pair.into_inner();
    let verb = parts.next().unwrap().as_rule();
    match verb {
        Rule::ADD => {
            let definition = build_property_definition(parts.nth(1).unwrap())?;
            Ok(AlterAction::AddProperty(definition))
        }
        Rule::DROP_KW => {
            let name = parts.nth(1).unwrap().as_str().to_owned();
            Ok(AlterAction::DropProperty(name))
        }
        Rule::RENAME => {
            let old_name = parts.nth(1).unwrap().as_str().to_owned();
            let new_name = parts.nth(1).unwrap().as_str().to_owned();
            Ok(AlterAction::RenameProperty { old_name, new_name })
        }
        _ => unreachable!(),
    }
}
/// Builds `SchemaCommand::DropLabel` from its pair.
///
/// Two leading tokens are skipped (presumably `DROP LABEL` — confirm against the
/// grammar), then come an optional `if_exists` and the label name (normalized).
///
/// # Panics
/// Panics if the name is missing.
fn build_drop_label(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..2 {
        tokens.next();
    }
    let if_exists = consume_if_present(&mut tokens, Rule::if_exists);
    let name = normalize_identifier(tokens.next().unwrap().as_str());
    let command = DropLabel { name, if_exists };
    Ok(SchemaCommand::DropLabel(command))
}
/// Builds `SchemaCommand::DropEdgeType` from its pair.
///
/// Three leading tokens are skipped (presumably `DROP EDGE TYPE` — confirm against the
/// grammar), then come an optional `if_exists` and the type name (normalized).
///
/// # Panics
/// Panics if the name is missing.
fn build_drop_edge_type(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    for _ in 0..3 {
        tokens.next();
    }
    let if_exists = consume_if_present(&mut tokens, Rule::if_exists);
    let name = normalize_identifier(tokens.next().unwrap().as_str());
    let command = DropEdgeType { name, if_exists };
    Ok(SchemaCommand::DropEdgeType(command))
}
fn build_show_constraints(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
let mut inner = pair.into_inner();
inner.next(); inner.next();
let target = inner.next().map(build_constraint_target).transpose()?;
Ok(SchemaCommand::ShowConstraints(ShowConstraints { target }))
}
/// Builds a `ConstraintTarget` from its pair.
///
/// The first child is skipped, an optional `EDGE` token marks an edge-type target, one
/// more child is skipped, and the following child is the (normalized) name.
///
/// # Panics
/// Panics if the name is missing.
fn build_constraint_target(pair: Pair<Rule>) -> Result<ConstraintTarget, ParseError> {
    let mut tokens = pair.into_inner().peekable();
    tokens.next();
    let is_edge = consume_if_present(&mut tokens, Rule::EDGE);
    // Drop one token and take the name that follows it.
    let name = normalize_identifier(tokens.nth(1).unwrap().as_str());

    if is_edge {
        Ok(ConstraintTarget::EdgeType(name))
    } else {
        Ok(ConstraintTarget::Label(name))
    }
}
/// Builds `SchemaCommand::ShowIndexes` from its pair.
///
/// The first child is skipped. If the second child is `VECTOR` or `FULLTEXT`, its
/// upper-cased text becomes the filter; otherwise there is no filter.
fn build_show_indexes(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let mut parts = pair.into_inner();
    parts.next();
    let filter = parts
        .next()
        .filter(|kind| matches!(kind.as_rule(), Rule::VECTOR | Rule::FULLTEXT))
        .map(|kind| kind.as_str().to_uppercase());
    Ok(SchemaCommand::ShowIndexes(ShowIndexes { filter }))
}
/// Builds `SchemaCommand::Backup` from its pair.
///
/// The first two children are skipped (presumably keyword tokens — confirm against the
/// grammar); the third is the destination path string literal.
///
/// # Errors
/// Propagates errors from `build_string_literal`.
///
/// # Panics
/// Panics if the path child is missing.
fn build_backup_command(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    let path_pair = pair.into_inner().nth(2).unwrap();
    build_string_literal(path_pair).map(|path| SchemaCommand::Backup { path })
}
/// Reads an optional `WITH <map>` tail from `inner`.
///
/// One item is always consumed if available. When it is the `WITH` token, the item
/// after it is converted with `build_map_options`; in every other case an empty map is
/// returned.
///
/// # Errors
/// Propagates errors from `build_map_options`.
///
/// # Panics
/// Panics if `WITH` is the last item.
fn parse_with_options(inner: &mut Pairs<Rule>) -> Result<HashMap<String, Value>, ParseError> {
    match inner.next() {
        Some(token) if token.as_rule() == Rule::WITH => build_map_options(inner.next().unwrap()),
        _ => Ok(HashMap::new()),
    }
}
/// Extracts `(label, path, format, options)` from a COPY command pair.
///
/// The label and the path string literal are each preceded by one child that is
/// skipped. Options come from an optional `WITH` tail, and the format is derived from
/// the path and options by `detect_file_format`.
///
/// # Errors
/// Propagates errors from `build_string_literal` and `parse_with_options`.
///
/// # Panics
/// Panics if the label or the path is missing.
fn parse_copy_fields(
    pair: Pair<Rule>,
) -> Result<(String, String, String, HashMap<String, Value>), ParseError> {
    let mut parts = pair.into_inner();
    // Each `nth(1)` discards one token and yields the operand after it.
    let label = normalize_identifier(parts.nth(1).unwrap().as_str());
    let path = build_string_literal(parts.nth(1).unwrap())?;
    let options = parse_with_options(&mut parts)?;
    let format = detect_file_format(&path, &options);
    Ok((label, path, format, options))
}
/// Builds `SchemaCommand::CopyTo` from the fields extracted by `parse_copy_fields`.
///
/// # Errors
/// Propagates errors from `parse_copy_fields`.
fn build_copy_to(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    parse_copy_fields(pair).map(|(label, path, format, options)| {
        SchemaCommand::CopyTo(CopyToCommand {
            label,
            path,
            format,
            options,
        })
    })
}
/// Builds `SchemaCommand::CopyFrom` from the fields extracted by `parse_copy_fields`.
///
/// # Errors
/// Propagates errors from `parse_copy_fields`.
fn build_copy_from(pair: Pair<Rule>) -> Result<SchemaCommand, ParseError> {
    parse_copy_fields(pair).map(|(label, path, format, options)| {
        SchemaCommand::CopyFrom(CopyFromCommand {
            label,
            path,
            format,
            options,
        })
    })
}
fn detect_file_format(path: &str, options: &HashMap<String, Value>) -> String {
if let Some(format_value) = options.get("format")
&& let Some(format_str) = format_value.as_str()
{
return format_str.to_lowercase();
}
let ext = std::path::Path::new(path)
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
match ext.to_lowercase().as_str() {
"csv" => "csv",
"parquet" | "pq" => "parquet",
_ => "parquet",
}
.to_string()
}
/// Consumes the next pair only if its rule is `rule`, reporting whether it did so.
fn consume_if_present(inner: &mut std::iter::Peekable<Pairs<Rule>>, rule: Rule) -> bool {
    let present = peek_is(inner, rule);
    if present {
        inner.next();
    }
    present
}
fn build_property_definitions(pair: Pair<Rule>) -> Result<Vec<PropertyDefinition>, ParseError> {
pair.into_inner().map(build_property_definition).collect()
}
/// Builds a `PropertyDefinition` from its pair.
///
/// The first two children are the property name and the data type, both taken as
/// written. Remaining children are modifiers recognised by rule:
///
/// * `nullable_constraint` -> clears `nullable` when its first inner pair is `NOT`
/// * `UNIQUE`              -> sets `unique`
/// * `default_value`       -> its second inner pair is the default expression
///
/// Defaults are `nullable = true`, `unique = false`, no default value. Unknown
/// modifiers are ignored.
///
/// # Errors
/// Propagates errors from `build_expression`.
///
/// # Panics
/// Panics if the name, the type, or a modifier's expected inner pair is missing.
fn build_property_definition(pair: Pair<Rule>) -> Result<PropertyDefinition, ParseError> {
    let mut parts = pair.into_inner();
    let name = parts.next().unwrap().as_str().to_owned();
    let data_type = parts.next().unwrap().as_str().to_owned();

    let (mut nullable, mut unique, mut default) = (true, false, None);
    for modifier in parts {
        let rule = modifier.as_rule();
        if rule == Rule::nullable_constraint {
            let marker = modifier.into_inner().next().unwrap();
            if marker.as_rule() == Rule::NOT {
                nullable = false;
            }
        } else if rule == Rule::UNIQUE {
            unique = true;
        } else if rule == Rule::default_value {
            let value = build_expression(modifier.into_inner().nth(1).unwrap())?;
            default = Some(value);
        }
    }

    Ok(PropertyDefinition {
        name,
        data_type,
        nullable,
        unique,
        default,
    })
}
/// Converts a constant expression into a `Value`.
///
/// Literals convert via `to_value`; maps and lists convert recursively, element by
/// element.
///
/// # Errors
/// Returns a `ParseError` for any other kind of expression, or when a nested element
/// fails to convert.
fn expr_to_value(expr: Expr) -> Result<Value, ParseError> {
    match expr {
        Expr::Literal(literal) => Ok(literal.to_value()),
        Expr::Map(entries) => {
            let converted = entries
                .into_iter()
                .map(|(key, inner)| expr_to_value(inner).map(|value| (key, value)))
                .collect::<Result<_, ParseError>>()?;
            Ok(Value::Map(converted))
        }
        Expr::List(items) => {
            let converted = items
                .into_iter()
                .map(expr_to_value)
                .collect::<Result<_, _>>()?;
            Ok(Value::List(converted))
        }
        other => {
            let message = format!(
                "OPTIONS values must be literals, maps, or lists. Got: {:?}",
                other
            );
            Err(ParseError::new(message))
        }
    }
}
/// Builds an options map from a map-literal pair.
///
/// The pair is parsed with `build_map_literal` and each value is converted with
/// `expr_to_value`. If a key occurs more than once, the last occurrence wins.
///
/// # Errors
/// Propagates errors from `build_map_literal` and `expr_to_value`.
fn build_map_options(pair: Pair<Rule>) -> Result<HashMap<String, Value>, ParseError> {
    let entries = match build_map_literal(pair)? {
        Expr::Map(entries) => entries,
        _ => unreachable!("build_map_literal always returns Expr::Map"),
    };

    let mut options = HashMap::new();
    for (key, expr) in entries {
        options.insert(key, expr_to_value(expr)?);
    }
    Ok(options)
}
/// Decodes a `string` pair into its unescaped contents.
///
/// The first character is taken as the quote character; the first and last byte of
/// the text are stripped and the remainder is passed to `unescape_string`.
///
/// # Errors
/// Returns "Expected string literal" when `pair` is not a `string`, and propagates
/// errors from `unescape_string`.
pub(crate) fn build_string_literal(pair: Pair<Rule>) -> Result<String, ParseError> {
    if pair.as_rule() != Rule::string {
        return Err(ParseError::new("Expected string literal".to_string()));
    }
    let raw = pair.as_str();
    let quote_char = raw.chars().next().unwrap();
    let body = &raw[1..raw.len() - 1];
    unescape_string(body, quote_char)
}