pub(crate) mod locy_parser;
mod locy_walker;
mod walker;
use crate::ast::{Expr, Query};
use crate::locy_ast::LocyProgram;
use pest::Parser;
use pest_derive::Parser;
/// Error type returned by the parsing entry points in this module.
///
/// Its `Display` output is the stored `message` verbatim, as configured by the
/// `#[error("{message}")]` attribute below.
#[derive(Debug, thiserror::Error)]
#[error("{message}")]
pub struct ParseError {
/// Human-readable description of the failure; this is the whole displayed text.
message: String,
}
impl ParseError {
pub fn new(message: String) -> Self {
Self { message }
}
}
/// Parser for the grammar in `grammar/cypher.pest`.
///
/// The implementation comes from the `pest_derive::Parser` derive; the `Rule`
/// values used elsewhere in this module (e.g. `Rule::query`) presumably
/// originate from the same derive, since no other `Rule` is imported here.
#[derive(Parser)]
#[grammar = "grammar/cypher.pest"]
pub struct CypherParser;
/// Parses `input` with the `Rule::query` grammar rule and builds a [`Query`].
///
/// # Errors
///
/// A pest failure is converted into a [`ParseError`] by `map_pest_error`;
/// any error produced by `walker::build_query` is returned unchanged.
pub fn parse(input: &str) -> Result<Query, ParseError> {
    match CypherParser::parse(Rule::query, input) {
        Ok(pairs) => walker::build_query(pairs),
        Err(pest_err) => Err(map_pest_error(input, pest_err)),
    }
}
/// Parses `input` with the `Rule::expression` grammar rule and builds an [`Expr`].
///
/// # Errors
///
/// A pest failure is converted into a [`ParseError`] by `map_pest_error`;
/// any error produced by `walker::build_expression` is returned unchanged.
///
/// # Panics
///
/// Panics if a successful parse yields no pair at all.
/// NOTE(review): this relies on pest always producing the top-level pair on
/// success — confirm `expression` is not a silent rule in the grammar.
pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
    let mut pairs = match CypherParser::parse(Rule::expression, input) {
        Ok(pairs) => pairs,
        Err(pest_err) => return Err(map_pest_error(input, pest_err)),
    };
    let root = pairs.next().unwrap();
    walker::build_expression(root)
}
/// Parses `input` with the `locy_query` rule of `LocyParser` and builds a
/// [`LocyProgram`].
///
/// # Errors
///
/// A pest failure is converted into a [`ParseError`] by `map_locy_pest_error`;
/// any error produced by `locy_walker::build_program` is returned unchanged.
///
/// # Panics
///
/// Panics if a successful parse yields no pair at all.
/// NOTE(review): this relies on pest always producing the top-level pair on
/// success — confirm `locy_query` is not a silent rule in the grammar.
pub fn parse_locy(input: &str) -> Result<LocyProgram, ParseError> {
    use locy_parser::{LocyParser, Rule as LocyRule};

    let mut pairs = match LocyParser::parse(LocyRule::locy_query, input) {
        Ok(pairs) => pairs,
        Err(pest_err) => return Err(map_locy_pest_error(input, pest_err)),
    };
    let root = pairs.next().unwrap();
    locy_walker::build_program(root)
}
fn error_position(e: &pest::error::Error<Rule>) -> usize {
match e.location {
pest::error::InputLocation::Pos(p) => p,
pest::error::InputLocation::Span((s, _)) => s,
}
}
/// Finds the token that touches byte offset `pos` and returns its half-open
/// byte range `(start, end)`.
///
/// A token is a maximal run of ASCII alphanumerics and the bytes
/// `_`, `-`, `.`, `#`, `$`. `pos` is clamped to the last byte of `input`.
/// When the byte at the clamped offset is not a token byte, the byte just
/// before it is tried instead, so an offset sitting right after a token still
/// resolves to that token.
///
/// Returns `None` for empty input or when neither byte belongs to a token.
fn extract_token_span_at(input: &str, pos: usize) -> Option<(usize, usize)> {
    fn belongs_to_token(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b'.' | b'#' | b'$')
    }

    let raw = input.as_bytes();
    // `checked_sub` doubles as the empty-input guard.
    let last_index = raw.len().checked_sub(1)?;
    let clamped = pos.min(last_index);

    let anchor = if belongs_to_token(raw[clamped]) {
        clamped
    } else {
        let previous = clamped.checked_sub(1)?;
        if !belongs_to_token(raw[previous]) {
            return None;
        }
        previous
    };

    // Token starts right after the last non-token byte before the anchor.
    let start = raw[..anchor]
        .iter()
        .rposition(|&byte| !belongs_to_token(byte))
        .map_or(0, |index| index + 1);
    // Token ends at the first non-token byte at or after the anchor.
    let end = raw[anchor..]
        .iter()
        .position(|&byte| !belongs_to_token(byte))
        .map_or(raw.len(), |offset| anchor + offset);

    Some((start, end))
}
/// Reports whether the byte range `start..end` of `input` sits where a map key
/// would: the nearest non-whitespace byte before it is `{` or `,`, and the
/// nearest non-whitespace byte after it is `:`.
///
/// Returns `false` when `start` is not a valid index into `input`, when `end`
/// exceeds the input length, or when only whitespace (or nothing) surrounds
/// the range on either side.
fn is_map_key_like_context(input: &str, start: usize, end: usize) -> bool {
    let raw = input.as_bytes();
    // Also covers empty input, since then `start >= 0 == raw.len()`.
    if start >= raw.len() || end > raw.len() {
        return false;
    }

    let next_significant = raw[end..]
        .iter()
        .copied()
        .find(|byte| !byte.is_ascii_whitespace());
    if next_significant != Some(b':') {
        return false;
    }

    let previous_significant = raw[..start]
        .iter()
        .copied()
        .rfind(|byte| !byte.is_ascii_whitespace());
    matches!(previous_significant, Some(b'{') | Some(b','))
}
/// Returns the `[...]` segment opened by the last `[` before `pos`, provided
/// the text preceding that `[` (ignoring trailing whitespace) ends with `-`.
///
/// `pos` is clamped to the input length. The segment runs up to and including
/// the first `]` found after the `[`; if there is none it stops at the clamped
/// `pos`. Returns `None` when no `[` precedes `pos` or when it is not preceded
/// by `-`.
fn relationship_bracket_segment(input: &str, pos: usize) -> Option<&str> {
    let limit = pos.min(input.len());
    let open = input[..limit].rfind('[')?;
    let (head, tail) = input.split_at(open);

    if !head.trim_end().ends_with('-') {
        return None;
    }

    let close = match tail.find(']') {
        Some(offset) => open + offset + 1,
        // Unclosed bracket: fall back to the error position.
        None => limit,
    };
    Some(&input[open..close])
}
/// Reports whether the relationship bracket segment around `pos` looks
/// malformed: it contains `..` without any `*`, or it contains `*-`.
///
/// Returns `false` when `relationship_bracket_segment` finds no segment.
fn is_invalid_relationship_pattern(input: &str, pos: usize) -> bool {
    relationship_bracket_segment(input, pos).is_some_and(|segment| {
        let range_without_star = segment.contains("..") && !segment.contains('*');
        range_without_star || segment.contains("*-")
    })
}
/// Reports whether the token at `pos` looks like a malformed number literal.
///
/// The token (as located by `extract_token_span_at`) must start with an ASCII
/// digit once an optional leading `-` is removed, and must not be in a map-key
/// position (`is_map_key_like_context`). It is then considered malformed when:
///
/// * it has a `0x`/`0X` prefix followed by no digits or by anything other than
///   hex digits and `_`;
/// * it has a `0o`/`0O` prefix followed by no digits or by anything other than
///   `0`-`7` and `_`;
/// * otherwise, it contains any ASCII letter.
fn is_invalid_number_literal(input: &str, pos: usize) -> bool {
    /// True when `digits` is empty or holds a character that is neither
    /// accepted by `is_valid` nor an underscore.
    fn malformed_digits(digits: &str, is_valid: fn(&char) -> bool) -> bool {
        digits.is_empty() || digits.chars().any(|c| !(is_valid(&c) || c == '_'))
    }

    let (start, end) = match extract_token_span_at(input, pos) {
        Some(span) => span,
        None => return false,
    };
    if is_map_key_like_context(input, start, end) {
        return false;
    }

    let token = &input[start..end];
    let unsigned = token.strip_prefix('-').unwrap_or(token);
    if !matches!(unsigned.bytes().next(), Some(b'0'..=b'9')) {
        return false;
    }

    // Both radix prefixes are two ASCII bytes, so a checked split at 2 is
    // equivalent to stripping them.
    let digits = unsigned.get(2..).unwrap_or("");
    match unsigned.get(..2) {
        Some("0x") | Some("0X") => malformed_digits(digits, char::is_ascii_hexdigit),
        Some("0o") | Some("0O") => malformed_digits(digits, |c| matches!(c, '0'..='7')),
        _ => unsigned.chars().any(|c| c.is_ascii_alphabetic()),
    }
}
/// Returns the character starting at byte offset `pos` when it is one of the
/// dash-like characters `—`, `–` or `−`.
///
/// Returns `None` when `pos` is out of range, not on a character boundary, at
/// the end of the input, or when the character there is anything else.
fn invalid_unicode_character(input: &str, pos: usize) -> Option<char> {
    input
        .get(pos..)
        .and_then(|tail| tail.chars().next())
        .filter(|found| matches!(*found, '—' | '–' | '−'))
}
fn locy_error_position(e: &pest::error::Error<locy_parser::Rule>) -> usize {
match e.location {
pest::error::InputLocation::Pos(p) => p,
pest::error::InputLocation::Span((s, _)) => s,
}
}
/// Guesses a Locy error category from the text that precedes byte offset `pos`.
///
/// The prefix `input[..pos]` is trimmed of trailing whitespace and upper-cased,
/// then inspected in this order:
///
/// 1. a trailing `BEST BY`, `ALONG`, `FOLD`, `ASSUME` or `DERIVE` keyword;
/// 2. `CREATE RULE` anywhere in the prefix;
/// 3. a trailing `QUERY` keyword.
///
/// A trailing keyword only counts when it is a whole word, i.e. it is not
/// immediately preceded by an alphanumeric character or `_`. This keeps
/// identifiers such as `scaffold` or `subquery` from being mistaken for the
/// `FOLD` / `QUERY` keywords.
///
/// `pos` is clamped to the input length. Returns `None` when nothing matches
/// or when `pos` does not fall on a character boundary.
fn locy_context_category(input: &str, pos: usize) -> Option<&'static str> {
    /// Trailing keywords and the category each one selects, in priority order.
    const TRAILING_KEYWORDS: &[(&str, &str)] = &[
        ("BEST BY", "InvalidBestByClause"),
        ("ALONG", "InvalidAlongClause"),
        ("FOLD", "InvalidFoldClause"),
        ("ASSUME", "InvalidAssumeBlock"),
        ("DERIVE", "InvalidDeriveCommand"),
    ];

    // Checked slicing: this runs while an error is already being reported, so
    // a bad offset must not turn into a panic.
    let before = input.get(..pos.min(input.len()))?.trim_end();
    let before_upper = before.to_uppercase();

    let ends_with_keyword = |keyword: &str| {
        before_upper.strip_suffix(keyword).is_some_and(|rest| {
            !rest
                .chars()
                .next_back()
                .is_some_and(|c| c.is_alphanumeric() || c == '_')
        })
    };

    for &(keyword, category) in TRAILING_KEYWORDS {
        if ends_with_keyword(keyword) {
            return Some(category);
        }
    }
    if before_upper.contains("CREATE RULE") {
        return Some("InvalidRuleDefinition");
    }
    // No need to re-check for `CREATE RULE` here: that case returned above.
    ends_with_keyword("QUERY").then_some("InvalidGoalQuery")
}
/// Converts a pest error from the Locy parser into a [`ParseError`] whose
/// message starts with `LocySyntaxError:`.
///
/// The checks run in order and the first hit decides the message: invalid
/// relationship pattern, invalid number literal, invalid Unicode character,
/// then the context category from `locy_context_category`. When none applies
/// the pest error is reported without a category.
fn map_locy_pest_error(input: &str, e: pest::error::Error<locy_parser::Rule>) -> ParseError {
    let offset = locy_error_position(&e);

    let message = if is_invalid_relationship_pattern(input, offset) {
        format!("LocySyntaxError: InvalidRelationshipPattern - {e}")
    } else if is_invalid_number_literal(input, offset) {
        format!("LocySyntaxError: InvalidNumberLiteral - {e}")
    } else if let Some(ch) = invalid_unicode_character(input, offset) {
        format!("LocySyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'")
    } else if let Some(category) = locy_context_category(input, offset) {
        format!("LocySyntaxError: {category} - {e}")
    } else {
        format!("LocySyntaxError: {e}")
    };

    ParseError::new(message)
}
/// Converts a pest error from the Cypher parser into a [`ParseError`].
///
/// The checks run in order and the first hit decides the message: invalid
/// relationship pattern, invalid number literal, then invalid Unicode
/// character (all prefixed `SyntaxError:`). Anything else is reported as
/// `UnexpectedSyntax:` followed by the pest error.
fn map_pest_error(input: &str, e: pest::error::Error<Rule>) -> ParseError {
    let offset = error_position(&e);

    let message = if is_invalid_relationship_pattern(input, offset) {
        format!("SyntaxError: InvalidRelationshipPattern - {e}")
    } else if is_invalid_number_literal(input, offset) {
        format!("SyntaxError: InvalidNumberLiteral - {e}")
    } else if let Some(ch) = invalid_unicode_character(input, offset) {
        format!("SyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'")
    } else {
        format!("UnexpectedSyntax: {e}")
    };

    ParseError::new(message)
}
// Unit tests for the parsing entry points and the error-classification
// heuristics defined in this module.
#[cfg(test)]
mod tests {
use super::*;
// Each input must be accepted by the generated parser when started at the
// paired grammar rule.
#[test]
fn test_expression_parsing() {
let cases = [
("1", Rule::integer),
("3.14", Rule::float),
("'hello'", Rule::string),
("n.name", Rule::expression),
("1 + 2", Rule::expression),
("a AND b OR c", Rule::expression),
];
for (input, rule) in cases {
let result = CypherParser::parse(rule, input);
assert!(
result.is_ok(),
"Failed to parse '{}' as {:?}: {:?}",
input,
rule,
result.err()
);
}
}
// List literals, list comprehensions and pattern comprehensions must all
// parse as expressions.
#[test]
fn test_list_expressions() {
assert!(parse_expression("[]").is_ok());
assert!(parse_expression("[1, 2, 3]").is_ok());
assert!(parse_expression("[x IN range(1,10) | x * 2]").is_ok());
assert!(parse_expression("[x IN list WHERE x > 5 | x]").is_ok());
assert!(parse_expression("[(n)-[:KNOWS]->(m) | m.name]").is_ok());
assert!(parse_expression("[p = (n)-->(m) WHERE m.age > 30 | p]").is_ok());
}
// Bracketed inputs that could be read as either a list literal or a
// comprehension must still parse.
#[test]
fn test_ambiguous_cases() {
assert!(parse_expression("[n]").is_ok()); assert!(parse_expression("[n.name]").is_ok()); assert!(parse_expression("[n IN list]").is_ok());
assert!(parse_expression("[(n)]").is_ok()); }
// Helper: parses `input` as a query, expects failure, and returns the
// displayed error message. Panics if parsing succeeds.
fn parse_err_msg(input: &str) -> String {
parse(input).unwrap_err().to_string()
}
// A `..` range inside a relationship bracket without `*` is classified as
// InvalidRelationshipPattern.
#[test]
fn test_invalid_relationship_pattern_missing_star_error_code() {
let msg = parse_err_msg("MATCH (a:A)\nMATCH (a)-[:LIKES..]->(c)\nRETURN c.name");
assert!(
msg.contains("InvalidRelationshipPattern"),
"expected InvalidRelationshipPattern, got: {msg}"
);
}
// A decimal literal containing a letter is classified as InvalidNumberLiteral.
#[test]
fn test_invalid_number_literal_error_code_decimal_alpha() {
let msg = parse_err_msg("RETURN 9223372h54775808 AS literal");
assert!(
msg.contains("InvalidNumberLiteral"),
"expected InvalidNumberLiteral, got: {msg}"
);
}
// A bare `0x` prefix with no digits is classified as InvalidNumberLiteral.
#[test]
fn test_invalid_number_literal_error_code_hex_prefix_only() {
let msg = parse_err_msg("RETURN 0x AS literal");
assert!(
msg.contains("InvalidNumberLiteral"),
"expected InvalidNumberLiteral, got: {msg}"
);
}
// A dash-like Unicode character used as an operator is classified as
// InvalidUnicodeCharacter.
#[test]
fn test_invalid_unicode_character_error_code() {
let msg = parse_err_msg("RETURN 42 — 41");
assert!(
msg.contains("InvalidUnicodeCharacter"),
"expected InvalidUnicodeCharacter, got: {msg}"
);
}
// A `#` inside a digit run contains no letters, so the error keeps the
// generic UnexpectedSyntax classification.
#[test]
fn test_symbol_in_number_stays_unexpected_syntax() {
let msg = parse_err_msg("RETURN 9223372#54775808 AS literal");
assert!(
msg.contains("UnexpectedSyntax"),
"expected UnexpectedSyntax, got: {msg}"
);
}
// A digit-leading token in map-key position is exempt from the number-literal
// heuristic and keeps the generic UnexpectedSyntax classification.
#[test]
fn test_map_key_starting_with_number_stays_unexpected_syntax() {
let msg = parse_err_msg("RETURN {1B2c3e67:1} AS literal");
assert!(
msg.contains("UnexpectedSyntax"),
"expected UnexpectedSyntax, got: {msg}"
);
}
// The unary-sign tests below expect stacked `+`/`-` signs in front of an
// integer to be folded into a single integer literal.
#[test]
fn test_unary_minus_double() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("--5").expect("--5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
}
#[test]
fn test_unary_minus_single() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("-5").expect("-5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
}
#[test]
fn test_unary_minus_triple() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("---5").expect("---5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
}
#[test]
fn test_unary_plus_identity() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("+5").expect("+5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
}
#[test]
fn test_unary_plus_minus() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("+-5").expect("+-5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
}
#[test]
fn test_unary_minus_plus() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("-+5").expect("-+5 should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
}
// Double negation of 9223372036854775808 would need +2^63, which does not fit
// in an i64, so an IntegerOverflow error is expected.
#[test]
fn test_unary_double_minus_overflow() {
let result = parse_expression("--9223372036854775808");
assert!(
result.is_err(),
"expected overflow error, got: {:?}",
result
);
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("IntegerOverflow"),
"expected IntegerOverflow, got: {msg}"
);
}
// A single minus in front of 9223372036854775808 is exactly i64::MIN and must
// be accepted.
#[test]
fn test_unary_minus_i64_min() {
use crate::ast::{CypherLiteral, Expr};
let expr = parse_expression("-9223372036854775808").expect("-i64::MIN should parse");
assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(i64::MIN)));
}
// The stacked-predicate tests below expect chaining two postfix/infix
// predicates on one operand to be rejected with InvalidPredicateChain.
#[test]
fn test_stacked_predicates_is_null_is_not_null() {
let result = parse("RETURN x IS NULL IS NOT NULL");
assert!(
result.is_err(),
"expected parse error for stacked IS NULL IS NOT NULL"
);
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("InvalidPredicateChain"),
"expected InvalidPredicateChain, got: {msg}"
);
}
#[test]
fn test_stacked_predicates_starts_with() {
let result = parse("RETURN x STARTS WITH 'a' STARTS WITH 'b'");
assert!(
result.is_err(),
"expected parse error for stacked STARTS WITH"
);
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("InvalidPredicateChain"),
"expected InvalidPredicateChain, got: {msg}"
);
}
#[test]
fn test_stacked_predicates_in() {
let result = parse("RETURN x IN [1] IN [true]");
assert!(result.is_err(), "expected parse error for stacked IN");
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("InvalidPredicateChain"),
"expected InvalidPredicateChain, got: {msg}"
);
}
#[test]
fn test_stacked_predicates_contains_ends_with() {
let result = parse("RETURN x CONTAINS 'a' ENDS WITH 'b'");
assert!(
result.is_err(),
"expected parse error for stacked CONTAINS/ENDS WITH"
);
let msg = result.unwrap_err().to_string();
assert!(
msg.contains("InvalidPredicateChain"),
"expected InvalidPredicateChain, got: {msg}"
);
}
// Unlike the stacked predicates above, several labels on one variable are
// accepted.
#[test]
fn test_label_stacking_allowed() {
assert!(
parse("MATCH (x) WHERE x:Person:Employee RETURN x").is_ok(),
"label stacking should be allowed"
);
}
// Chained comparisons are likewise accepted.
#[test]
fn test_range_chaining_allowed() {
assert!(
parse("MATCH (n) WHERE 1 < n.num < 3 RETURN n").is_ok(),
"range chaining 1 < n.num < 3 should be allowed"
);
}
}