1pub(crate) mod locy_parser;
2mod locy_walker;
3mod walker;
4
5use crate::ast::{Expr, Query};
6use crate::locy_ast::LocyProgram;
7use pest::Parser;
8use pest_derive::Parser;
9
/// Error type returned by the parsing entry points of this module.
///
/// Its `Display` output is exactly the stored message, as configured by the
/// `#[error("{message}")]` attribute.
#[derive(Debug, thiserror::Error)]
#[error("{message}")]
pub struct ParseError {
    /// Human-readable description of the failure.
    message: String,
}
16
17impl ParseError {
18 pub fn new(message: String) -> Self {
19 Self { message }
20 }
21}
22
/// Parser type whose implementation is generated by `pest_derive` from the
/// grammar file `grammar/cypher.pest`.
///
/// The `Rule` values used throughout this module (`Rule::query`,
/// `Rule::expression`, ...) name rules of that grammar.
#[derive(Parser)]
#[grammar = "grammar/cypher.pest"]
pub struct CypherParser;
26
/// Deepest nesting level `check_nesting_depth` accepts; input that nests
/// deeper is rejected with a `NestingTooDeep` error before the grammar runs.
const MAX_NESTING_DEPTH: u32 = 200;
38
39fn check_nesting_depth(input: &str) -> Result<(), ParseError> {
52 let bytes = input.as_bytes();
53 let mut i = 0usize;
54 let mut depth: i32 = 0;
55 let mut max_depth: i32 = 0;
56
57 while i < bytes.len() {
58 match bytes[i] {
59 quote @ (b'\'' | b'"') => {
60 i += 1;
62 while i < bytes.len() {
63 match bytes[i] {
64 b'\\' => i += 2,
65 c if c == quote => {
66 i += 1;
67 break;
68 }
69 _ => i += 1,
70 }
71 }
72 }
73 b'`' => {
74 i += 1;
76 while i < bytes.len() && bytes[i] != b'`' {
77 i += 1;
78 }
79 i += 1;
80 }
81 b'/' if bytes.get(i + 1) == Some(&b'/') => {
82 i += 2;
83 while i < bytes.len() && bytes[i] != b'\n' {
84 i += 1;
85 }
86 }
87 b'/' if bytes.get(i + 1) == Some(&b'*') => {
88 i += 2;
89 while i < bytes.len() && !(bytes[i] == b'*' && bytes.get(i + 1) == Some(&b'/')) {
90 i += 1;
91 }
92 i += 2;
93 }
94 b'(' | b'[' | b'{' => {
95 depth += 1;
96 max_depth = max_depth.max(depth);
97 i += 1;
98 }
99 b')' | b']' | b'}' => {
100 depth = (depth - 1).max(0);
101 i += 1;
102 }
103 b if b.is_ascii_alphabetic() || b == b'_' => {
104 let start = i;
106 while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
107 i += 1;
108 }
109 let word = &input[start..i];
110 if word.eq_ignore_ascii_case("case") {
111 depth += 1;
112 max_depth = max_depth.max(depth);
113 } else if word.eq_ignore_ascii_case("end") {
114 depth = (depth - 1).max(0);
115 }
116 }
117 _ => i += 1,
118 }
119
120 if max_depth as u32 > MAX_NESTING_DEPTH {
121 return Err(ParseError::new(format!(
122 "SyntaxError: NestingTooDeep - query nesting exceeds the maximum \
123 supported depth ({MAX_NESTING_DEPTH})"
124 )));
125 }
126 }
127
128 Ok(())
129}
130
131pub fn parse(input: &str) -> Result<Query, ParseError> {
132 check_nesting_depth(input)?;
133 let pairs = CypherParser::parse(Rule::query, input).map_err(|e| map_pest_error(input, e))?;
134
135 walker::build_query(pairs)
136}
137
138pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
139 check_nesting_depth(input)?;
140 let pairs =
141 CypherParser::parse(Rule::expression, input).map_err(|e| map_pest_error(input, e))?;
142
143 walker::build_expression(pairs.into_iter().next().unwrap())
144}
145
146pub fn parse_locy(input: &str) -> Result<LocyProgram, ParseError> {
147 use locy_parser::LocyParser;
148 use locy_parser::Rule as LocyRule;
149
150 check_nesting_depth(input)?;
151 let pairs = LocyParser::parse(LocyRule::locy_query, input)
152 .map_err(|e| map_locy_pest_error(input, e))?;
153
154 locy_walker::build_program(pairs.into_iter().next().unwrap())
155}
156
157fn expects_identifier(e: &pest::error::Error<Rule>) -> bool {
161 use pest::error::ErrorVariant;
162 match &e.variant {
163 ErrorVariant::ParsingError { positives, .. } => positives
164 .iter()
165 .any(|r| matches!(r, Rule::identifier | Rule::identifier_or_keyword)),
166 _ => false,
167 }
168}
169
170fn expects_locy_identifier(e: &pest::error::Error<locy_parser::Rule>) -> bool {
176 use pest::error::ErrorVariant;
177 match &e.variant {
178 ErrorVariant::ParsingError { positives, .. } => positives
179 .iter()
180 .any(|r| matches!(r, locy_parser::Rule::locy_identifier)),
181 _ => false,
182 }
183}
184
185fn error_position<R: pest::RuleType>(e: &pest::error::Error<R>) -> usize {
186 match e.location {
187 pest::error::InputLocation::Pos(p) => p,
188 pest::error::InputLocation::Span((s, _)) => s,
189 }
190}
191
/// Finds the byte range `(start, end)` of the token touching `pos`.
///
/// A token is a maximal run of ASCII alphanumerics and `_`, `-`, `.`, `#`,
/// `$`. `pos` is clamped to the last byte of `input`. When the byte at `pos`
/// is not a token byte, the token ending immediately before `pos` is used.
///
/// Returns `None` for empty input or when no token touches `pos`.
fn extract_token_span_at(input: &str, pos: usize) -> Option<(usize, usize)> {
    fn belongs_to_token(byte: u8) -> bool {
        matches!(byte, b'_' | b'-' | b'.' | b'#' | b'$') || byte.is_ascii_alphanumeric()
    }

    let raw = input.as_bytes();
    // `checked_sub` doubles as the empty-input guard.
    let last_index = raw.len().checked_sub(1)?;
    let probe = pos.min(last_index);

    // Pick a byte known to be inside the token: either the probed byte or the
    // one just before it.
    let anchor = if belongs_to_token(raw[probe]) {
        probe
    } else {
        let previous = probe.checked_sub(1)?;
        if !belongs_to_token(raw[previous]) {
            return None;
        }
        previous
    };

    // Token starts right after the nearest non-token byte on the left.
    let start = raw[..anchor]
        .iter()
        .rposition(|&b| !belongs_to_token(b))
        .map_or(0, |idx| idx + 1);
    // Token ends at the nearest non-token byte on the right.
    let end = raw[anchor..]
        .iter()
        .position(|&b| !belongs_to_token(b))
        .map_or(raw.len(), |offset| anchor + offset);

    Some((start, end))
}
222
/// Reports whether the token at `input[start..end]` sits where a map key
/// would: the next non-whitespace byte after it is `:` and the previous
/// non-whitespace byte before it is `{` or `,`.
///
/// Returns `false` when `start`/`end` fall outside `input`.
fn is_map_key_like_context(input: &str, start: usize, end: usize) -> bool {
    let raw = input.as_bytes();
    // `start >= len` also covers empty input.
    if start >= raw.len() || end > raw.len() {
        return false;
    }

    let following = raw[end..]
        .iter()
        .copied()
        .find(|b| !b.is_ascii_whitespace());
    if following != Some(b':') {
        return false;
    }

    let preceding = raw[..start]
        .iter()
        .rev()
        .copied()
        .find(|b| !b.is_ascii_whitespace());
    matches!(preceding, Some(b'{' | b','))
}
247
/// Extracts the `[...]` segment of a relationship pattern surrounding `pos`.
///
/// Looks for the last `[` before `pos` (clamped to the input length) and
/// accepts it only when the text before it, ignoring trailing whitespace,
/// ends with `-`. The segment runs through the first `]` after that `[`, or
/// up to `pos` when there is no closing bracket.
fn relationship_bracket_segment(input: &str, pos: usize) -> Option<&str> {
    let limit = input.len().min(pos);
    let open = input[..limit].rfind('[')?;

    // Only brackets introduced by a dash (`-[`) belong to a relationship.
    let follows_dash = input[..open].trim_end().ends_with('-');
    if !follows_dash {
        return None;
    }

    let close = match input[open..].find(']') {
        Some(offset) => open + offset + 1,
        None => limit,
    };
    Some(&input[open..close])
}
263
264fn is_invalid_relationship_pattern(input: &str, pos: usize) -> bool {
265 let Some(segment) = relationship_bracket_segment(input, pos) else {
266 return false;
267 };
268 (segment.contains("..") && !segment.contains('*')) || segment.contains("*-")
270}
271
272fn is_invalid_number_literal(input: &str, pos: usize) -> bool {
273 let Some((start, end)) = extract_token_span_at(input, pos) else {
274 return false;
275 };
276 if is_map_key_like_context(input, start, end) {
277 return false;
278 }
279 let token = &input[start..end];
280
281 let t = token.strip_prefix('-').unwrap_or(token);
282 if !t.as_bytes().first().is_some_and(|b| b.is_ascii_digit()) {
283 return false;
284 }
285
286 let has_only = |digits: &str, valid: fn(&char) -> bool| {
287 digits.is_empty() || !digits.chars().all(|c| valid(&c) || c == '_')
288 };
289
290 if let Some(digits) = t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) {
291 return has_only(digits, char::is_ascii_hexdigit);
292 }
293 if let Some(digits) = t.strip_prefix("0o").or_else(|| t.strip_prefix("0O")) {
294 return has_only(digits, |c| matches!(c, '0'..='7'));
295 }
296
297 t.chars().any(|c| c.is_ascii_alphabetic())
299}
300
/// Returns the character starting at byte offset `pos` when it is one of the
/// dash-like Unicode characters `—`, `–` or `−`.
///
/// Returns `None` when `pos` is out of range, not on a character boundary,
/// at the end of the input, or when the character is anything else.
fn invalid_unicode_character(input: &str, pos: usize) -> Option<char> {
    const DASH_LOOKALIKES: [char; 3] = ['—', '–', '−'];

    input
        .get(pos..)
        .and_then(|rest| rest.chars().next())
        .filter(|candidate| DASH_LOOKALIKES.contains(candidate))
}
305
/// Keywords that `reserved_keyword_at` treats as reserved.
///
/// Entries are lower-case because the lookup lower-cases the token before
/// comparing it against this list.
const CYPHER_RESERVED_KEYWORDS: &[&str] = &[
    "match",
    "optional",
    "where",
    "create",
    "merge",
    "set",
    "remove",
    "delete",
    "detach",
    "return",
    "with",
    "unwind",
    "union",
    "call",
    "yield",
    "distinct",
    "order",
    "by",
    "asc",
    "desc",
    "skip",
    "limit",
    "as",
    "and",
    "or",
    "xor",
    "not",
    "in",
    "contains",
    "starts",
    "ends",
    "is",
    "null",
    "true",
    "false",
    "case",
    "when",
    "then",
    "else",
    "if",
    "from",
    "to",
    "on",
    "drop",
    "alter",
    "show",
    "over",
    "partition",
    "explain",
    "recursive",
    "valid_at",
    "each",
];
362
/// Extra lower-case keywords that `map_locy_pest_error` passes to
/// `reserved_keyword_at` in addition to `CYPHER_RESERVED_KEYWORDS`.
const LOCY_RESERVED_KEYWORDS: &[&str] = &[
    "rule", "along", "prev", "fold", "best", "derive", "assume", "abduce", "query",
];
367
368fn reserved_keyword_at(input: &str, pos: usize, extra_keywords: &[&str]) -> Option<String> {
370 let (start, end) = extract_token_span_at(input, pos)?;
371 let token = &input[start..end];
372 let lower = token.to_lowercase();
373 if CYPHER_RESERVED_KEYWORDS.contains(&lower.as_str())
374 || extra_keywords.contains(&lower.as_str())
375 {
376 Some(token.to_string())
377 } else {
378 None
379 }
380}
381
/// Classifies a Locy parse error by the text that precedes byte offset `pos`.
///
/// The text before `pos` is right-trimmed and upper-cased, then checked in
/// this order:
///
/// 1. ends with `BEST BY`, `ALONG`, `FOLD`, `ASSUME` or `DERIVE`: the
///    matching clause/command category;
/// 2. contains `CREATE RULE` anywhere: `InvalidRuleDefinition`;
/// 3. ends with `QUERY`: `InvalidGoalQuery`.
///
/// `pos` is clamped to the input length. Returns `None` when nothing matches
/// or when `pos` does not fall on a character boundary (instead of panicking
/// on the slice).
fn locy_context_category(input: &str, pos: usize) -> Option<&'static str> {
    // Trailing keyword -> error category, in priority order.
    const TRAILING_KEYWORDS: [(&str, &str); 5] = [
        ("BEST BY", "InvalidBestByClause"),
        ("ALONG", "InvalidAlongClause"),
        ("FOLD", "InvalidFoldClause"),
        ("ASSUME", "InvalidAssumeBlock"),
        ("DERIVE", "InvalidDeriveCommand"),
    ];

    // `get` avoids the panic `input[..pos]` would raise on a bad offset.
    let before = input.get(..pos.min(input.len()))?.trim_end();
    let before_upper = before.to_uppercase();

    if let Some(&(_, category)) = TRAILING_KEYWORDS
        .iter()
        .find(|(keyword, _)| before_upper.ends_with(*keyword))
    {
        return Some(category);
    }
    if before_upper.contains("CREATE RULE") {
        return Some("InvalidRuleDefinition");
    }
    // Reaching this point already implies there is no `CREATE RULE` prefix.
    if before_upper.ends_with("QUERY") {
        return Some("InvalidGoalQuery");
    }
    None
}
412
413fn map_locy_pest_error(input: &str, e: pest::error::Error<locy_parser::Rule>) -> ParseError {
414 let pos = error_position(&e);
415
416 if is_invalid_relationship_pattern(input, pos) {
418 return ParseError::new(format!("LocySyntaxError: InvalidRelationshipPattern - {e}"));
419 }
420 if is_invalid_number_literal(input, pos) {
421 return ParseError::new(format!("LocySyntaxError: InvalidNumberLiteral - {e}"));
422 }
423 if let Some(ch) = invalid_unicode_character(input, pos) {
424 return ParseError::new(format!(
425 "LocySyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'"
426 ));
427 }
428 if let Some(kw) = expects_locy_identifier(&e)
429 .then(|| reserved_keyword_at(input, pos, LOCY_RESERVED_KEYWORDS))
430 .flatten()
431 {
432 return ParseError::new(format!(
433 "LocySyntaxError: ReservedKeyword - \"{kw}\" is a reserved keyword \
434 and cannot be used as a variable name. Use backtick-quoting: `{kw}`\n{e}"
435 ));
436 }
437
438 if let Some(category) = locy_context_category(input, pos) {
440 return ParseError::new(format!("LocySyntaxError: {category} - {e}"));
441 }
442
443 ParseError::new(format!("LocySyntaxError: {e}"))
444}
445
446fn map_pest_error(input: &str, e: pest::error::Error<Rule>) -> ParseError {
447 let pos = error_position(&e);
448 if is_invalid_relationship_pattern(input, pos) {
449 return ParseError::new(format!("SyntaxError: InvalidRelationshipPattern - {e}"));
450 }
451 if is_invalid_number_literal(input, pos) {
452 return ParseError::new(format!("SyntaxError: InvalidNumberLiteral - {e}"));
453 }
454 if let Some(ch) = invalid_unicode_character(input, pos) {
455 return ParseError::new(format!(
456 "SyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'"
457 ));
458 }
459 if let Some(kw) = expects_identifier(&e)
460 .then(|| reserved_keyword_at(input, pos, &[]))
461 .flatten()
462 {
463 return ParseError::new(format!(
464 "SyntaxError: ReservedKeyword - \"{kw}\" is a reserved keyword \
465 and cannot be used as a variable name. Use backtick-quoting: `{kw}`\n{e}"
466 ));
467 }
468
469 ParseError::new(format!("UnexpectedSyntax: {e}"))
470}
471
// Unit tests for the parsing entry points and their error classification.
#[cfg(test)]
mod tests {
    use super::*;

    // Each sample must be accepted by the grammar rule it is paired with.
    #[test]
    fn test_expression_parsing() {
        let cases = [
            ("1", Rule::integer),
            ("3.14", Rule::float),
            ("'hello'", Rule::string),
            ("n.name", Rule::expression),
            ("1 + 2", Rule::expression),
            ("a AND b OR c", Rule::expression),
        ];

        for (input, rule) in cases {
            let result = CypherParser::parse(rule, input);
            assert!(
                result.is_ok(),
                "Failed to parse '{}' as {:?}: {:?}",
                input,
                rule,
                result.err()
            );
        }
    }

    // Bracketed expression forms that must all parse.
    #[test]
    fn test_list_expressions() {
        // Empty list.
        assert!(parse_expression("[]").is_ok());

        // List literal.
        assert!(parse_expression("[1, 2, 3]").is_ok());

        // `x IN ... | ...` forms, with and without WHERE.
        assert!(parse_expression("[x IN range(1,10) | x * 2]").is_ok());
        assert!(parse_expression("[x IN list WHERE x > 5 | x]").is_ok());

        // Forms whose bracket starts with a graph pattern.
        assert!(parse_expression("[(n)-[:KNOWS]->(m) | m.name]").is_ok());
        assert!(parse_expression("[p = (n)-->(m) WHERE m.age > 30 | p]").is_ok());
    }

    // Short bracketed inputs that resemble the forms above must still parse.
    #[test]
    fn test_ambiguous_cases() {
        assert!(parse_expression("[n]").is_ok()); assert!(parse_expression("[n.name]").is_ok()); assert!(parse_expression("[n IN list]").is_ok()); assert!(parse_expression("[(n)]").is_ok()); }

    // Parses `input` with `parse`, expects failure, and returns the message.
    fn parse_err_msg(input: &str) -> String {
        parse(input).unwrap_err().to_string()
    }

    // `..` inside a relationship bracket without `*`.
    #[test]
    fn test_invalid_relationship_pattern_missing_star_error_code() {
        let msg = parse_err_msg("MATCH (a:A)\nMATCH (a)-[:LIKES..]->(c)\nRETURN c.name");
        assert!(
            msg.contains("InvalidRelationshipPattern"),
            "expected InvalidRelationshipPattern, got: {msg}"
        );
    }

    // A letter in the middle of a decimal literal.
    #[test]
    fn test_invalid_number_literal_error_code_decimal_alpha() {
        let msg = parse_err_msg("RETURN 9223372h54775808 AS literal");
        assert!(
            msg.contains("InvalidNumberLiteral"),
            "expected InvalidNumberLiteral, got: {msg}"
        );
    }

    // A hex prefix with no digits after it.
    #[test]
    fn test_invalid_number_literal_error_code_hex_prefix_only() {
        let msg = parse_err_msg("RETURN 0x AS literal");
        assert!(
            msg.contains("InvalidNumberLiteral"),
            "expected InvalidNumberLiteral, got: {msg}"
        );
    }

    // A dash-like Unicode character used in place of `-`.
    #[test]
    fn test_invalid_unicode_character_error_code() {
        let msg = parse_err_msg("RETURN 42 — 41");
        assert!(
            msg.contains("InvalidUnicodeCharacter"),
            "expected InvalidUnicodeCharacter, got: {msg}"
        );
    }

    // A `#` inside a number is not reported as an invalid number literal.
    #[test]
    fn test_symbol_in_number_stays_unexpected_syntax() {
        let msg = parse_err_msg("RETURN 9223372#54775808 AS literal");
        assert!(
            msg.contains("UnexpectedSyntax"),
            "expected UnexpectedSyntax, got: {msg}"
        );
    }

    // A digit-leading token in map-key position is not reported as a number.
    #[test]
    fn test_map_key_starting_with_number_stays_unexpected_syntax() {
        let msg = parse_err_msg("RETURN {1B2c3e67:1} AS literal");
        assert!(
            msg.contains("UnexpectedSyntax"),
            "expected UnexpectedSyntax, got: {msg}"
        );
    }

    // `--5` yields the integer literal 5.
    #[test]
    fn test_unary_minus_double() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("--5").expect("--5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
    }

    // `-5` yields the integer literal -5.
    #[test]
    fn test_unary_minus_single() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("-5").expect("-5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    // `---5` yields the integer literal -5.
    #[test]
    fn test_unary_minus_triple() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("---5").expect("---5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    // `+5` yields the integer literal 5.
    #[test]
    fn test_unary_plus_identity() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("+5").expect("+5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
    }

    // `+-5` yields the integer literal -5.
    #[test]
    fn test_unary_plus_minus() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("+-5").expect("+-5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    // `-+5` yields the integer literal -5.
    #[test]
    fn test_unary_minus_plus() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("-+5").expect("-+5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    // Double negation of 9223372036854775808 must report IntegerOverflow.
    #[test]
    fn test_unary_double_minus_overflow() {
        let result = parse_expression("--9223372036854775808");
        assert!(
            result.is_err(),
            "expected overflow error, got: {:?}",
            result
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("IntegerOverflow"),
            "expected IntegerOverflow, got: {msg}"
        );
    }

    // A single minus in front of 9223372036854775808 yields `i64::MIN`.
    #[test]
    fn test_unary_minus_i64_min() {
        use crate::ast::{CypherLiteral, Expr};
        let expr = parse_expression("-9223372036854775808").expect("-i64::MIN should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(i64::MIN)));
    }

    // Stacking `IS NULL` and `IS NOT NULL` must report InvalidPredicateChain.
    #[test]
    fn test_stacked_predicates_is_null_is_not_null() {
        let result = parse("RETURN x IS NULL IS NOT NULL");
        assert!(
            result.is_err(),
            "expected parse error for stacked IS NULL IS NOT NULL"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    // Stacking two `STARTS WITH` predicates must report InvalidPredicateChain.
    #[test]
    fn test_stacked_predicates_starts_with() {
        let result = parse("RETURN x STARTS WITH 'a' STARTS WITH 'b'");
        assert!(
            result.is_err(),
            "expected parse error for stacked STARTS WITH"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    // Stacking two `IN` predicates must report InvalidPredicateChain.
    #[test]
    fn test_stacked_predicates_in() {
        let result = parse("RETURN x IN [1] IN [true]");
        assert!(result.is_err(), "expected parse error for stacked IN");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    // Stacking `CONTAINS` and `ENDS WITH` must report InvalidPredicateChain.
    #[test]
    fn test_stacked_predicates_contains_ends_with() {
        let result = parse("RETURN x CONTAINS 'a' ENDS WITH 'b'");
        assert!(
            result.is_err(),
            "expected parse error for stacked CONTAINS/ENDS WITH"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    // Several labels on one variable in WHERE must still parse.
    #[test]
    fn test_label_stacking_allowed() {
        assert!(
            parse("MATCH (x) WHERE x:Person:Employee RETURN x").is_ok(),
            "label stacking should be allowed"
        );
    }

    // Chained comparisons must still parse.
    #[test]
    fn test_range_chaining_allowed() {
        assert!(
            parse("MATCH (n) WHERE 1 < n.num < 3 RETURN n").is_ok(),
            "range chaining 1 < n.num < 3 should be allowed"
        );
    }

    // A reserved keyword as a variable name reports ReservedKeyword together
    // with the backtick-quoting hint.
    #[test]
    fn test_reserved_keyword_as_variable_name() {
        let msg = parse_err_msg("MATCH (match:N) RETURN match");
        assert!(
            msg.contains("ReservedKeyword"),
            "expected ReservedKeyword, got: {msg}"
        );
        assert!(
            msg.contains("backtick-quoting"),
            "expected backtick hint, got: {msg}"
        );
    }

    // Same classification for `return` used as a variable name.
    #[test]
    fn test_reserved_keyword_return_as_variable() {
        let msg = parse_err_msg("MATCH (return:N) RETURN return");
        assert!(
            msg.contains("ReservedKeyword"),
            "expected ReservedKeyword, got: {msg}"
        );
    }

    // `end` is not reserved and may be used as a variable name.
    #[test]
    fn test_non_reserved_keyword_allowed() {
        assert!(
            parse("MATCH (end:N) RETURN end").is_ok(),
            "non-reserved keyword 'end' should be allowed as variable name"
        );
    }

    // Backtick quoting makes a reserved keyword usable as a variable name.
    #[test]
    fn test_backtick_escaped_reserved_keyword() {
        assert!(
            parse("MATCH (`match`:N) RETURN `match`").is_ok(),
            "backtick-escaped reserved keyword should be allowed"
        );
    }
}