// uni_cypher/grammar/mod.rs

1pub(crate) mod locy_parser;
2mod locy_walker;
3mod walker;
4
5use crate::ast::{Expr, Query};
6use crate::locy_ast::LocyProgram;
7use pest::Parser;
8use pest_derive::Parser;
9
10/// Error type for Cypher parsing failures.
11#[derive(Debug, thiserror::Error)]
12#[error("{message}")]
13pub struct ParseError {
14    message: String,
15}
16
17impl ParseError {
18    pub fn new(message: String) -> Self {
19        Self { message }
20    }
21}
22
/// Parser generated by `pest_derive` from the grammar file
/// `grammar/cypher.pest`.
#[derive(Parser)]
#[grammar = "grammar/cypher.pest"]
pub struct CypherParser;
26
/// Maximum supported nesting depth of bracketing constructs and `CASE`
/// expressions in a query.
///
/// Both the pest parser and the AST walker are recursive-descent, so a query
/// with thousands of nested parens / lists / maps / `CASE` expressions (or
/// nested parenthesized patterns) would otherwise exhaust the thread stack and
/// `abort()` the host process — an uncatchable crash for an embedded library
/// triggered by a query string. The ceiling sits far above any legitimate
/// query's nesting yet well below the depth at which the parser overflows even
/// a small (1 MiB) stack.
///
/// Enforced by [`check_nesting_depth`], which `parse`, `parse_expression` and
/// `parse_locy` all call before handing the input to pest.
const MAX_NESTING_DEPTH: u32 = 200;
38
39/// Rejects an `input` that nests bracketing constructs / `CASE` expressions
40/// deeper than [`MAX_NESTING_DEPTH`], before any recursive parsing begins.
41///
42/// Counts `(`/`[`/`{` and the `CASE` keyword as opening a level and `)`/`]`/`}`
43/// and the `END` keyword as closing one, tracking the maximum live depth.
44/// Brackets and keywords inside string / backtick literals and `//` or `/* */`
45/// comments are skipped. This is a deliberately conservative O(n) check: it may
46/// over-count, but never under-counts the nesting the parser would recurse into.
47///
48/// # Errors
49///
50/// Returns [`ParseError`] when nesting exceeds [`MAX_NESTING_DEPTH`].
51fn check_nesting_depth(input: &str) -> Result<(), ParseError> {
52    let bytes = input.as_bytes();
53    let mut i = 0usize;
54    let mut depth: i32 = 0;
55    let mut max_depth: i32 = 0;
56
57    while i < bytes.len() {
58        match bytes[i] {
59            quote @ (b'\'' | b'"') => {
60                // String literal: skip to the matching quote, honoring `\` escapes.
61                i += 1;
62                while i < bytes.len() {
63                    match bytes[i] {
64                        b'\\' => i += 2,
65                        c if c == quote => {
66                            i += 1;
67                            break;
68                        }
69                        _ => i += 1,
70                    }
71                }
72            }
73            b'`' => {
74                // Backtick-quoted identifier (no escapes in the grammar).
75                i += 1;
76                while i < bytes.len() && bytes[i] != b'`' {
77                    i += 1;
78                }
79                i += 1;
80            }
81            b'/' if bytes.get(i + 1) == Some(&b'/') => {
82                i += 2;
83                while i < bytes.len() && bytes[i] != b'\n' {
84                    i += 1;
85                }
86            }
87            b'/' if bytes.get(i + 1) == Some(&b'*') => {
88                i += 2;
89                while i < bytes.len() && !(bytes[i] == b'*' && bytes.get(i + 1) == Some(&b'/')) {
90                    i += 1;
91                }
92                i += 2;
93            }
94            b'(' | b'[' | b'{' => {
95                depth += 1;
96                max_depth = max_depth.max(depth);
97                i += 1;
98            }
99            b')' | b']' | b'}' => {
100                depth = (depth - 1).max(0);
101                i += 1;
102            }
103            b if b.is_ascii_alphabetic() || b == b'_' => {
104                // Read a whole word so only the bare keywords CASE / END count.
105                let start = i;
106                while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
107                    i += 1;
108                }
109                let word = &input[start..i];
110                if word.eq_ignore_ascii_case("case") {
111                    depth += 1;
112                    max_depth = max_depth.max(depth);
113                } else if word.eq_ignore_ascii_case("end") {
114                    depth = (depth - 1).max(0);
115                }
116            }
117            _ => i += 1,
118        }
119
120        if max_depth as u32 > MAX_NESTING_DEPTH {
121            return Err(ParseError::new(format!(
122                "SyntaxError: NestingTooDeep - query nesting exceeds the maximum \
123                 supported depth ({MAX_NESTING_DEPTH})"
124            )));
125        }
126    }
127
128    Ok(())
129}
130
131pub fn parse(input: &str) -> Result<Query, ParseError> {
132    check_nesting_depth(input)?;
133    let pairs = CypherParser::parse(Rule::query, input).map_err(|e| map_pest_error(input, e))?;
134
135    walker::build_query(pairs)
136}
137
138pub fn parse_expression(input: &str) -> Result<Expr, ParseError> {
139    check_nesting_depth(input)?;
140    let pairs =
141        CypherParser::parse(Rule::expression, input).map_err(|e| map_pest_error(input, e))?;
142
143    walker::build_expression(pairs.into_iter().next().unwrap())
144}
145
146pub fn parse_locy(input: &str) -> Result<LocyProgram, ParseError> {
147    use locy_parser::LocyParser;
148    use locy_parser::Rule as LocyRule;
149
150    check_nesting_depth(input)?;
151    let pairs = LocyParser::parse(LocyRule::locy_query, input)
152        .map_err(|e| map_locy_pest_error(input, e))?;
153
154    locy_walker::build_program(pairs.into_iter().next().unwrap())
155}
156
157/// Returns true if the pest error expects an identifier-like rule at the error position.
158/// Used to gate the reserved-keyword check so it only fires when a keyword is used
159/// where a variable name is expected, not when it appears after unrelated syntax errors.
160fn expects_identifier(e: &pest::error::Error<Rule>) -> bool {
161    use pest::error::ErrorVariant;
162    match &e.variant {
163        ErrorVariant::ParsingError { positives, .. } => positives
164            .iter()
165            .any(|r| matches!(r, Rule::identifier | Rule::identifier_or_keyword)),
166        _ => false,
167    }
168}
169
170/// Locy analogue of [`expects_identifier`]: true only when the parser failed at a
171/// position where an identifier was a valid next token. Used to gate the
172/// reserved-keyword diagnostic so a genuine syntax error (e.g. a misplaced
173/// operator) is not mislabelled as "X is a reserved keyword". Mirrors the guard
174/// the Cypher path already applies (`map_pest_error`).
175fn expects_locy_identifier(e: &pest::error::Error<locy_parser::Rule>) -> bool {
176    use pest::error::ErrorVariant;
177    match &e.variant {
178        ErrorVariant::ParsingError { positives, .. } => positives
179            .iter()
180            .any(|r| matches!(r, locy_parser::Rule::locy_identifier)),
181        _ => false,
182    }
183}
184
185fn error_position<R: pest::RuleType>(e: &pest::error::Error<R>) -> usize {
186    match e.location {
187        pest::error::InputLocation::Pos(p) => p,
188        pest::error::InputLocation::Span((s, _)) => s,
189    }
190}
191
/// Finds the byte span `(start, end)` of the token touching `pos`.
///
/// A token is a maximal run of ASCII alphanumerics and `_`, `-`, `.`, `#`,
/// `$`. `pos` is clamped to the last byte. If the byte at `pos` is not a token
/// byte, the byte immediately before it is tried instead, so a position just
/// past the end of a token still resolves to that token. `end` is exclusive.
///
/// Returns `None` for empty input or when no token touches `pos`.
fn extract_token_span_at(input: &str, pos: usize) -> Option<(usize, usize)> {
    fn is_token_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b'.' | b'#' | b'$')
    }

    let bytes = input.as_bytes();
    let last = bytes.len().checked_sub(1)?;
    let clamped = pos.min(last);

    // Anchor on the byte at `pos`, or on its left neighbour as a fallback.
    let anchor = if is_token_byte(bytes[clamped]) {
        clamped
    } else {
        let previous = clamped.checked_sub(1)?;
        if !is_token_byte(bytes[previous]) {
            return None;
        }
        previous
    };

    // Grow the span outwards to the nearest non-token byte on each side.
    let start = bytes[..anchor]
        .iter()
        .rposition(|&b| !is_token_byte(b))
        .map_or(0, |idx| idx + 1);
    let end = bytes[anchor..]
        .iter()
        .position(|&b| !is_token_byte(b))
        .map_or(bytes.len(), |offset| anchor + offset);

    Some((start, end))
}
222
/// Reports whether the token at `start..end` sits where a map key would.
///
/// That is the case when the next non-whitespace byte after `end` is `:` and
/// the nearest non-whitespace byte before `start` is `{` or `,`. Spans that
/// fall outside `input` yield `false`.
fn is_map_key_like_context(input: &str, start: usize, end: usize) -> bool {
    let bytes = input.as_bytes();
    if start >= bytes.len() || end > bytes.len() {
        return false;
    }

    let next_significant = bytes[end..].iter().find(|b| !b.is_ascii_whitespace());
    if next_significant != Some(&b':') {
        return false;
    }

    let previous_significant = bytes[..start]
        .iter()
        .rev()
        .find(|b| !b.is_ascii_whitespace());
    matches!(previous_significant, Some(b'{' | b','))
}
247
/// Returns the `[...]` segment of the relationship pattern enclosing `pos`.
///
/// Looks for the last `[` before `pos` (clamped to the input length) and
/// accepts it only when it is preceded, ignoring whitespace, by `-` — i.e. it
/// looks like `...-[ ... ]-...`. The segment runs through the first `]` after
/// the `[`, or up to `pos` when the bracket is never closed.
fn relationship_bracket_segment(input: &str, pos: usize) -> Option<&str> {
    let cursor = pos.min(input.len());
    let open = input[..cursor].rfind('[')?;

    // Restrict to relationship patterns: ...-[ ... ]-...
    if !input[..open].trim_end().ends_with('-') {
        return None;
    }

    let close = match input[open..].find(']') {
        Some(offset) => open + offset + 1,
        None => cursor,
    };
    Some(&input[open..close])
}
263
264fn is_invalid_relationship_pattern(input: &str, pos: usize) -> bool {
265    let Some(segment) = relationship_bracket_segment(input, pos) else {
266        return false;
267    };
268    // [:LIKES..] (missing `*`) or [:LIKES*-2] (negative range bound)
269    (segment.contains("..") && !segment.contains('*')) || segment.contains("*-")
270}
271
272fn is_invalid_number_literal(input: &str, pos: usize) -> bool {
273    let Some((start, end)) = extract_token_span_at(input, pos) else {
274        return false;
275    };
276    if is_map_key_like_context(input, start, end) {
277        return false;
278    }
279    let token = &input[start..end];
280
281    let t = token.strip_prefix('-').unwrap_or(token);
282    if !t.as_bytes().first().is_some_and(|b| b.is_ascii_digit()) {
283        return false;
284    }
285
286    let has_only = |digits: &str, valid: fn(&char) -> bool| {
287        digits.is_empty() || !digits.chars().all(|c| valid(&c) || c == '_')
288    };
289
290    if let Some(digits) = t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) {
291        return has_only(digits, char::is_ascii_hexdigit);
292    }
293    if let Some(digits) = t.strip_prefix("0o").or_else(|| t.strip_prefix("0O")) {
294        return has_only(digits, |c| matches!(c, '0'..='7'));
295    }
296
297    // Decimal-like token with alphabetic suffix/midfix, e.g. 9223372h54775808
298    t.chars().any(|c| c.is_ascii_alphabetic())
299}
300
/// Returns the character at byte offset `pos` when it is an em dash, an en
/// dash, or the Unicode minus sign.
///
/// Returns `None` for any other character, and when `pos` is out of range or
/// not on a character boundary.
fn invalid_unicode_character(input: &str, pos: usize) -> Option<char> {
    input
        .get(pos..)
        .and_then(|rest| rest.chars().next())
        .filter(|ch| matches!(ch, '—' | '–' | '−'))
}
305
/// All Cypher reserved keywords (from `keyword_reserved` in cypher.pest).
///
/// Stored lowercase for case-insensitive comparison: `reserved_keyword_at`
/// lowercases the candidate token before looking it up here.
const CYPHER_RESERVED_KEYWORDS: &[&str] = &[
    "match",
    "optional",
    "where",
    "create",
    "merge",
    "set",
    "remove",
    "delete",
    "detach",
    "return",
    "with",
    "unwind",
    "union",
    "call",
    "yield",
    "distinct",
    "order",
    "by",
    "asc",
    "desc",
    "skip",
    "limit",
    "as",
    "and",
    "or",
    "xor",
    "not",
    "in",
    "contains",
    "starts",
    "ends",
    "is",
    "null",
    "true",
    "false",
    "case",
    "when",
    "then",
    "else",
    "if",
    "from",
    "to",
    "on",
    "drop",
    "alter",
    "show",
    "over",
    "partition",
    "explain",
    "recursive",
    "valid_at",
    "each",
];
362
/// Additional Locy-only reserved keywords (from `locy_keyword_reserved` in locy.pest).
///
/// Stored lowercase; `map_locy_pest_error` passes this list to
/// `reserved_keyword_at` as the extra keywords to check on top of the Cypher
/// ones.
const LOCY_RESERVED_KEYWORDS: &[&str] = &[
    "rule", "along", "prev", "fold", "best", "derive", "assume", "abduce", "query",
];
367
368/// If the token at the error position is a reserved keyword, return it.
369fn reserved_keyword_at(input: &str, pos: usize, extra_keywords: &[&str]) -> Option<String> {
370    let (start, end) = extract_token_span_at(input, pos)?;
371    let token = &input[start..end];
372    let lower = token.to_lowercase();
373    if CYPHER_RESERVED_KEYWORDS.contains(&lower.as_str())
374        || extra_keywords.contains(&lower.as_str())
375    {
376        Some(token.to_string())
377    } else {
378        None
379    }
380}
381
/// Categorizes a Locy parse error from the text preceding the error position.
///
/// The text before `pos` is right-trimmed and upper-cased, then checked, in
/// this order, for:
///
/// * a trailing `BEST BY`, `ALONG`, `FOLD`, `ASSUME` or `DERIVE` keyword;
/// * `CREATE RULE` anywhere (a name / priority may follow it);
/// * a trailing standalone `QUERY`.
///
/// A trailing keyword only counts when it is a whole word, so an identifier
/// such as `subquery` or `manifold` right before the error is not mistaken
/// for `QUERY` / `FOLD`.
///
/// If `pos` is out of range or not on a character boundary, the whole input is
/// used as the preceding text instead of panicking.
///
/// Returns `None` when no category applies.
fn locy_context_category(input: &str, pos: usize) -> Option<&'static str> {
    /// Trailing keywords and their categories, most specific first.
    const TRAILING_KEYWORDS: [(&str, &str); 5] = [
        ("BEST BY", "InvalidBestByClause"),
        ("ALONG", "InvalidAlongClause"),
        ("FOLD", "InvalidFoldClause"),
        ("ASSUME", "InvalidAssumeBlock"),
        ("DERIVE", "InvalidDeriveCommand"),
    ];

    let before_upper = input
        .get(..pos)
        .unwrap_or(input)
        .trim_end()
        .to_uppercase();

    // True when the text ends with `keyword` and the character before it (if
    // any) cannot belong to the same word.
    let ends_with_keyword = |keyword: &str| {
        before_upper.strip_suffix(keyword).is_some_and(|rest| {
            !rest
                .chars()
                .next_back()
                .is_some_and(|c| c.is_alphanumeric() || c == '_')
        })
    };

    for (keyword, category) in TRAILING_KEYWORDS {
        if ends_with_keyword(keyword) {
            return Some(category);
        }
    }

    // Check for CREATE RULE (may have name/priority between)
    if before_upper.contains("CREATE RULE") {
        return Some("InvalidRuleDefinition");
    }

    // Standalone QUERY: the CREATE RULE ... YIELD ... QUERY form has already
    // been claimed by the check above.
    if ends_with_keyword("QUERY") {
        return Some("InvalidGoalQuery");
    }

    None
}
412
413fn map_locy_pest_error(input: &str, e: pest::error::Error<locy_parser::Rule>) -> ParseError {
414    let pos = error_position(&e);
415
416    // Reuse input-based heuristics from the Cypher parser
417    if is_invalid_relationship_pattern(input, pos) {
418        return ParseError::new(format!("LocySyntaxError: InvalidRelationshipPattern - {e}"));
419    }
420    if is_invalid_number_literal(input, pos) {
421        return ParseError::new(format!("LocySyntaxError: InvalidNumberLiteral - {e}"));
422    }
423    if let Some(ch) = invalid_unicode_character(input, pos) {
424        return ParseError::new(format!(
425            "LocySyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'"
426        ));
427    }
428    if let Some(kw) = expects_locy_identifier(&e)
429        .then(|| reserved_keyword_at(input, pos, LOCY_RESERVED_KEYWORDS))
430        .flatten()
431    {
432        return ParseError::new(format!(
433            "LocySyntaxError: ReservedKeyword - \"{kw}\" is a reserved keyword \
434             and cannot be used as a variable name. Use backtick-quoting: `{kw}`\n{e}"
435        ));
436    }
437
438    // Locy-specific context categorization
439    if let Some(category) = locy_context_category(input, pos) {
440        return ParseError::new(format!("LocySyntaxError: {category} - {e}"));
441    }
442
443    ParseError::new(format!("LocySyntaxError: {e}"))
444}
445
446fn map_pest_error(input: &str, e: pest::error::Error<Rule>) -> ParseError {
447    let pos = error_position(&e);
448    if is_invalid_relationship_pattern(input, pos) {
449        return ParseError::new(format!("SyntaxError: InvalidRelationshipPattern - {e}"));
450    }
451    if is_invalid_number_literal(input, pos) {
452        return ParseError::new(format!("SyntaxError: InvalidNumberLiteral - {e}"));
453    }
454    if let Some(ch) = invalid_unicode_character(input, pos) {
455        return ParseError::new(format!(
456            "SyntaxError: InvalidUnicodeCharacter - Invalid character '{ch}'"
457        ));
458    }
459    if let Some(kw) = expects_identifier(&e)
460        .then(|| reserved_keyword_at(input, pos, &[]))
461        .flatten()
462    {
463        return ParseError::new(format!(
464            "SyntaxError: ReservedKeyword - \"{kw}\" is a reserved keyword \
465             and cannot be used as a variable name. Use backtick-quoting: `{kw}`\n{e}"
466        ));
467    }
468
469    ParseError::new(format!("UnexpectedSyntax: {e}"))
470}
471
/// Unit tests for the parser entry points and the error-categorization
/// heuristics in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sample must parse under the grammar rule it is paired with.
    #[test]
    fn test_expression_parsing() {
        let cases = [
            ("1", Rule::integer),
            ("3.14", Rule::float),
            ("'hello'", Rule::string),
            ("n.name", Rule::expression),
            ("1 + 2", Rule::expression),
            ("a AND b OR c", Rule::expression),
        ];

        for (input, rule) in cases {
            let result = CypherParser::parse(rule, input);
            assert!(
                result.is_ok(),
                "Failed to parse '{}' as {:?}: {:?}",
                input,
                rule,
                result.err()
            );
        }
    }

    #[test]
    fn test_list_expressions() {
        // Empty list
        assert!(parse_expression("[]").is_ok());

        // List literal
        assert!(parse_expression("[1, 2, 3]").is_ok());

        // List comprehension
        assert!(parse_expression("[x IN range(1,10) | x * 2]").is_ok());
        assert!(parse_expression("[x IN list WHERE x > 5 | x]").is_ok());

        // Pattern comprehension - THE KEY TEST
        assert!(parse_expression("[(n)-[:KNOWS]->(m) | m.name]").is_ok());
        assert!(parse_expression("[p = (n)-->(m) WHERE m.age > 30 | p]").is_ok());
    }

    #[test]
    fn test_ambiguous_cases() {
        // These caused LR(1) conflicts before
        assert!(parse_expression("[n]").is_ok()); // List with variable
        assert!(parse_expression("[n.name]").is_ok()); // List with property
        assert!(parse_expression("[n IN list]").is_ok()); // List literal, not a comprehension
        // `[n IN list]` is a valid list literal containing the single boolean
        // expression `n IN list`. A comprehension MUST have `|`, and the
        // grammar encodes that:
        //   list_expression = { ... | "[" ~ list_comprehension_body ~ "]" | ... }
        //   list_comprehension_body = { identifier ~ IN ~ comprehension_expr ~ ... ~ pipe ~ expression }
        // so without the pipe the input cannot match `list_comprehension_body`
        // and is parsed as a `list_literal` holding `expression(n IN list)`.

        assert!(parse_expression("[(n)]").is_ok()); // List literal, not a pattern comprehension
        // `(n)` is a parenthesized expression here (`primary_expression` ->
        // `(` expression `)`, with `n` an identifier), so `[(n)]` is a list
        // literal.
        // Likewise `[(n)-->(m)]` is a list holding one pattern expression
        // (`pattern_expression` is valid in `boolean_primary`), whereas
        // `[(n)-->(m) | x]` is a pattern comprehension because
        // `pattern_comprehension` requires `|`.
    }

    /// Parses `input` as a query, expecting failure, and returns the rendered
    /// error message.
    fn parse_err_msg(input: &str) -> String {
        parse(input).unwrap_err().to_string()
    }

    #[test]
    fn test_invalid_relationship_pattern_missing_star_error_code() {
        let msg = parse_err_msg("MATCH (a:A)\nMATCH (a)-[:LIKES..]->(c)\nRETURN c.name");
        assert!(
            msg.contains("InvalidRelationshipPattern"),
            "expected InvalidRelationshipPattern, got: {msg}"
        );
    }

    #[test]
    fn test_invalid_number_literal_error_code_decimal_alpha() {
        let msg = parse_err_msg("RETURN 9223372h54775808 AS literal");
        assert!(
            msg.contains("InvalidNumberLiteral"),
            "expected InvalidNumberLiteral, got: {msg}"
        );
    }

    #[test]
    fn test_invalid_number_literal_error_code_hex_prefix_only() {
        let msg = parse_err_msg("RETURN 0x AS literal");
        assert!(
            msg.contains("InvalidNumberLiteral"),
            "expected InvalidNumberLiteral, got: {msg}"
        );
    }

    #[test]
    fn test_invalid_unicode_character_error_code() {
        let msg = parse_err_msg("RETURN 42 — 41");
        assert!(
            msg.contains("InvalidUnicodeCharacter"),
            "expected InvalidUnicodeCharacter, got: {msg}"
        );
    }

    #[test]
    fn test_symbol_in_number_stays_unexpected_syntax() {
        let msg = parse_err_msg("RETURN 9223372#54775808 AS literal");
        assert!(
            msg.contains("UnexpectedSyntax"),
            "expected UnexpectedSyntax, got: {msg}"
        );
    }

    #[test]
    fn test_map_key_starting_with_number_stays_unexpected_syntax() {
        let msg = parse_err_msg("RETURN {1B2c3e67:1} AS literal");
        assert!(
            msg.contains("UnexpectedSyntax"),
            "expected UnexpectedSyntax, got: {msg}"
        );
    }

    #[test]
    fn test_unary_minus_double() {
        use crate::ast::{CypherLiteral, Expr};
        // --5 → Integer(5)
        let expr = parse_expression("--5").expect("--5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
    }

    #[test]
    fn test_unary_minus_single() {
        use crate::ast::{CypherLiteral, Expr};
        // -5 → Integer(-5)
        let expr = parse_expression("-5").expect("-5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    #[test]
    fn test_unary_minus_triple() {
        use crate::ast::{CypherLiteral, Expr};
        // ---5 → Integer(-5)
        let expr = parse_expression("---5").expect("---5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    #[test]
    fn test_unary_plus_identity() {
        use crate::ast::{CypherLiteral, Expr};
        // +5 → Integer(5)
        let expr = parse_expression("+5").expect("+5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(5)));
    }

    #[test]
    fn test_unary_plus_minus() {
        use crate::ast::{CypherLiteral, Expr};
        // +-5 → Integer(-5)
        let expr = parse_expression("+-5").expect("+-5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    #[test]
    fn test_unary_minus_plus() {
        use crate::ast::{CypherLiteral, Expr};
        // -+5 → Integer(-5)
        let expr = parse_expression("-+5").expect("-+5 should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(-5)));
    }

    #[test]
    fn test_unary_double_minus_overflow() {
        // --9223372036854775808 → overflow error
        let result = parse_expression("--9223372036854775808");
        assert!(
            result.is_err(),
            "expected overflow error, got: {:?}",
            result
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("IntegerOverflow"),
            "expected IntegerOverflow, got: {msg}"
        );
    }

    #[test]
    fn test_unary_minus_i64_min() {
        use crate::ast::{CypherLiteral, Expr};
        // -9223372036854775808 → Integer(i64::MIN) (valid)
        let expr = parse_expression("-9223372036854775808").expect("-i64::MIN should parse");
        assert_eq!(expr, Expr::Literal(CypherLiteral::Integer(i64::MIN)));
    }

    #[test]
    fn test_stacked_predicates_is_null_is_not_null() {
        // x IS NULL IS NOT NULL → error
        let result = parse("RETURN x IS NULL IS NOT NULL");
        assert!(
            result.is_err(),
            "expected parse error for stacked IS NULL IS NOT NULL"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    #[test]
    fn test_stacked_predicates_starts_with() {
        // x STARTS WITH 'a' STARTS WITH 'b' → error
        let result = parse("RETURN x STARTS WITH 'a' STARTS WITH 'b'");
        assert!(
            result.is_err(),
            "expected parse error for stacked STARTS WITH"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    #[test]
    fn test_stacked_predicates_in() {
        // x IN [1] IN [true] → error
        let result = parse("RETURN x IN [1] IN [true]");
        assert!(result.is_err(), "expected parse error for stacked IN");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    #[test]
    fn test_stacked_predicates_contains_ends_with() {
        // x CONTAINS 'a' ENDS WITH 'b' → error
        let result = parse("RETURN x CONTAINS 'a' ENDS WITH 'b'");
        assert!(
            result.is_err(),
            "expected parse error for stacked CONTAINS/ENDS WITH"
        );
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("InvalidPredicateChain"),
            "expected InvalidPredicateChain, got: {msg}"
        );
    }

    #[test]
    fn test_label_stacking_allowed() {
        // x :Person :Employee → OK (label stacking is valid)
        // Note: label predicates in comparison context are valid
        assert!(
            parse("MATCH (x) WHERE x:Person:Employee RETURN x").is_ok(),
            "label stacking should be allowed"
        );
    }

    #[test]
    fn test_range_chaining_allowed() {
        // 1 < n.num < 3 → OK (required by TCK Comparison3)
        assert!(
            parse("MATCH (n) WHERE 1 < n.num < 3 RETURN n").is_ok(),
            "range chaining 1 < n.num < 3 should be allowed"
        );
    }

    #[test]
    fn test_reserved_keyword_as_variable_name() {
        let msg = parse_err_msg("MATCH (match:N) RETURN match");
        assert!(
            msg.contains("ReservedKeyword"),
            "expected ReservedKeyword, got: {msg}"
        );
        assert!(
            msg.contains("backtick-quoting"),
            "expected backtick hint, got: {msg}"
        );
    }

    #[test]
    fn test_reserved_keyword_return_as_variable() {
        let msg = parse_err_msg("MATCH (return:N) RETURN return");
        assert!(
            msg.contains("ReservedKeyword"),
            "expected ReservedKeyword, got: {msg}"
        );
    }

    #[test]
    fn test_non_reserved_keyword_allowed() {
        // `end` was moved to keyword_nonreserved — should parse fine
        assert!(
            parse("MATCH (end:N) RETURN end").is_ok(),
            "non-reserved keyword 'end' should be allowed as variable name"
        );
    }

    #[test]
    fn test_backtick_escaped_reserved_keyword() {
        assert!(
            parse("MATCH (`match`:N) RETURN `match`").is_ok(),
            "backtick-escaped reserved keyword should be allowed"
        );
    }
}