Skip to main content

lance_graph/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Cypher query parser
5//!
6//! This module provides parsing functionality for Cypher queries using nom parser combinators.
7//! It supports a subset of Cypher syntax focused on graph pattern matching and property access.
8
9use crate::ast::*;
10use crate::error::{GraphError, Result};
11use nom::{
12    branch::alt,
13    bytes::complete::{tag, tag_no_case, take_while1},
14    character::complete::{char, digit0, digit1, multispace0, multispace1, one_of},
15    combinator::{map, map_res, opt, peek, recognize},
16    multi::{many0, separated_list0, separated_list1},
17    sequence::{delimited, pair, preceded, tuple},
18    IResult,
19};
20use std::collections::HashMap;
21
22/// Parse a complete Cypher query
23pub fn parse_cypher_query(input: &str) -> Result<CypherQuery> {
24    let (remaining, query) = cypher_query(input).map_err(|e| GraphError::ParseError {
25        message: format!("Failed to parse Cypher query: {}", e),
26        position: 0,
27        location: snafu::Location::new(file!(), line!(), column!()),
28    })?;
29
30    if !remaining.trim().is_empty() {
31        return Err(GraphError::ParseError {
32            message: format!("Unexpected input after query: {}", remaining),
33            position: input.len() - remaining.len(),
34            location: snafu::Location::new(file!(), line!(), column!()),
35        });
36    }
37
38    Ok(query)
39}
40
41// Top-level parser for a complete Cypher query
42fn cypher_query(input: &str) -> IResult<&str, CypherQuery> {
43    let (input, _) = multispace0(input)?;
44    let (input, reading_clauses) = many0(reading_clause)(input)?;
45    let (input, pre_with_where) = opt(where_clause)(input)?;
46
47    // Optional WITH clause with optional post-WITH MATCH and WHERE
48    let (input, with_result) = opt(with_clause)(input)?;
49    // Only try to parse post-WITH clauses if we have a WITH clause
50    let (input, post_with_reading_clauses, post_with_where) = match with_result {
51        Some(_) => {
52            let (input, readings) = many0(reading_clause)(input)?;
53            let (input, where_cl) = opt(where_clause)(input)?;
54            (input, readings, where_cl)
55        }
56        None => (input, vec![], None),
57    };
58
59    let (input, return_clause) = return_clause(input)?;
60    let (input, order_by) = opt(order_by_clause)(input)?;
61    let (input, (skip, limit)) = pagination_clauses(input)?;
62    let (input, _) = multispace0(input)?;
63
64    Ok((
65        input,
66        CypherQuery {
67            reading_clauses,
68            where_clause: pre_with_where,
69            with_clause: with_result,
70            post_with_reading_clauses,
71            post_with_where_clause: post_with_where,
72            return_clause,
73            limit,
74            order_by,
75            skip,
76        },
77    ))
78}
79
80// Parse a reading clause (MATCH or UNWIND)
81fn reading_clause(input: &str) -> IResult<&str, ReadingClause> {
82    alt((
83        map(match_clause, ReadingClause::Match),
84        map(unwind_clause, ReadingClause::Unwind),
85    ))(input)
86}
87
88// Parse a MATCH clause
89fn match_clause(input: &str) -> IResult<&str, MatchClause> {
90    let (input, _) = multispace0(input)?;
91    let (input, _) = tag_no_case("MATCH")(input)?;
92    let (input, _) = multispace1(input)?;
93    let (input, patterns) = separated_list0(comma_ws, graph_pattern)(input)?;
94
95    Ok((input, MatchClause { patterns }))
96}
97
98// Parse an UNWIND clause
99fn unwind_clause(input: &str) -> IResult<&str, UnwindClause> {
100    let (input, _) = multispace0(input)?;
101    let (input, _) = tag_no_case("UNWIND")(input)?;
102    let (input, _) = multispace1(input)?;
103    let (input, expression) = value_expression(input)?;
104    let (input, _) = multispace1(input)?;
105    let (input, _) = tag_no_case("AS")(input)?;
106    let (input, _) = multispace1(input)?;
107    let (input, alias) = identifier(input)?;
108
109    Ok((
110        input,
111        UnwindClause {
112            expression,
113            alias: alias.to_string(),
114        },
115    ))
116}
117
118// Parse a graph pattern (node or path)
119fn graph_pattern(input: &str) -> IResult<&str, GraphPattern> {
120    alt((
121        map(path_pattern, GraphPattern::Path),
122        map(node_pattern, GraphPattern::Node),
123    ))(input)
124}
125
126// Parse a path pattern (only if there are segments)
127fn path_pattern(input: &str) -> IResult<&str, PathPattern> {
128    let (input, start_node) = node_pattern(input)?;
129    let (input, segments) = many0(path_segment)(input)?;
130
131    // Only succeed if we actually have path segments
132    if segments.is_empty() {
133        return Err(nom::Err::Error(nom::error::Error::new(
134            input,
135            nom::error::ErrorKind::Tag,
136        )));
137    }
138
139    Ok((
140        input,
141        PathPattern {
142            start_node,
143            segments,
144        },
145    ))
146}
147
148// Parse a path segment (relationship + node)
149fn path_segment(input: &str) -> IResult<&str, PathSegment> {
150    let (input, relationship) = relationship_pattern(input)?;
151    let (input, end_node) = node_pattern(input)?;
152
153    Ok((
154        input,
155        PathSegment {
156            relationship,
157            end_node,
158        },
159    ))
160}
161
162// Parse a node pattern: (variable:Label {prop: value})
163fn node_pattern(input: &str) -> IResult<&str, NodePattern> {
164    let (input, _) = multispace0(input)?;
165    let (input, _) = char('(')(input)?;
166    let (input, _) = multispace0(input)?;
167    let (input, variable) = opt(identifier)(input)?;
168    let (input, labels) = many0(preceded(char(':'), identifier))(input)?;
169    let (input, _) = multispace0(input)?;
170    let (input, properties) = opt(property_map)(input)?;
171    let (input, _) = multispace0(input)?;
172    let (input, _) = char(')')(input)?;
173
174    Ok((
175        input,
176        NodePattern {
177            variable: variable.map(|s| s.to_string()),
178            labels: labels.into_iter().map(|s| s.to_string()).collect(),
179            properties: properties.unwrap_or_default(),
180        },
181    ))
182}
183
184// Parse a relationship pattern: -[variable:TYPE {prop: value}]->
185fn relationship_pattern(input: &str) -> IResult<&str, RelationshipPattern> {
186    let (input, _) = multispace0(input)?;
187
188    // Parse direction and bracket content
189    let (input, (direction, content)) = alt((
190        // Outgoing: -[...]->
191        map(
192            tuple((
193                char('-'),
194                delimited(char('['), relationship_content, char(']')),
195                tag("->"),
196            )),
197            |(_, content, _)| (RelationshipDirection::Outgoing, content),
198        ),
199        // Incoming: <-[...]-
200        map(
201            tuple((
202                tag("<-"),
203                delimited(char('['), relationship_content, char(']')),
204                char('-'),
205            )),
206            |(_, content, _)| (RelationshipDirection::Incoming, content),
207        ),
208        // Undirected: -[...]-
209        map(
210            tuple((
211                char('-'),
212                delimited(char('['), relationship_content, char(']')),
213                char('-'),
214            )),
215            |(_, content, _)| (RelationshipDirection::Undirected, content),
216        ),
217    ))(input)?;
218
219    let (variable, types, properties, length) = content;
220
221    Ok((
222        input,
223        RelationshipPattern {
224            variable: variable.map(|s| s.to_string()),
225            types: types.into_iter().map(|s| s.to_string()).collect(),
226            direction,
227            properties: properties.unwrap_or_default(),
228            length,
229        },
230    ))
231}
232
// Type alias for the tuple produced by `relationship_content`, in field order:
// optional variable name, relationship type names, optional property map, and
// optional length range. The string slices borrow from the parsed input.
type RelationshipContentResult<'a> = (
    Option<&'a str>,
    Vec<&'a str>,
    Option<HashMap<String, PropertyValue>>,
    Option<LengthRange>,
);
240
241// Parse relationship content inside brackets
242fn relationship_content(input: &str) -> IResult<&str, RelationshipContentResult<'_>> {
243    let (input, _) = multispace0(input)?;
244    let (input, variable) = opt(identifier)(input)?;
245    let (input, types) = many0(preceded(char(':'), identifier))(input)?;
246    let (input, _) = multispace0(input)?;
247    let (input, length) = opt(length_range)(input)?;
248    let (input, _) = multispace0(input)?;
249    let (input, properties) = opt(property_map)(input)?;
250    let (input, _) = multispace0(input)?;
251
252    Ok((input, (variable, types, properties, length)))
253}
254
255// Parse a property map: {key: value, key2: value2}
256fn property_map(input: &str) -> IResult<&str, HashMap<String, PropertyValue>> {
257    let (input, _) = multispace0(input)?;
258    let (input, _) = char('{')(input)?;
259    let (input, _) = multispace0(input)?;
260    let (input, pairs) = separated_list0(comma_ws, property_pair)(input)?;
261    let (input, _) = multispace0(input)?;
262    let (input, _) = char('}')(input)?;
263
264    Ok((input, pairs.into_iter().collect()))
265}
266
267// Parse a property key-value pair
268fn property_pair(input: &str) -> IResult<&str, (String, PropertyValue)> {
269    let (input, _) = multispace0(input)?;
270    let (input, key) = identifier(input)?;
271    let (input, _) = multispace0(input)?;
272    let (input, _) = char(':')(input)?;
273    let (input, _) = multispace0(input)?;
274    let (input, value) = property_value(input)?;
275
276    Ok((input, (key.to_string(), value)))
277}
278
279// Parse a property value
280fn property_value(input: &str) -> IResult<&str, PropertyValue> {
281    alt((
282        map(string_literal, PropertyValue::String),
283        map(float_literal, PropertyValue::Float), // Try float BEFORE integer (more specific)
284        map(integer_literal, PropertyValue::Integer),
285        map(boolean_literal, PropertyValue::Boolean),
286        map(tag("null"), |_| PropertyValue::Null),
287        map(parameter, PropertyValue::Parameter),
288    ))(input)
289}
290
291// Parse a WHERE clause
292fn where_clause(input: &str) -> IResult<&str, WhereClause> {
293    let (input, _) = multispace0(input)?;
294    let (input, _) = tag_no_case("WHERE")(input)?;
295    let (input, _) = multispace1(input)?;
296    let (input, expression) = boolean_expression(input)?;
297
298    Ok((input, WhereClause { expression }))
299}
300
// Parse a boolean expression.
// Entry point of the boolean grammar: delegates to the OR level, which is the
// loosest-binding operator (OR is built from AND operands, AND from NOT operands).
fn boolean_expression(input: &str) -> IResult<&str, BooleanExpression> {
    boolean_or_expression(input)
}
305
306fn boolean_or_expression(input: &str) -> IResult<&str, BooleanExpression> {
307    let (input, first) = boolean_and_expression(input)?;
308    let (input, rest) = many0(preceded(
309        tuple((multispace0, tag_no_case("OR"), multispace1)),
310        boolean_and_expression,
311    ))(input)?;
312    let expr = rest.into_iter().fold(first, |acc, item| {
313        BooleanExpression::Or(Box::new(acc), Box::new(item))
314    });
315    Ok((input, expr))
316}
317
318fn boolean_and_expression(input: &str) -> IResult<&str, BooleanExpression> {
319    let (input, first) = boolean_not_expression(input)?;
320    let (input, rest) = many0(preceded(
321        tuple((multispace0, tag_no_case("AND"), multispace1)),
322        boolean_not_expression,
323    ))(input)?;
324    let expr = rest.into_iter().fold(first, |acc, item| {
325        BooleanExpression::And(Box::new(acc), Box::new(item))
326    });
327    Ok((input, expr))
328}
329
330fn boolean_not_expression(input: &str) -> IResult<&str, BooleanExpression> {
331    let (input, _) = multispace0(input)?;
332    alt((
333        map(
334            preceded(
335                tuple((tag_no_case("NOT"), multispace1)),
336                boolean_not_expression,
337            ),
338            |expr| BooleanExpression::Not(Box::new(expr)),
339        ),
340        boolean_primary_expression,
341    ))(input)
342}
343
344fn boolean_primary_expression(input: &str) -> IResult<&str, BooleanExpression> {
345    let (input, _) = multispace0(input)?;
346    alt((
347        map(
348            delimited(
349                tuple((char('('), multispace0)),
350                boolean_expression,
351                tuple((multispace0, char(')'))),
352            ),
353            |expr| expr,
354        ),
355        comparison_expression,
356    ))(input)
357}
358
359fn comparison_expression(input: &str) -> IResult<&str, BooleanExpression> {
360    let (input, _) = multispace0(input)?;
361    let (input, left) = value_expression(input)?;
362    let (input, _) = multispace0(input)?;
363    let left_clone = left.clone();
364
365    if let Ok((input_after_in, (_, _, list))) =
366        tuple((tag_no_case("IN"), multispace0, value_expression_list))(input)
367    {
368        return Ok((
369            input_after_in,
370            BooleanExpression::In {
371                expression: left,
372                list,
373            },
374        ));
375    }
376    // Match LIKE pattern
377    if let Ok((input_after_like, (_, _, pattern))) =
378        tuple((tag_no_case("LIKE"), multispace0, string_literal))(input)
379    {
380        return Ok((
381            input_after_like,
382            BooleanExpression::Like {
383                expression: left,
384                pattern,
385            },
386        ));
387    }
388    // Match ILIKE pattern (case-insensitive LIKE)
389    if let Ok((input_after_ilike, (_, _, pattern))) =
390        tuple((tag_no_case("ILIKE"), multispace0, string_literal))(input)
391    {
392        return Ok((
393            input_after_ilike,
394            BooleanExpression::ILike {
395                expression: left,
396                pattern,
397            },
398        ));
399    }
400    // Match CONTAINS substring
401    if let Ok((input_after_contains, (_, _, substring))) =
402        tuple((tag_no_case("CONTAINS"), multispace0, string_literal))(input)
403    {
404        return Ok((
405            input_after_contains,
406            BooleanExpression::Contains {
407                expression: left,
408                substring,
409            },
410        ));
411    }
412    // Match STARTS WITH prefix (note: multi-word operator)
413    if let Ok((input_after_starts, (_, _, _, _, prefix))) = tuple((
414        tag_no_case("STARTS"),
415        multispace1,
416        tag_no_case("WITH"),
417        multispace0,
418        string_literal,
419    ))(input)
420    {
421        return Ok((
422            input_after_starts,
423            BooleanExpression::StartsWith {
424                expression: left,
425                prefix,
426            },
427        ));
428    }
429    // Match ENDS WITH suffix (note: multi-word operator)
430    if let Ok((input_after_ends, (_, _, _, _, suffix))) = tuple((
431        tag_no_case("ENDS"),
432        multispace1,
433        tag_no_case("WITH"),
434        multispace0,
435        string_literal,
436    ))(input)
437    {
438        return Ok((
439            input_after_ends,
440            BooleanExpression::EndsWith {
441                expression: left,
442                suffix,
443            },
444        ));
445    }
446    // Match is null
447    if let Ok((rest, ())) = is_null_comparison(input) {
448        return Ok((rest, BooleanExpression::IsNull(left_clone)));
449    }
450    // Match is not null
451    if let Ok((rest, ())) = is_not_null_comparison(input) {
452        return Ok((rest, BooleanExpression::IsNotNull(left_clone)));
453    }
454
455    let (input, operator) = comparison_operator(input)?;
456    let (input, _) = multispace0(input)?;
457    let (input, right) = value_expression(input)?;
458
459    Ok((
460        input,
461        BooleanExpression::Comparison {
462            left: left_clone,
463            operator,
464            right,
465        },
466    ))
467}
468
469// Parse a comparison operator
470fn comparison_operator(input: &str) -> IResult<&str, ComparisonOperator> {
471    alt((
472        map(tag("="), |_| ComparisonOperator::Equal),
473        map(tag("<>"), |_| ComparisonOperator::NotEqual),
474        map(tag("!="), |_| ComparisonOperator::NotEqual),
475        map(tag("<="), |_| ComparisonOperator::LessThanOrEqual),
476        map(tag(">="), |_| ComparisonOperator::GreaterThanOrEqual),
477        map(tag("<"), |_| ComparisonOperator::LessThan),
478        map(tag(">"), |_| ComparisonOperator::GreaterThan),
479    ))(input)
480}
481
482// Parse a basic value expression (without vector functions to avoid circular dependency)
483fn basic_value_expression(input: &str) -> IResult<&str, ValueExpression> {
484    alt((
485        parse_vector_literal, // Try vector literal first [0.1, 0.2]
486        parse_parameter,      // Try $parameter
487        function_call,        // Regular function calls
488        map(property_value, ValueExpression::Literal), // Try literals BEFORE property references
489        map(property_reference, ValueExpression::Property),
490        map(identifier, |id| ValueExpression::Variable(id.to_string())),
491    ))(input)
492}
493
494// Parse a value expression
495// Optimization: Use peek to avoid expensive backtracking for non-vector queries
496fn value_expression(input: &str) -> IResult<&str, ValueExpression> {
497    // Peek at first identifier to dispatch to correct parser
498    // This eliminates failed parser attempts for every non-vector expression
499    if let Ok((_, first_ident)) = peek(identifier)(input) {
500        let ident_lower = first_ident.to_lowercase();
501
502        match ident_lower.as_str() {
503            "vector_distance" => return parse_vector_distance(input),
504            "vector_similarity" => return parse_vector_similarity(input),
505            _ => {} // Not a vector function, continue to basic expressions
506        }
507    }
508
509    // Fast path for common expressions
510    basic_value_expression(input)
511}
512
513// Parse distance metric: cosine, l2, dot
514fn parse_distance_metric(input: &str) -> IResult<&str, DistanceMetric> {
515    alt((
516        map(tag_no_case("cosine"), |_| DistanceMetric::Cosine),
517        map(tag_no_case("l2"), |_| DistanceMetric::L2),
518        map(tag_no_case("dot"), |_| DistanceMetric::Dot),
519    ))(input)
520}
521
522// Parse vector_distance(expr, expr, metric)
523fn parse_vector_distance(input: &str) -> IResult<&str, ValueExpression> {
524    let (input, _) = tag_no_case("vector_distance")(input)?;
525    let (input, _) = multispace0(input)?;
526    let (input, _) = char('(')(input)?;
527    let (input, _) = multispace0(input)?;
528
529    // Parse left expression - use basic_value_expression to avoid circular dependency
530    let (input, left) = basic_value_expression(input)?;
531    let (input, _) = multispace0(input)?;
532    let (input, _) = char(',')(input)?;
533    let (input, _) = multispace0(input)?;
534
535    // Parse right expression - use basic_value_expression to avoid circular dependency
536    let (input, right) = basic_value_expression(input)?;
537    let (input, _) = multispace0(input)?;
538    let (input, _) = char(',')(input)?;
539    let (input, _) = multispace0(input)?;
540
541    // Parse metric
542    let (input, metric) = parse_distance_metric(input)?;
543    let (input, _) = multispace0(input)?;
544    let (input, _) = char(')')(input)?;
545
546    Ok((
547        input,
548        ValueExpression::VectorDistance {
549            left: Box::new(left),
550            right: Box::new(right),
551            metric,
552        },
553    ))
554}
555
556// Parse vector_similarity(expr, expr, metric)
557fn parse_vector_similarity(input: &str) -> IResult<&str, ValueExpression> {
558    let (input, _) = tag_no_case("vector_similarity")(input)?;
559    let (input, _) = multispace0(input)?;
560    let (input, _) = char('(')(input)?;
561    let (input, _) = multispace0(input)?;
562
563    // Parse left expression - use basic_value_expression to avoid circular dependency
564    let (input, left) = basic_value_expression(input)?;
565    let (input, _) = multispace0(input)?;
566    let (input, _) = char(',')(input)?;
567    let (input, _) = multispace0(input)?;
568
569    // Parse right expression - use basic_value_expression to avoid circular dependency
570    let (input, right) = basic_value_expression(input)?;
571    let (input, _) = multispace0(input)?;
572    let (input, _) = char(',')(input)?;
573    let (input, _) = multispace0(input)?;
574
575    // Parse metric
576    let (input, metric) = parse_distance_metric(input)?;
577    let (input, _) = multispace0(input)?;
578    let (input, _) = char(')')(input)?;
579
580    Ok((
581        input,
582        ValueExpression::VectorSimilarity {
583            left: Box::new(left),
584            right: Box::new(right),
585            metric,
586        },
587    ))
588}
589
590// Parse parameter reference: $name
591fn parse_parameter(input: &str) -> IResult<&str, ValueExpression> {
592    let (input, name) = parameter(input)?;
593    Ok((input, ValueExpression::Parameter(name)))
594}
595
596// Parse a function call: function_name(args)
597fn function_call(input: &str) -> IResult<&str, ValueExpression> {
598    let (input, name) = identifier(input)?;
599    let (input, _) = multispace0(input)?;
600    let (input, _) = char('(')(input)?;
601    let (input, _) = multispace0(input)?;
602
603    // Parse optional DISTINCT keyword
604    let (input, distinct) = opt(tag_no_case("DISTINCT"))(input)?;
605    let distinct = distinct.is_some();
606    let (input, _) = if distinct {
607        multispace1(input)?
608    } else {
609        (input, "")
610    };
611
612    // Handle COUNT(*) special case - only allow * for COUNT function
613    if let Ok((input_after_star, _)) = char::<_, nom::error::Error<&str>>('*')(input) {
614        // Validate that this is COUNT function
615        if name.to_lowercase() == "count" {
616            let (input, _) = multispace0(input_after_star)?;
617            let (input, _) = char(')')(input)?;
618            return Ok((
619                input,
620                ValueExpression::AggregateFunction {
621                    name: name.to_string(),
622                    args: vec![ValueExpression::Variable("*".to_string())],
623                    distinct,
624                },
625            ));
626        } else {
627            // Not COUNT - fail parsing to try regular argument parsing
628            // This will naturally fail since * is not a valid value_expression
629        }
630    }
631
632    // Parse regular function arguments
633    let (input, args) = separated_list0(
634        tuple((multispace0, char(','), multispace0)),
635        value_expression,
636    )(input)?;
637    let (input, _) = multispace0(input)?;
638    let (input, _) = char(')')(input)?;
639
640    // Route based on function type
641    use crate::ast::{classify_function, FunctionType};
642    match classify_function(name) {
643        FunctionType::Aggregate => Ok((
644            input,
645            ValueExpression::AggregateFunction {
646                name: name.to_string(),
647                args,
648                distinct,
649            },
650        )),
651        FunctionType::Scalar => {
652            // Validate: reject DISTINCT on scalar functions at parse time
653            if distinct {
654                return Err(nom::Err::Failure(nom::error::Error::new(
655                    input,
656                    nom::error::ErrorKind::Verify,
657                )));
658            }
659            Ok((
660                input,
661                ValueExpression::ScalarFunction {
662                    name: name.to_string(),
663                    args,
664                },
665            ))
666        }
667        FunctionType::Unknown => {
668            // Default to ScalarFunction for unknown functions
669            // They'll be handled as NULL in expression conversion
670            if distinct {
671                return Err(nom::Err::Failure(nom::error::Error::new(
672                    input,
673                    nom::error::ErrorKind::Verify,
674                )));
675            }
676            Ok((
677                input,
678                ValueExpression::ScalarFunction {
679                    name: name.to_string(),
680                    args,
681                },
682            ))
683        }
684    }
685}
686
687fn value_expression_list(input: &str) -> IResult<&str, Vec<ValueExpression>> {
688    delimited(
689        tuple((char('['), multispace0)),
690        separated_list1(
691            tuple((multispace0, char(','), multispace0)),
692            value_expression,
693        ),
694        tuple((multispace0, char(']'))),
695    )(input)
696}
697
698// Parse a float32 literal for vectors
699fn float32_literal(input: &str) -> IResult<&str, f32> {
700    map_res(
701        recognize(tuple((
702            opt(char('-')),
703            alt((
704                // Scientific notation: 1e-3, 2.5e2
705                recognize(tuple((
706                    digit1,
707                    opt(tuple((char('.'), digit0))),
708                    one_of("eE"),
709                    opt(one_of("+-")),
710                    digit1,
711                ))),
712                // Regular float: 1.23 or integer: 123
713                recognize(tuple((digit1, opt(tuple((char('.'), digit0)))))),
714            )),
715        ))),
716        |s: &str| s.parse::<f32>(),
717    )(input)
718}
719
720// Parse vector literal: [0.1, 0.2, 0.3]
721fn parse_vector_literal(input: &str) -> IResult<&str, ValueExpression> {
722    let (input, _) = char('[')(input)?;
723    let (input, _) = multispace0(input)?;
724
725    let (input, values) = separated_list1(
726        tuple((multispace0, char(','), multispace0)),
727        float32_literal,
728    )(input)?;
729
730    let (input, _) = multispace0(input)?;
731    let (input, _) = char(']')(input)?;
732
733    Ok((input, ValueExpression::VectorLiteral(values)))
734}
735
736// Parse a property reference: variable.property
737fn property_reference(input: &str) -> IResult<&str, PropertyRef> {
738    let (input, variable) = identifier(input)?;
739    let (input, _) = char('.')(input)?;
740    let (input, property) = identifier(input)?;
741
742    Ok((
743        input,
744        PropertyRef {
745            variable: variable.to_string(),
746            property: property.to_string(),
747        },
748    ))
749}
750
751// Parse a WITH clause (intermediate projection/aggregation)
752fn with_clause(input: &str) -> IResult<&str, WithClause> {
753    let (input, _) = multispace0(input)?;
754    let (input, _) = tag_no_case("WITH")(input)?;
755    let (input, _) = multispace1(input)?;
756    let (input, items) = separated_list0(comma_ws, return_item)(input)?;
757    let (input, order_by) = opt(order_by_clause)(input)?;
758    let (input, limit) = opt(limit_clause)(input)?;
759
760    Ok((
761        input,
762        WithClause {
763            items,
764            order_by,
765            limit,
766        },
767    ))
768}
769
770// Parse a RETURN clause
771fn return_clause(input: &str) -> IResult<&str, ReturnClause> {
772    let (input, _) = multispace0(input)?;
773    let (input, _) = tag_no_case("RETURN")(input)?;
774    let (input, _) = multispace1(input)?;
775    let (input, distinct) = opt(tag_no_case("DISTINCT"))(input)?;
776    let (input, _) = if distinct.is_some() {
777        multispace1(input)?
778    } else {
779        (input, "")
780    };
781    let (input, items) = separated_list0(comma_ws, return_item)(input)?;
782
783    Ok((
784        input,
785        ReturnClause {
786            distinct: distinct.is_some(),
787            items,
788        },
789    ))
790}
791
792// Parse a return item
793fn return_item(input: &str) -> IResult<&str, ReturnItem> {
794    let (input, expression) = value_expression(input)?;
795    let (input, _) = multispace0(input)?;
796    let (input, alias) = opt(preceded(
797        tuple((tag_no_case("AS"), multispace1)),
798        identifier,
799    ))(input)?;
800
801    Ok((
802        input,
803        ReturnItem {
804            expression,
805            alias: alias.map(|s| s.to_string()),
806        },
807    ))
808}
809
810// Match IS NULL in WHERE clause
811fn is_null_comparison(input: &str) -> IResult<&str, ()> {
812    let (input, _) = multispace0(input)?;
813    let (input, _) = tag_no_case("IS")(input)?;
814    let (input, _) = multispace1(input)?;
815    let (input, _) = tag_no_case("NULL")(input)?;
816    let (input, _) = multispace0(input)?;
817
818    Ok((input, ()))
819}
820
821// Match IS NOT NULL in WHERE clause
822fn is_not_null_comparison(input: &str) -> IResult<&str, ()> {
823    let (input, _) = multispace0(input)?;
824    let (input, _) = tag_no_case("IS")(input)?;
825    let (input, _) = multispace1(input)?;
826    let (input, _) = tag_no_case("NOT")(input)?;
827    let (input, _) = multispace1(input)?;
828    let (input, _) = tag_no_case("NULL")(input)?;
829    let (input, _) = multispace0(input)?;
830
831    Ok((input, ()))
832}
833
834// Parse an ORDER BY clause
835fn order_by_clause(input: &str) -> IResult<&str, OrderByClause> {
836    let (input, _) = multispace0(input)?;
837    let (input, _) = tag_no_case("ORDER")(input)?;
838    let (input, _) = multispace1(input)?;
839    let (input, _) = tag_no_case("BY")(input)?;
840    let (input, _) = multispace1(input)?;
841    let (input, items) = separated_list0(comma_ws, order_by_item)(input)?;
842
843    Ok((input, OrderByClause { items }))
844}
845
846// Parse an order by item
847fn order_by_item(input: &str) -> IResult<&str, OrderByItem> {
848    let (input, expression) = value_expression(input)?;
849    let (input, _) = multispace0(input)?;
850    let (input, direction) = opt(alt((
851        map(tag_no_case("ASC"), |_| SortDirection::Ascending),
852        map(tag_no_case("DESC"), |_| SortDirection::Descending),
853    )))(input)?;
854
855    Ok((
856        input,
857        OrderByItem {
858            expression,
859            direction: direction.unwrap_or(SortDirection::Ascending),
860        },
861    ))
862}
863
864// Parse a LIMIT clause
865fn limit_clause(input: &str) -> IResult<&str, u64> {
866    let (input, _) = multispace0(input)?;
867    let (input, _) = tag_no_case("LIMIT")(input)?;
868    let (input, _) = multispace1(input)?;
869    let (input, limit) = integer_literal(input)?;
870
871    Ok((input, limit as u64))
872}
873
874// Parse a SKIP clause
875fn skip_clause(input: &str) -> IResult<&str, u64> {
876    let (input, _) = multispace0(input)?;
877    let (input, _) = tag_no_case("SKIP")(input)?;
878    let (input, _) = multispace1(input)?;
879    let (input, skip) = integer_literal(input)?;
880
881    Ok((input, skip as u64))
882}
883
884// Parse pagination clauses (SKIP and LIMIT)
885fn pagination_clauses(input: &str) -> IResult<&str, (Option<u64>, Option<u64>)> {
886    let (mut remaining, _) = multispace0(input)?;
887    let mut skip: Option<u64> = None;
888    let mut limit: Option<u64> = None;
889
890    loop {
891        let before = remaining;
892
893        if skip.is_none() {
894            if let Ok((i, s)) = skip_clause(remaining) {
895                skip = Some(s);
896                remaining = i;
897                continue;
898            }
899        }
900
901        if limit.is_none() {
902            if let Ok((i, l)) = limit_clause(remaining) {
903                limit = Some(l);
904                remaining = i;
905                continue;
906            }
907        }
908
909        if before == remaining {
910            break;
911        }
912    }
913
914    Ok((remaining, (skip, limit)))
915}
916
917// Helper parsers
918
919// Parse an identifier
920fn identifier(input: &str) -> IResult<&str, &str> {
921    take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)
922}
923
924// Parse a string literal
925fn string_literal(input: &str) -> IResult<&str, String> {
926    alt((double_quoted_string, single_quoted_string))(input)
927}
928
929fn double_quoted_string(input: &str) -> IResult<&str, String> {
930    let (input, _) = char('"')(input)?;
931    let (input, content) = take_while1(|c| c != '"')(input)?;
932    let (input, _) = char('"')(input)?;
933    Ok((input, content.to_string()))
934}
935
936fn single_quoted_string(input: &str) -> IResult<&str, String> {
937    let (input, _) = char('\'')(input)?;
938    let (input, content) = take_while1(|c| c != '\'')(input)?;
939    let (input, _) = char('\'')(input)?;
940    Ok((input, content.to_string()))
941}
942
943// Parse an integer literal
944fn integer_literal(input: &str) -> IResult<&str, i64> {
945    let (input, digits) = recognize(pair(
946        opt(char('-')),
947        take_while1(|c: char| c.is_ascii_digit()),
948    ))(input)?;
949
950    Ok((input, digits.parse().unwrap()))
951}
952
953// Parse a float literal
954fn float_literal(input: &str) -> IResult<&str, f64> {
955    let (input, number) = recognize(tuple((
956        opt(char('-')),
957        take_while1(|c: char| c.is_ascii_digit()),
958        char('.'),
959        take_while1(|c: char| c.is_ascii_digit()),
960    )))(input)?;
961
962    Ok((input, number.parse().unwrap()))
963}
964
965// Parse a boolean literal
966fn boolean_literal(input: &str) -> IResult<&str, bool> {
967    alt((
968        map(tag_no_case("true"), |_| true),
969        map(tag_no_case("false"), |_| false),
970    ))(input)
971}
972
973// Parse a parameter reference
974fn parameter(input: &str) -> IResult<&str, String> {
975    // Only support $param syntax
976    map(preceded(char('$'), identifier), |s| s.to_string())(input)
977}
978
979// Parse comma with optional whitespace
980fn comma_ws(input: &str) -> IResult<&str, ()> {
981    let (input, _) = multispace0(input)?;
982    let (input, _) = char(',')(input)?;
983    let (input, _) = multispace0(input)?;
984    Ok((input, ()))
985}
986
987// Parse variable-length path syntax: *1..2, *..3, *2.., *
988fn length_range(input: &str) -> IResult<&str, LengthRange> {
989    let (input, _) = char('*')(input)?;
990    let (input, _) = multispace0(input)?;
991
992    // Parse different length patterns
993    alt((
994        // *min..max (e.g., *1..3)
995        map(
996            tuple((
997                nom::character::complete::u32,
998                tag(".."),
999                nom::character::complete::u32,
1000            )),
1001            |(min, _, max)| LengthRange {
1002                min: Some(min),
1003                max: Some(max),
1004            },
1005        ),
1006        // *..max (e.g., *..3)
1007        map(preceded(tag(".."), nom::character::complete::u32), |max| {
1008            LengthRange {
1009                min: None,
1010                max: Some(max),
1011            }
1012        }),
1013        // *min.. (e.g., *2..)
1014        map(
1015            tuple((nom::character::complete::u32, tag(".."))),
1016            |(min, _)| LengthRange {
1017                min: Some(min),
1018                max: None,
1019            },
1020        ),
1021        // *min (e.g., *2)
1022        map(nom::character::complete::u32, |min| LengthRange {
1023            min: Some(min),
1024            max: Some(min),
1025        }),
1026        // * (unlimited)
1027        map(multispace0, |_| LengthRange {
1028            min: None,
1029            max: None,
1030        }),
1031    ))(input)
1032}
1033
1034#[cfg(test)]
1035mod tests {
1036    use super::*;
1037    use crate::ast::{BooleanExpression, ComparisonOperator, PropertyValue, ValueExpression};
1038
1039    #[test]
1040    fn test_parse_simple_node_query() {
1041        let query = "MATCH (n:Person) RETURN n.name";
1042        let result = parse_cypher_query(query).unwrap();
1043
1044        assert_eq!(result.reading_clauses.len(), 1);
1045        assert_eq!(result.return_clause.items.len(), 1);
1046    }
1047
1048    #[test]
1049    fn test_parse_node_with_properties() {
1050        let query = r#"MATCH (n:Person {name: "John", age: 30}) RETURN n"#;
1051        let result = parse_cypher_query(query).unwrap();
1052
1053        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1054            if let GraphPattern::Node(node) = &match_clause.patterns[0] {
1055                assert_eq!(node.labels, vec!["Person"]);
1056                assert_eq!(node.properties.len(), 2);
1057            } else {
1058                panic!("Expected node pattern");
1059            }
1060        } else {
1061            panic!("Expected match clause");
1062        }
1063    }
1064
1065    #[test]
1066    fn test_parse_simple_relationship_query() {
1067        let query = "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN a.name, b.name";
1068        let result = parse_cypher_query(query).unwrap();
1069
1070        assert_eq!(result.reading_clauses.len(), 1);
1071        assert_eq!(result.return_clause.items.len(), 2);
1072
1073        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1074            if let GraphPattern::Path(path) = &match_clause.patterns[0] {
1075                assert_eq!(path.segments.len(), 1);
1076                assert_eq!(path.segments[0].relationship.types, vec!["KNOWS"]);
1077            } else {
1078                panic!("Expected path pattern");
1079            }
1080        } else {
1081            panic!("Expected match clause");
1082        }
1083    }
1084
1085    #[test]
1086    fn test_parse_variable_length_path() {
1087        let query = "MATCH (a:Person)-[:FRIEND_OF*1..2]-(b:Person) RETURN a.name, b.name";
1088        let result = parse_cypher_query(query).unwrap();
1089
1090        assert_eq!(result.reading_clauses.len(), 1);
1091
1092        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1093            if let GraphPattern::Path(path) = &match_clause.patterns[0] {
1094                assert_eq!(path.segments.len(), 1);
1095                assert_eq!(path.segments[0].relationship.types, vec!["FRIEND_OF"]);
1096
1097                let length = path.segments[0].relationship.length.as_ref().unwrap();
1098                assert_eq!(length.min, Some(1));
1099                assert_eq!(length.max, Some(2));
1100            } else {
1101                panic!("Expected path pattern");
1102            }
1103        } else {
1104            panic!("Expected match clause");
1105        }
1106    }
1107
1108    #[test]
1109    fn test_parse_query_with_where_clause() {
1110        let query = "MATCH (n:Person) WHERE n.age > 30 RETURN n.name";
1111        let result = parse_cypher_query(query).unwrap();
1112
1113        assert!(result.where_clause.is_some());
1114    }
1115
1116    #[test]
1117    fn test_parse_query_with_single_quoted_literal() {
1118        let query = "MATCH (n:Person) WHERE n.name = 'Alice' RETURN n.name";
1119        let result = parse_cypher_query(query).unwrap();
1120
1121        assert!(result.where_clause.is_some());
1122    }
1123
1124    #[test]
1125    fn test_parse_query_with_and_conditions() {
1126        let query = "MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE rel.relationship_type = 'WORKS_ON' AND dst.name_lower = 'presto' RETURN src.name, src.entity_id";
1127        let result = parse_cypher_query(query).unwrap();
1128
1129        let where_clause = result.where_clause.expect("Expected WHERE clause");
1130        match where_clause.expression {
1131            BooleanExpression::And(left, right) => {
1132                match *left {
1133                    BooleanExpression::Comparison {
1134                        left: ValueExpression::Property(ref prop),
1135                        operator,
1136                        right: ValueExpression::Literal(PropertyValue::String(ref value)),
1137                    } => {
1138                        assert_eq!(prop.variable, "rel");
1139                        assert_eq!(prop.property, "relationship_type");
1140                        assert_eq!(operator, ComparisonOperator::Equal);
1141                        assert_eq!(value, "WORKS_ON");
1142                    }
1143                    _ => panic!("Expected comparison for relationship_type filter"),
1144                }
1145
1146                match *right {
1147                    BooleanExpression::Comparison {
1148                        left: ValueExpression::Property(ref prop),
1149                        operator,
1150                        right: ValueExpression::Literal(PropertyValue::String(ref value)),
1151                    } => {
1152                        assert_eq!(prop.variable, "dst");
1153                        assert_eq!(prop.property, "name_lower");
1154                        assert_eq!(operator, ComparisonOperator::Equal);
1155                        assert_eq!(value, "presto");
1156                    }
1157                    _ => panic!("Expected comparison for destination name filter"),
1158                }
1159            }
1160            other => panic!("Expected AND expression, got {:?}", other),
1161        }
1162    }
1163
1164    #[test]
1165    fn test_parse_query_with_in_clause() {
1166        let query = "MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE rel.relationship_type IN ['WORKS_FOR', 'PART_OF'] RETURN src.name";
1167        let result = parse_cypher_query(query).unwrap();
1168
1169        let where_clause = result.where_clause.expect("Expected WHERE clause");
1170        match where_clause.expression {
1171            BooleanExpression::In { expression, list } => {
1172                match expression {
1173                    ValueExpression::Property(prop_ref) => {
1174                        assert_eq!(prop_ref.variable, "rel");
1175                        assert_eq!(prop_ref.property, "relationship_type");
1176                    }
1177                    _ => panic!("Expected property reference in IN expression"),
1178                }
1179                assert_eq!(list.len(), 2);
1180                match &list[0] {
1181                    ValueExpression::Literal(PropertyValue::String(val)) => {
1182                        assert_eq!(val, "WORKS_FOR");
1183                    }
1184                    _ => panic!("Expected first list item to be a string literal"),
1185                }
1186                match &list[1] {
1187                    ValueExpression::Literal(PropertyValue::String(val)) => {
1188                        assert_eq!(val, "PART_OF");
1189                    }
1190                    _ => panic!("Expected second list item to be a string literal"),
1191                }
1192            }
1193            other => panic!("Expected IN expression, got {:?}", other),
1194        }
1195    }
1196
1197    #[test]
1198    fn test_parse_query_with_is_null() {
1199        let query = "MATCH (n:Person) WHERE n.age IS NULL RETURN n.name";
1200        let result = parse_cypher_query(query).unwrap();
1201
1202        let where_clause = result.where_clause.expect("Expected WHERE clause");
1203
1204        match where_clause.expression {
1205            BooleanExpression::IsNull(expr) => match expr {
1206                ValueExpression::Property(prop_ref) => {
1207                    assert_eq!(prop_ref.variable, "n");
1208                    assert_eq!(prop_ref.property, "age");
1209                }
1210                _ => panic!("Expected property reference in IS NULL expression"),
1211            },
1212            other => panic!("Expected IS NULL expression, got {:?}", other),
1213        }
1214    }
1215
1216    #[test]
1217    fn test_parse_query_with_is_not_null() {
1218        let query = "MATCH (n:Person) WHERE n.age IS NOT NULL RETURN n.name";
1219        let result = parse_cypher_query(query).unwrap();
1220
1221        let where_clause = result.where_clause.expect("Expected WHERE clause");
1222
1223        match where_clause.expression {
1224            BooleanExpression::IsNotNull(expr) => match expr {
1225                ValueExpression::Property(prop_ref) => {
1226                    assert_eq!(prop_ref.variable, "n");
1227                    assert_eq!(prop_ref.property, "age");
1228                }
1229                _ => panic!("Expected property reference in IS NOT NULL expression"),
1230            },
1231            other => panic!("Expected IS NOT NULL expression, got {:?}", other),
1232        }
1233    }
1234
1235    #[test]
1236    fn test_parse_query_with_limit() {
1237        let query = "MATCH (n:Person) RETURN n.name LIMIT 10";
1238        let result = parse_cypher_query(query).unwrap();
1239
1240        assert_eq!(result.limit, Some(10));
1241    }
1242
1243    #[test]
1244    fn test_parse_query_with_skip() {
1245        let query = "MATCH (n:Person) RETURN n.name SKIP 5";
1246        let result = parse_cypher_query(query).unwrap();
1247
1248        assert_eq!(result.skip, Some(5));
1249        assert_eq!(result.limit, None);
1250    }
1251
1252    #[test]
1253    fn test_parse_query_with_skip_and_limit() {
1254        let query = "MATCH (n:Person) RETURN n.name SKIP 5 LIMIT 10";
1255        let result = parse_cypher_query(query).unwrap();
1256
1257        assert_eq!(result.skip, Some(5));
1258        assert_eq!(result.limit, Some(10));
1259    }
1260
1261    #[test]
1262    fn test_parse_query_with_skip_and_order_by() {
1263        let query = "MATCH (n:Person) RETURN n.name ORDER BY n.age SKIP 5";
1264        let result = parse_cypher_query(query).unwrap();
1265
1266        assert_eq!(result.skip, Some(5));
1267        assert!(result.order_by.is_some());
1268    }
1269
1270    #[test]
1271    fn test_parse_query_with_skip_order_by_and_limit() {
1272        let query = "MATCH (n:Person) RETURN n.name ORDER BY n.age SKIP 5 LIMIT 10";
1273        let result = parse_cypher_query(query).unwrap();
1274
1275        assert_eq!(result.skip, Some(5));
1276        assert_eq!(result.limit, Some(10));
1277        assert!(result.order_by.is_some());
1278    }
1279
1280    #[test]
1281    fn test_parse_count_star() {
1282        let query = "MATCH (n:Person) RETURN count(*) AS total";
1283        let result = parse_cypher_query(query).unwrap();
1284
1285        assert_eq!(result.return_clause.items.len(), 1);
1286        let item = &result.return_clause.items[0];
1287        assert_eq!(item.alias, Some("total".to_string()));
1288
1289        match &item.expression {
1290            ValueExpression::AggregateFunction { name, args, .. } => {
1291                assert_eq!(name, "count");
1292                assert_eq!(args.len(), 1);
1293                match &args[0] {
1294                    ValueExpression::Variable(v) => assert_eq!(v, "*"),
1295                    _ => panic!("Expected Variable(*) in count(*)"),
1296                }
1297            }
1298            _ => panic!("Expected AggregateFunction expression"),
1299        }
1300    }
1301
1302    #[test]
1303    fn test_parse_count_property() {
1304        let query = "MATCH (n:Person) RETURN count(n.age)";
1305        let result = parse_cypher_query(query).unwrap();
1306
1307        assert_eq!(result.return_clause.items.len(), 1);
1308        let item = &result.return_clause.items[0];
1309
1310        match &item.expression {
1311            ValueExpression::AggregateFunction { name, args, .. } => {
1312                assert_eq!(name, "count");
1313                assert_eq!(args.len(), 1);
1314                match &args[0] {
1315                    ValueExpression::Property(prop) => {
1316                        assert_eq!(prop.variable, "n");
1317                        assert_eq!(prop.property, "age");
1318                    }
1319                    _ => panic!("Expected Property in count(n.age)"),
1320                }
1321            }
1322            _ => panic!("Expected AggregateFunction expression"),
1323        }
1324    }
1325
1326    #[test]
1327    fn test_parse_non_count_function_rejects_star() {
1328        // FOO(*) should fail to parse since * is only allowed for COUNT
1329        let query = "MATCH (n:Person) RETURN foo(*)";
1330        let result = parse_cypher_query(query);
1331        assert!(result.is_err(), "foo(*) should not parse successfully");
1332    }
1333
1334    #[test]
1335    fn test_parse_count_with_multiple_args() {
1336        // COUNT with multiple arguments parses successfully
1337        // but will be rejected during semantic validation
1338        let query = "MATCH (n:Person) RETURN count(n.age, n.name)";
1339        let result = parse_cypher_query(query);
1340        assert!(
1341            result.is_ok(),
1342            "Parser should accept multiple args (validation happens in semantic phase)"
1343        );
1344
1345        // Verify the AST structure
1346        let ast = result.unwrap();
1347        match &ast.return_clause.items[0].expression {
1348            ValueExpression::AggregateFunction { name, args, .. } => {
1349                assert_eq!(name, "count");
1350                assert_eq!(args.len(), 2);
1351            }
1352            _ => panic!("Expected AggregateFunction expression"),
1353        }
1354    }
1355
1356    #[test]
1357    fn test_parser_rejects_distinct_on_scalar() {
1358        // Parser should reject DISTINCT on scalar functions at parse time
1359        let query = "RETURN toLower(DISTINCT p.name)";
1360        let result = parse_cypher_query(query);
1361        assert!(
1362            result.is_err(),
1363            "Parser should reject DISTINCT on scalar functions"
1364        );
1365
1366        let query2 = "RETURN upper(DISTINCT p.name)";
1367        let result2 = parse_cypher_query(query2);
1368        assert!(
1369            result2.is_err(),
1370            "Parser should reject DISTINCT on scalar functions"
1371        );
1372    }
1373
1374    #[test]
1375    fn test_parse_like_pattern() {
1376        let query = "MATCH (n:Person) WHERE n.name LIKE 'A%' RETURN n.name";
1377        let result = parse_cypher_query(query);
1378        assert!(result.is_ok(), "LIKE pattern should parse successfully");
1379
1380        let ast = result.unwrap();
1381        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1382
1383        match where_clause.expression {
1384            BooleanExpression::Like {
1385                expression,
1386                pattern,
1387            } => {
1388                match expression {
1389                    ValueExpression::Property(prop) => {
1390                        assert_eq!(prop.variable, "n");
1391                        assert_eq!(prop.property, "name");
1392                    }
1393                    _ => panic!("Expected property expression"),
1394                }
1395                assert_eq!(pattern, "A%");
1396            }
1397            _ => panic!("Expected LIKE expression"),
1398        }
1399    }
1400
1401    #[test]
1402    fn test_parse_like_with_double_quotes() {
1403        let query = r#"MATCH (n:Person) WHERE n.email LIKE "%@example.com" RETURN n.email"#;
1404        let result = parse_cypher_query(query);
1405        assert!(result.is_ok(), "LIKE with double quotes should parse");
1406
1407        let ast = result.unwrap();
1408        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1409
1410        match where_clause.expression {
1411            BooleanExpression::Like { pattern, .. } => {
1412                assert_eq!(pattern, "%@example.com");
1413            }
1414            _ => panic!("Expected LIKE expression"),
1415        }
1416    }
1417
1418    #[test]
1419    fn test_parse_like_in_complex_where() {
1420        let query = "MATCH (n:Person) WHERE n.age > 20 AND n.name LIKE 'J%' RETURN n.name";
1421        let result = parse_cypher_query(query);
1422        assert!(result.is_ok(), "LIKE in complex WHERE should parse");
1423
1424        let ast = result.unwrap();
1425        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1426
1427        match where_clause.expression {
1428            BooleanExpression::And(left, right) => {
1429                // Left should be age > 20
1430                match *left {
1431                    BooleanExpression::Comparison { .. } => {}
1432                    _ => panic!("Expected comparison on left"),
1433                }
1434                // Right should be LIKE
1435                match *right {
1436                    BooleanExpression::Like { pattern, .. } => {
1437                        assert_eq!(pattern, "J%");
1438                    }
1439                    _ => panic!("Expected LIKE expression on right"),
1440                }
1441            }
1442            _ => panic!("Expected AND expression"),
1443        }
1444    }
1445
1446    #[test]
1447    fn test_parse_contains() {
1448        let query = "MATCH (n:Person) WHERE n.name CONTAINS 'Jo' RETURN n.name";
1449        let result = parse_cypher_query(query);
1450        assert!(result.is_ok());
1451
1452        let query = result.unwrap();
1453        assert!(query.where_clause.is_some());
1454
1455        match &query.where_clause.unwrap().expression {
1456            BooleanExpression::Contains {
1457                expression,
1458                substring,
1459            } => {
1460                assert_eq!(substring, "Jo");
1461                match expression {
1462                    ValueExpression::Property(prop) => {
1463                        assert_eq!(prop.variable, "n");
1464                        assert_eq!(prop.property, "name");
1465                    }
1466                    _ => panic!("Expected property reference"),
1467                }
1468            }
1469            _ => panic!("Expected CONTAINS expression"),
1470        }
1471    }
1472
1473    #[test]
1474    fn test_parse_starts_with() {
1475        let query = "MATCH (n:Person) WHERE n.name STARTS WITH 'Alice' RETURN n.name";
1476        let result = parse_cypher_query(query);
1477        assert!(result.is_ok());
1478
1479        let query = result.unwrap();
1480        assert!(query.where_clause.is_some());
1481
1482        match &query.where_clause.unwrap().expression {
1483            BooleanExpression::StartsWith { expression, prefix } => {
1484                assert_eq!(prefix, "Alice");
1485                match expression {
1486                    ValueExpression::Property(prop) => {
1487                        assert_eq!(prop.variable, "n");
1488                        assert_eq!(prop.property, "name");
1489                    }
1490                    _ => panic!("Expected property reference"),
1491                }
1492            }
1493            _ => panic!("Expected STARTS WITH expression"),
1494        }
1495    }
1496
1497    #[test]
1498    fn test_parse_ends_with() {
1499        let query = "MATCH (n:Person) WHERE n.email ENDS WITH '@example.com' RETURN n.email";
1500        let result = parse_cypher_query(query);
1501        assert!(result.is_ok());
1502
1503        let query = result.unwrap();
1504        assert!(query.where_clause.is_some());
1505
1506        match &query.where_clause.unwrap().expression {
1507            BooleanExpression::EndsWith { expression, suffix } => {
1508                assert_eq!(suffix, "@example.com");
1509                match expression {
1510                    ValueExpression::Property(prop) => {
1511                        assert_eq!(prop.variable, "n");
1512                        assert_eq!(prop.property, "email");
1513                    }
1514                    _ => panic!("Expected property reference"),
1515                }
1516            }
1517            _ => panic!("Expected ENDS WITH expression"),
1518        }
1519    }
1520
1521    #[test]
1522    fn test_parse_contains_case_insensitive_keyword() {
1523        let query = "MATCH (n:Person) WHERE n.name contains 'test' RETURN n.name";
1524        let result = parse_cypher_query(query);
1525        assert!(result.is_ok());
1526
1527        match &result.unwrap().where_clause.unwrap().expression {
1528            BooleanExpression::Contains { substring, .. } => {
1529                assert_eq!(substring, "test");
1530            }
1531            _ => panic!("Expected CONTAINS expression"),
1532        }
1533    }
1534
1535    #[test]
1536    fn test_parse_string_operators_in_complex_where() {
1537        let query =
1538            "MATCH (n:Person) WHERE n.name CONTAINS 'Jo' AND n.email ENDS WITH '.com' RETURN n";
1539        let result = parse_cypher_query(query);
1540        assert!(result.is_ok());
1541
1542        match &result.unwrap().where_clause.unwrap().expression {
1543            BooleanExpression::And(left, right) => {
1544                // Left should be CONTAINS
1545                match **left {
1546                    BooleanExpression::Contains { ref substring, .. } => {
1547                        assert_eq!(substring, "Jo");
1548                    }
1549                    _ => panic!("Expected CONTAINS expression on left"),
1550                }
1551                // Right should be ENDS WITH
1552                match **right {
1553                    BooleanExpression::EndsWith { ref suffix, .. } => {
1554                        assert_eq!(suffix, ".com");
1555                    }
1556                    _ => panic!("Expected ENDS WITH expression on right"),
1557                }
1558            }
1559            _ => panic!("Expected AND expression"),
1560        }
1561    }
1562
1563    #[test]
1564    fn test_parse_ilike_pattern() {
1565        let query = "MATCH (n:Person) WHERE n.name ILIKE 'alice%' RETURN n.name";
1566        let result = parse_cypher_query(query);
1567        assert!(result.is_ok(), "ILIKE pattern should parse successfully");
1568
1569        let ast = result.unwrap();
1570        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1571
1572        match where_clause.expression {
1573            BooleanExpression::ILike {
1574                expression,
1575                pattern,
1576            } => {
1577                match expression {
1578                    ValueExpression::Property(prop) => {
1579                        assert_eq!(prop.variable, "n");
1580                        assert_eq!(prop.property, "name");
1581                    }
1582                    _ => panic!("Expected property expression"),
1583                }
1584                assert_eq!(pattern, "alice%");
1585            }
1586            _ => panic!("Expected ILIKE expression"),
1587        }
1588    }
1589
1590    #[test]
1591    fn test_parse_like_and_ilike_together() {
1592        let query =
1593            "MATCH (n:Person) WHERE n.name LIKE 'Alice%' OR n.name ILIKE 'bob%' RETURN n.name";
1594        let result = parse_cypher_query(query);
1595        assert!(result.is_ok(), "LIKE and ILIKE together should parse");
1596
1597        let ast = result.unwrap();
1598        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1599
1600        match where_clause.expression {
1601            BooleanExpression::Or(left, right) => {
1602                // Left should be LIKE (case-sensitive)
1603                match *left {
1604                    BooleanExpression::Like { pattern, .. } => {
1605                        assert_eq!(pattern, "Alice%");
1606                    }
1607                    _ => panic!("Expected LIKE expression on left"),
1608                }
1609                // Right should be ILIKE (case-insensitive)
1610                match *right {
1611                    BooleanExpression::ILike { pattern, .. } => {
1612                        assert_eq!(pattern, "bob%");
1613                    }
1614                    _ => panic!("Expected ILIKE expression on right"),
1615                }
1616            }
1617            _ => panic!("Expected OR expression"),
1618        }
1619    }
1620
1621    #[test]
1622    fn test_parse_vector_distance() {
1623        let query = "MATCH (p:Person) WHERE vector_distance(p.embedding, $query_vec, cosine) < 0.5 RETURN p.name";
1624        let result = parse_cypher_query(query);
1625        assert!(result.is_ok(), "vector_distance should parse successfully");
1626
1627        let ast = result.unwrap();
1628        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1629
1630        // Verify it's a comparison with vector_distance
1631        match where_clause.expression {
1632            BooleanExpression::Comparison { left, operator, .. } => {
1633                match left {
1634                    ValueExpression::VectorDistance {
1635                        left,
1636                        right,
1637                        metric,
1638                    } => {
1639                        assert_eq!(metric, DistanceMetric::Cosine);
1640                        // Verify left is property reference
1641                        assert!(matches!(*left, ValueExpression::Property(_)));
1642                        // Verify right is parameter
1643                        assert!(matches!(*right, ValueExpression::Parameter(_)));
1644                    }
1645                    _ => panic!("Expected VectorDistance"),
1646                }
1647                assert_eq!(operator, ComparisonOperator::LessThan);
1648            }
1649            _ => panic!("Expected comparison"),
1650        }
1651    }
1652
1653    #[test]
1654    fn test_parse_vector_similarity() {
1655        let query =
1656            "MATCH (p:Person) WHERE vector_similarity(p.embedding, $vec, l2) > 0.8 RETURN p";
1657        let result = parse_cypher_query(query);
1658        assert!(
1659            result.is_ok(),
1660            "vector_similarity should parse successfully"
1661        );
1662
1663        let ast = result.unwrap();
1664        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1665
1666        match where_clause.expression {
1667            BooleanExpression::Comparison { left, operator, .. } => {
1668                match left {
1669                    ValueExpression::VectorSimilarity { metric, .. } => {
1670                        assert_eq!(metric, DistanceMetric::L2);
1671                    }
1672                    _ => panic!("Expected VectorSimilarity"),
1673                }
1674                assert_eq!(operator, ComparisonOperator::GreaterThan);
1675            }
1676            _ => panic!("Expected comparison"),
1677        }
1678    }
1679
1680    #[test]
1681    fn test_parse_parameter() {
1682        let query = "MATCH (p:Person) WHERE p.age = $min_age RETURN p";
1683        let result = parse_cypher_query(query);
1684        assert!(result.is_ok(), "Parameter should parse successfully");
1685
1686        let ast = result.unwrap();
1687        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1688
1689        match where_clause.expression {
1690            BooleanExpression::Comparison { right, .. } => match right {
1691                ValueExpression::Parameter(name) => {
1692                    assert_eq!(name, "min_age");
1693                }
1694                _ => panic!("Expected Parameter"),
1695            },
1696            _ => panic!("Expected comparison"),
1697        }
1698    }
1699
1700    #[test]
1701    fn test_parse_multiple_parameters() {
1702        let query = "MATCH (p:Person) WHERE p.age > $min_age AND p.age < $max_age RETURN p";
1703        let result = parse_cypher_query(query);
1704        assert!(
1705            result.is_ok(),
1706            "Multiple parameters should parse successfully"
1707        );
1708
1709        let ast = result.unwrap();
1710        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1711
1712        match where_clause.expression {
1713            BooleanExpression::And(left, right) => {
1714                // Check left: p.age > $min_age
1715                match *left {
1716                    BooleanExpression::Comparison {
1717                        right: val_right, ..
1718                    } => match val_right {
1719                        ValueExpression::Parameter(name) => {
1720                            assert_eq!(name, "min_age");
1721                        }
1722                        _ => panic!("Expected Parameter min_age"),
1723                    },
1724                    _ => panic!("Expected comparison on left"),
1725                }
1726
1727                // Check right: p.age < $max_age
1728                match *right {
1729                    BooleanExpression::Comparison {
1730                        right: val_right, ..
1731                    } => match val_right {
1732                        ValueExpression::Parameter(name) => {
1733                            assert_eq!(name, "max_age");
1734                        }
1735                        _ => panic!("Expected Parameter max_age"),
1736                    },
1737                    _ => panic!("Expected comparison on right"),
1738                }
1739            }
1740            _ => panic!("Expected AND expression"),
1741        }
1742    }
1743
1744    #[test]
1745    fn test_parse_parameter_formats() {
1746        // Test $param (should succeed)
1747        let query = "MATCH (p:Person) WHERE p.age > $min_age RETURN p";
1748        let result = parse_cypher_query(query);
1749        assert!(result.is_ok(), "$param should parse successfully");
1750    }
1751
1752    #[test]
1753    fn test_vector_distance_metrics() {
1754        for metric in &["cosine", "l2", "dot"] {
1755            let query = format!(
1756                "MATCH (p:Person) RETURN vector_distance(p.emb, $v, {}) AS dist",
1757                metric
1758            );
1759            let result = parse_cypher_query(&query);
1760            assert!(result.is_ok(), "Failed to parse metric: {}", metric);
1761
1762            let ast = result.unwrap();
1763            let return_item = &ast.return_clause.items[0];
1764
1765            match &return_item.expression {
1766                ValueExpression::VectorDistance {
1767                    metric: parsed_metric,
1768                    ..
1769                } => {
1770                    let expected = match *metric {
1771                        "cosine" => DistanceMetric::Cosine,
1772                        "l2" => DistanceMetric::L2,
1773                        "dot" => DistanceMetric::Dot,
1774                        _ => panic!("Unexpected metric"),
1775                    };
1776                    assert_eq!(*parsed_metric, expected);
1777                }
1778                _ => panic!("Expected VectorDistance"),
1779            }
1780        }
1781    }
1782
1783    #[test]
1784    fn test_vector_search_in_order_by() {
1785        let query = "MATCH (p:Person) RETURN p.name ORDER BY vector_distance(p.embedding, $query_vec, cosine) ASC LIMIT 10";
1786        let result = parse_cypher_query(query);
1787        assert!(result.is_ok(), "vector_distance in ORDER BY should parse");
1788
1789        let ast = result.unwrap();
1790        let order_by = ast.order_by.expect("Expected ORDER BY clause");
1791
1792        assert_eq!(order_by.items.len(), 1);
1793        match &order_by.items[0].expression {
1794            ValueExpression::VectorDistance { .. } => {
1795                // Success
1796            }
1797            _ => panic!("Expected VectorDistance in ORDER BY"),
1798        }
1799    }
1800
1801    #[test]
1802    fn test_hybrid_query_with_vector_and_property_filters() {
1803        let query = "MATCH (p:Person) WHERE p.age > 25 AND vector_similarity(p.embedding, $query_vec, cosine) > 0.7 RETURN p.name";
1804        let result = parse_cypher_query(query);
1805        assert!(result.is_ok(), "Hybrid query should parse");
1806
1807        let ast = result.unwrap();
1808        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1809
1810        // Should be an AND expression
1811        match where_clause.expression {
1812            BooleanExpression::And(left, right) => {
1813                // Left should be age > 25
1814                match *left {
1815                    BooleanExpression::Comparison { .. } => {}
1816                    _ => panic!("Expected comparison on left"),
1817                }
1818                // Right should be vector_similarity > 0.7
1819                match *right {
1820                    BooleanExpression::Comparison { left, .. } => match left {
1821                        ValueExpression::VectorSimilarity { .. } => {}
1822                        _ => panic!("Expected VectorSimilarity"),
1823                    },
1824                    _ => panic!("Expected comparison on right"),
1825                }
1826            }
1827            _ => panic!("Expected AND expression"),
1828        }
1829    }
1830
1831    #[test]
1832    fn test_parse_vector_literal() {
1833        let result = parse_vector_literal("[0.1, 0.2, 0.3]");
1834        assert!(result.is_ok());
1835        let (_, expr) = result.unwrap();
1836        match expr {
1837            ValueExpression::VectorLiteral(vec) => {
1838                assert_eq!(vec.len(), 3);
1839                assert_eq!(vec[0], 0.1);
1840                assert_eq!(vec[1], 0.2);
1841                assert_eq!(vec[2], 0.3);
1842            }
1843            _ => panic!("Expected VectorLiteral"),
1844        }
1845    }
1846
1847    #[test]
1848    fn test_parse_vector_literal_with_negative_values() {
1849        let result = parse_vector_literal("[-0.1, 0.2, -0.3]");
1850        assert!(result.is_ok());
1851        let (_, expr) = result.unwrap();
1852        match expr {
1853            ValueExpression::VectorLiteral(vec) => {
1854                assert_eq!(vec.len(), 3);
1855                assert_eq!(vec[0], -0.1);
1856                assert_eq!(vec[2], -0.3);
1857            }
1858            _ => panic!("Expected VectorLiteral"),
1859        }
1860    }
1861
1862    #[test]
1863    fn test_parse_vector_literal_scientific_notation() {
1864        let result = parse_vector_literal("[1e-3, 2.5e2, -3e-1]");
1865        assert!(result.is_ok());
1866        let (_, expr) = result.unwrap();
1867        match expr {
1868            ValueExpression::VectorLiteral(vec) => {
1869                assert_eq!(vec.len(), 3);
1870                assert!((vec[0] - 0.001).abs() < 1e-6);
1871                assert!((vec[1] - 250.0).abs() < 1e-6);
1872                assert!((vec[2] - (-0.3)).abs() < 1e-6);
1873            }
1874            _ => panic!("Expected VectorLiteral"),
1875        }
1876    }
1877
1878    #[test]
1879    fn test_vector_distance_with_literal() {
1880        let query =
1881            "MATCH (p:Person) WHERE vector_distance(p.embedding, [0.1, 0.2], l2) < 0.5 RETURN p";
1882        let result = parse_cypher_query(query);
1883        assert!(result.is_ok());
1884
1885        let ast = result.unwrap();
1886        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1887
1888        match where_clause.expression {
1889            BooleanExpression::Comparison { left, operator, .. } => {
1890                match left {
1891                    ValueExpression::VectorDistance {
1892                        left,
1893                        right,
1894                        metric,
1895                    } => {
1896                        // Left should be property reference
1897                        assert!(matches!(*left, ValueExpression::Property(_)));
1898                        // Right should be vector literal
1899                        match *right {
1900                            ValueExpression::VectorLiteral(vec) => {
1901                                assert_eq!(vec.len(), 2);
1902                                assert_eq!(vec[0], 0.1);
1903                                assert_eq!(vec[1], 0.2);
1904                            }
1905                            _ => panic!("Expected VectorLiteral"),
1906                        }
1907                        assert_eq!(metric, DistanceMetric::L2);
1908                    }
1909                    _ => panic!("Expected VectorDistance"),
1910                }
1911                assert_eq!(operator, ComparisonOperator::LessThan);
1912            }
1913            _ => panic!("Expected comparison"),
1914        }
1915    }
1916
1917    // UNWIND parser tests
1918    #[test]
1919    fn test_parse_unwind_simple() {
1920        let query = "UNWIND [1, 2, 3] AS num RETURN num";
1921        let ast = parse_cypher_query(query);
1922        assert!(ast.is_ok(), "Failed to parse simple UNWIND query");
1923    }
1924
1925    #[test]
1926    fn test_parse_unwind_after_match() {
1927        let query = "MATCH (n) UNWIND n.list AS item RETURN item";
1928        let ast = parse_cypher_query(query);
1929        assert!(ast.is_ok(), "Failed to parse UNWIND after MATCH");
1930    }
1931}