Skip to main content

lance_graph/
parser.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Cypher query parser
5//!
6//! This module provides parsing functionality for Cypher queries using nom parser combinators.
7//! It supports a subset of Cypher syntax focused on graph pattern matching and property access.
8
9use crate::ast::*;
10use crate::error::{GraphError, Result};
11use nom::{
12    branch::alt,
13    bytes::complete::{tag, tag_no_case, take_while1},
14    character::complete::{char, digit0, digit1, multispace0, multispace1, one_of},
15    combinator::{map, map_res, opt, peek, recognize},
16    multi::{many0, separated_list0, separated_list1},
17    sequence::{delimited, pair, preceded, tuple},
18    IResult,
19};
20use std::collections::HashMap;
21
22/// Parse a complete Cypher query
23pub fn parse_cypher_query(input: &str) -> Result<CypherQuery> {
24    let (remaining, query) = cypher_query(input).map_err(|e| GraphError::ParseError {
25        message: format!("Failed to parse Cypher query: {}", e),
26        position: 0,
27        location: snafu::Location::new(file!(), line!(), column!()),
28    })?;
29
30    if !remaining.trim().is_empty() {
31        return Err(GraphError::ParseError {
32            message: format!("Unexpected input after query: {}", remaining),
33            position: input.len() - remaining.len(),
34            location: snafu::Location::new(file!(), line!(), column!()),
35        });
36    }
37
38    Ok(query)
39}
40
41// Top-level parser for a complete Cypher query
42fn cypher_query(input: &str) -> IResult<&str, CypherQuery> {
43    let (input, _) = multispace0(input)?;
44    let (input, reading_clauses) = many0(reading_clause)(input)?;
45    let (input, pre_with_where) = opt(where_clause)(input)?;
46
47    // Optional WITH clause with optional post-WITH MATCH and WHERE
48    let (input, with_result) = opt(with_clause)(input)?;
49    // Only try to parse post-WITH clauses if we have a WITH clause
50    let (input, post_with_reading_clauses, post_with_where) = match with_result {
51        Some(_) => {
52            let (input, readings) = many0(reading_clause)(input)?;
53            let (input, where_cl) = opt(where_clause)(input)?;
54            (input, readings, where_cl)
55        }
56        None => (input, vec![], None),
57    };
58
59    let (input, return_clause) = return_clause(input)?;
60    let (input, order_by) = opt(order_by_clause)(input)?;
61    let (input, (skip, limit)) = pagination_clauses(input)?;
62    let (input, _) = multispace0(input)?;
63
64    Ok((
65        input,
66        CypherQuery {
67            reading_clauses,
68            where_clause: pre_with_where,
69            with_clause: with_result,
70            post_with_reading_clauses,
71            post_with_where_clause: post_with_where,
72            return_clause,
73            limit,
74            order_by,
75            skip,
76        },
77    ))
78}
79
80// Parse a reading clause (MATCH or UNWIND)
81fn reading_clause(input: &str) -> IResult<&str, ReadingClause> {
82    alt((
83        map(match_clause, ReadingClause::Match),
84        map(unwind_clause, ReadingClause::Unwind),
85    ))(input)
86}
87
88// Parse a MATCH clause
89fn match_clause(input: &str) -> IResult<&str, MatchClause> {
90    let (input, _) = multispace0(input)?;
91    let (input, _) = tag_no_case("MATCH")(input)?;
92    let (input, _) = multispace1(input)?;
93    let (input, patterns) = separated_list0(comma_ws, graph_pattern)(input)?;
94
95    Ok((input, MatchClause { patterns }))
96}
97
98// Parse an UNWIND clause
99fn unwind_clause(input: &str) -> IResult<&str, UnwindClause> {
100    let (input, _) = multispace0(input)?;
101    let (input, _) = tag_no_case("UNWIND")(input)?;
102    let (input, _) = multispace1(input)?;
103    let (input, expression) = value_expression(input)?;
104    let (input, _) = multispace1(input)?;
105    let (input, _) = tag_no_case("AS")(input)?;
106    let (input, _) = multispace1(input)?;
107    let (input, alias) = identifier(input)?;
108
109    Ok((
110        input,
111        UnwindClause {
112            expression,
113            alias: alias.to_string(),
114        },
115    ))
116}
117
118// Parse a graph pattern (node or path)
119fn graph_pattern(input: &str) -> IResult<&str, GraphPattern> {
120    alt((
121        map(path_pattern, GraphPattern::Path),
122        map(node_pattern, GraphPattern::Node),
123    ))(input)
124}
125
126// Parse a path pattern (only if there are segments)
127fn path_pattern(input: &str) -> IResult<&str, PathPattern> {
128    let (input, start_node) = node_pattern(input)?;
129    let (input, segments) = many0(path_segment)(input)?;
130
131    // Only succeed if we actually have path segments
132    if segments.is_empty() {
133        return Err(nom::Err::Error(nom::error::Error::new(
134            input,
135            nom::error::ErrorKind::Tag,
136        )));
137    }
138
139    Ok((
140        input,
141        PathPattern {
142            start_node,
143            segments,
144        },
145    ))
146}
147
148// Parse a path segment (relationship + node)
149fn path_segment(input: &str) -> IResult<&str, PathSegment> {
150    let (input, relationship) = relationship_pattern(input)?;
151    let (input, end_node) = node_pattern(input)?;
152
153    Ok((
154        input,
155        PathSegment {
156            relationship,
157            end_node,
158        },
159    ))
160}
161
162// Parse a node pattern: (variable:Label {prop: value})
163fn node_pattern(input: &str) -> IResult<&str, NodePattern> {
164    let (input, _) = multispace0(input)?;
165    let (input, _) = char('(')(input)?;
166    let (input, _) = multispace0(input)?;
167    let (input, variable) = opt(identifier)(input)?;
168    let (input, labels) = many0(preceded(char(':'), identifier))(input)?;
169    let (input, _) = multispace0(input)?;
170    let (input, properties) = opt(property_map)(input)?;
171    let (input, _) = multispace0(input)?;
172    let (input, _) = char(')')(input)?;
173
174    Ok((
175        input,
176        NodePattern {
177            variable: variable.map(|s| s.to_string()),
178            labels: labels.into_iter().map(|s| s.to_string()).collect(),
179            properties: properties.unwrap_or_default(),
180        },
181    ))
182}
183
184// Parse a relationship pattern: -[variable:TYPE {prop: value}]->
185fn relationship_pattern(input: &str) -> IResult<&str, RelationshipPattern> {
186    let (input, _) = multispace0(input)?;
187
188    // Parse direction and bracket content
189    let (input, (direction, content)) = alt((
190        // Outgoing: -[...]->
191        map(
192            tuple((
193                char('-'),
194                delimited(char('['), relationship_content, char(']')),
195                tag("->"),
196            )),
197            |(_, content, _)| (RelationshipDirection::Outgoing, content),
198        ),
199        // Incoming: <-[...]-
200        map(
201            tuple((
202                tag("<-"),
203                delimited(char('['), relationship_content, char(']')),
204                char('-'),
205            )),
206            |(_, content, _)| (RelationshipDirection::Incoming, content),
207        ),
208        // Undirected: -[...]-
209        map(
210            tuple((
211                char('-'),
212                delimited(char('['), relationship_content, char(']')),
213                char('-'),
214            )),
215            |(_, content, _)| (RelationshipDirection::Undirected, content),
216        ),
217    ))(input)?;
218
219    let (variable, types, properties, length) = content;
220
221    Ok((
222        input,
223        RelationshipPattern {
224            variable: variable.map(|s| s.to_string()),
225            types: types.into_iter().map(|s| s.to_string()).collect(),
226            direction,
227            properties: properties.unwrap_or_default(),
228            length,
229        },
230    ))
231}
232
// Type alias for the tuple returned by `relationship_content`, kept separate
// so that function's signature stays readable. The string slices borrow from
// the query text being parsed.
type RelationshipContentResult<'a> = (
    // Optional relationship variable name.
    Option<&'a str>,
    // Relationship type names, one per `:TYPE` found.
    Vec<&'a str>,
    // Optional `{key: value}` property map.
    Option<HashMap<String, PropertyValue>>,
    // Optional length range.
    Option<LengthRange>,
);
240
241// Parse relationship content inside brackets
242fn relationship_content(input: &str) -> IResult<&str, RelationshipContentResult<'_>> {
243    let (input, _) = multispace0(input)?;
244    let (input, variable) = opt(identifier)(input)?;
245    let (input, types) = many0(preceded(char(':'), identifier))(input)?;
246    let (input, _) = multispace0(input)?;
247    let (input, length) = opt(length_range)(input)?;
248    let (input, _) = multispace0(input)?;
249    let (input, properties) = opt(property_map)(input)?;
250    let (input, _) = multispace0(input)?;
251
252    Ok((input, (variable, types, properties, length)))
253}
254
255// Parse a property map: {key: value, key2: value2}
256fn property_map(input: &str) -> IResult<&str, HashMap<String, PropertyValue>> {
257    let (input, _) = multispace0(input)?;
258    let (input, _) = char('{')(input)?;
259    let (input, _) = multispace0(input)?;
260    let (input, pairs) = separated_list0(comma_ws, property_pair)(input)?;
261    let (input, _) = multispace0(input)?;
262    let (input, _) = char('}')(input)?;
263
264    Ok((input, pairs.into_iter().collect()))
265}
266
267// Parse a property key-value pair
268fn property_pair(input: &str) -> IResult<&str, (String, PropertyValue)> {
269    let (input, _) = multispace0(input)?;
270    let (input, key) = identifier(input)?;
271    let (input, _) = multispace0(input)?;
272    let (input, _) = char(':')(input)?;
273    let (input, _) = multispace0(input)?;
274    let (input, value) = property_value(input)?;
275
276    Ok((input, (key.to_string(), value)))
277}
278
279// Parse a property value
280fn property_value(input: &str) -> IResult<&str, PropertyValue> {
281    alt((
282        map(string_literal, PropertyValue::String),
283        map(float_literal, PropertyValue::Float), // Try float BEFORE integer (more specific)
284        map(integer_literal, PropertyValue::Integer),
285        map(boolean_literal, PropertyValue::Boolean),
286        map(tag("null"), |_| PropertyValue::Null),
287        map(parameter, PropertyValue::Parameter),
288    ))(input)
289}
290
291// Parse a WHERE clause
292fn where_clause(input: &str) -> IResult<&str, WhereClause> {
293    let (input, _) = multispace0(input)?;
294    let (input, _) = tag_no_case("WHERE")(input)?;
295    let (input, _) = multispace1(input)?;
296    let (input, expression) = boolean_expression(input)?;
297
298    Ok((input, WhereClause { expression }))
299}
300
// Parse a boolean expression.
// Entry point of the boolean grammar: delegates to the OR level, which in
// turn calls the AND level, then NOT, then the primary expressions.
fn boolean_expression(input: &str) -> IResult<&str, BooleanExpression> {
    boolean_or_expression(input)
}
305
306fn boolean_or_expression(input: &str) -> IResult<&str, BooleanExpression> {
307    let (input, first) = boolean_and_expression(input)?;
308    let (input, rest) = many0(preceded(
309        tuple((multispace0, tag_no_case("OR"), multispace1)),
310        boolean_and_expression,
311    ))(input)?;
312    let expr = rest.into_iter().fold(first, |acc, item| {
313        BooleanExpression::Or(Box::new(acc), Box::new(item))
314    });
315    Ok((input, expr))
316}
317
318fn boolean_and_expression(input: &str) -> IResult<&str, BooleanExpression> {
319    let (input, first) = boolean_not_expression(input)?;
320    let (input, rest) = many0(preceded(
321        tuple((multispace0, tag_no_case("AND"), multispace1)),
322        boolean_not_expression,
323    ))(input)?;
324    let expr = rest.into_iter().fold(first, |acc, item| {
325        BooleanExpression::And(Box::new(acc), Box::new(item))
326    });
327    Ok((input, expr))
328}
329
330fn boolean_not_expression(input: &str) -> IResult<&str, BooleanExpression> {
331    let (input, _) = multispace0(input)?;
332    alt((
333        map(
334            preceded(
335                tuple((tag_no_case("NOT"), multispace1)),
336                boolean_not_expression,
337            ),
338            |expr| BooleanExpression::Not(Box::new(expr)),
339        ),
340        boolean_primary_expression,
341    ))(input)
342}
343
344fn boolean_primary_expression(input: &str) -> IResult<&str, BooleanExpression> {
345    let (input, _) = multispace0(input)?;
346    alt((
347        map(
348            delimited(
349                tuple((char('('), multispace0)),
350                boolean_expression,
351                tuple((multispace0, char(')'))),
352            ),
353            |expr| expr,
354        ),
355        comparison_expression,
356    ))(input)
357}
358
359fn comparison_expression(input: &str) -> IResult<&str, BooleanExpression> {
360    let (input, _) = multispace0(input)?;
361    let (input, left) = value_expression(input)?;
362    let (input, _) = multispace0(input)?;
363    let left_clone = left.clone();
364
365    if let Ok((input_after_in, (_, _, list))) =
366        tuple((tag_no_case("IN"), multispace0, value_expression_list))(input)
367    {
368        return Ok((
369            input_after_in,
370            BooleanExpression::In {
371                expression: left,
372                list,
373            },
374        ));
375    }
376    // Match LIKE pattern
377    if let Ok((input_after_like, (_, _, pattern))) =
378        tuple((tag_no_case("LIKE"), multispace0, string_literal))(input)
379    {
380        return Ok((
381            input_after_like,
382            BooleanExpression::Like {
383                expression: left,
384                pattern,
385            },
386        ));
387    }
388    // Match ILIKE pattern (case-insensitive LIKE)
389    if let Ok((input_after_ilike, (_, _, pattern))) =
390        tuple((tag_no_case("ILIKE"), multispace0, string_literal))(input)
391    {
392        return Ok((
393            input_after_ilike,
394            BooleanExpression::ILike {
395                expression: left,
396                pattern,
397            },
398        ));
399    }
400    // Match CONTAINS substring
401    if let Ok((input_after_contains, (_, _, substring))) =
402        tuple((tag_no_case("CONTAINS"), multispace0, string_literal))(input)
403    {
404        return Ok((
405            input_after_contains,
406            BooleanExpression::Contains {
407                expression: left,
408                substring,
409            },
410        ));
411    }
412    // Match STARTS WITH prefix (note: multi-word operator)
413    if let Ok((input_after_starts, (_, _, _, _, prefix))) = tuple((
414        tag_no_case("STARTS"),
415        multispace1,
416        tag_no_case("WITH"),
417        multispace0,
418        string_literal,
419    ))(input)
420    {
421        return Ok((
422            input_after_starts,
423            BooleanExpression::StartsWith {
424                expression: left,
425                prefix,
426            },
427        ));
428    }
429    // Match ENDS WITH suffix (note: multi-word operator)
430    if let Ok((input_after_ends, (_, _, _, _, suffix))) = tuple((
431        tag_no_case("ENDS"),
432        multispace1,
433        tag_no_case("WITH"),
434        multispace0,
435        string_literal,
436    ))(input)
437    {
438        return Ok((
439            input_after_ends,
440            BooleanExpression::EndsWith {
441                expression: left,
442                suffix,
443            },
444        ));
445    }
446    // Match is null
447    if let Ok((rest, ())) = is_null_comparison(input) {
448        return Ok((rest, BooleanExpression::IsNull(left_clone)));
449    }
450    // Match is not null
451    if let Ok((rest, ())) = is_not_null_comparison(input) {
452        return Ok((rest, BooleanExpression::IsNotNull(left_clone)));
453    }
454
455    let (input, operator) = comparison_operator(input)?;
456    let (input, _) = multispace0(input)?;
457    let (input, right) = value_expression(input)?;
458
459    Ok((
460        input,
461        BooleanExpression::Comparison {
462            left: left_clone,
463            operator,
464            right,
465        },
466    ))
467}
468
469// Parse a comparison operator
470fn comparison_operator(input: &str) -> IResult<&str, ComparisonOperator> {
471    alt((
472        map(tag("="), |_| ComparisonOperator::Equal),
473        map(tag("<>"), |_| ComparisonOperator::NotEqual),
474        map(tag("!="), |_| ComparisonOperator::NotEqual),
475        map(tag("<="), |_| ComparisonOperator::LessThanOrEqual),
476        map(tag(">="), |_| ComparisonOperator::GreaterThanOrEqual),
477        map(tag("<"), |_| ComparisonOperator::LessThan),
478        map(tag(">"), |_| ComparisonOperator::GreaterThan),
479    ))(input)
480}
481
482// Parse a basic value expression (without vector functions to avoid circular dependency)
483fn basic_value_expression(input: &str) -> IResult<&str, ValueExpression> {
484    alt((
485        parse_vector_literal, // Try vector literal first [0.1, 0.2]
486        parse_parameter,      // Try $parameter
487        function_call,        // Regular function calls
488        map(property_value, ValueExpression::Literal), // Try literals BEFORE property references
489        map(property_reference, ValueExpression::Property),
490        map(identifier, |id| ValueExpression::Variable(id.to_string())),
491    ))(input)
492}
493
494// Parse a value expression
495// Optimization: Use peek to avoid expensive backtracking for non-vector queries
496fn value_expression(input: &str) -> IResult<&str, ValueExpression> {
497    // Peek at first identifier to dispatch to correct parser
498    // This eliminates failed parser attempts for every non-vector expression
499    if let Ok((_, first_ident)) = peek(identifier)(input) {
500        let ident_lower = first_ident.to_lowercase();
501
502        match ident_lower.as_str() {
503            "vector_distance" => return parse_vector_distance(input),
504            "vector_similarity" => return parse_vector_similarity(input),
505            _ => {} // Not a vector function, continue to basic expressions
506        }
507    }
508
509    // Fast path for common expressions
510    basic_value_expression(input)
511}
512
513// Parse distance metric: cosine, l2, dot
514fn parse_distance_metric(input: &str) -> IResult<&str, DistanceMetric> {
515    alt((
516        map(tag_no_case("cosine"), |_| DistanceMetric::Cosine),
517        map(tag_no_case("l2"), |_| DistanceMetric::L2),
518        map(tag_no_case("dot"), |_| DistanceMetric::Dot),
519    ))(input)
520}
521
522// Parse vector_distance(expr, expr, metric)
523fn parse_vector_distance(input: &str) -> IResult<&str, ValueExpression> {
524    let (input, _) = tag_no_case("vector_distance")(input)?;
525    let (input, _) = multispace0(input)?;
526    let (input, _) = char('(')(input)?;
527    let (input, _) = multispace0(input)?;
528
529    // Parse left expression - use basic_value_expression to avoid circular dependency
530    let (input, left) = basic_value_expression(input)?;
531    let (input, _) = multispace0(input)?;
532    let (input, _) = char(',')(input)?;
533    let (input, _) = multispace0(input)?;
534
535    // Parse right expression - use basic_value_expression to avoid circular dependency
536    let (input, right) = basic_value_expression(input)?;
537    let (input, _) = multispace0(input)?;
538    let (input, _) = char(',')(input)?;
539    let (input, _) = multispace0(input)?;
540
541    // Parse metric
542    let (input, metric) = parse_distance_metric(input)?;
543    let (input, _) = multispace0(input)?;
544    let (input, _) = char(')')(input)?;
545
546    Ok((
547        input,
548        ValueExpression::VectorDistance {
549            left: Box::new(left),
550            right: Box::new(right),
551            metric,
552        },
553    ))
554}
555
556// Parse vector_similarity(expr, expr, metric)
557fn parse_vector_similarity(input: &str) -> IResult<&str, ValueExpression> {
558    let (input, _) = tag_no_case("vector_similarity")(input)?;
559    let (input, _) = multispace0(input)?;
560    let (input, _) = char('(')(input)?;
561    let (input, _) = multispace0(input)?;
562
563    // Parse left expression - use basic_value_expression to avoid circular dependency
564    let (input, left) = basic_value_expression(input)?;
565    let (input, _) = multispace0(input)?;
566    let (input, _) = char(',')(input)?;
567    let (input, _) = multispace0(input)?;
568
569    // Parse right expression - use basic_value_expression to avoid circular dependency
570    let (input, right) = basic_value_expression(input)?;
571    let (input, _) = multispace0(input)?;
572    let (input, _) = char(',')(input)?;
573    let (input, _) = multispace0(input)?;
574
575    // Parse metric
576    let (input, metric) = parse_distance_metric(input)?;
577    let (input, _) = multispace0(input)?;
578    let (input, _) = char(')')(input)?;
579
580    Ok((
581        input,
582        ValueExpression::VectorSimilarity {
583            left: Box::new(left),
584            right: Box::new(right),
585            metric,
586        },
587    ))
588}
589
590// Parse parameter reference: $name
591fn parse_parameter(input: &str) -> IResult<&str, ValueExpression> {
592    let (input, _) = char('$')(input)?;
593    let (input, name) = identifier(input)?;
594    Ok((input, ValueExpression::Parameter(name.to_string())))
595}
596
597// Parse a function call: function_name(args)
598fn function_call(input: &str) -> IResult<&str, ValueExpression> {
599    let (input, name) = identifier(input)?;
600    let (input, _) = multispace0(input)?;
601    let (input, _) = char('(')(input)?;
602    let (input, _) = multispace0(input)?;
603
604    // Parse optional DISTINCT keyword
605    let (input, distinct) = opt(tag_no_case("DISTINCT"))(input)?;
606    let distinct = distinct.is_some();
607    let (input, _) = if distinct {
608        multispace1(input)?
609    } else {
610        (input, "")
611    };
612
613    // Handle COUNT(*) special case - only allow * for COUNT function
614    if let Ok((input_after_star, _)) = char::<_, nom::error::Error<&str>>('*')(input) {
615        // Validate that this is COUNT function
616        if name.to_lowercase() == "count" {
617            let (input, _) = multispace0(input_after_star)?;
618            let (input, _) = char(')')(input)?;
619            return Ok((
620                input,
621                ValueExpression::AggregateFunction {
622                    name: name.to_string(),
623                    args: vec![ValueExpression::Variable("*".to_string())],
624                    distinct,
625                },
626            ));
627        } else {
628            // Not COUNT - fail parsing to try regular argument parsing
629            // This will naturally fail since * is not a valid value_expression
630        }
631    }
632
633    // Parse regular function arguments
634    let (input, args) = separated_list0(
635        tuple((multispace0, char(','), multispace0)),
636        value_expression,
637    )(input)?;
638    let (input, _) = multispace0(input)?;
639    let (input, _) = char(')')(input)?;
640
641    // Route based on function type
642    use crate::ast::{classify_function, FunctionType};
643    match classify_function(name) {
644        FunctionType::Aggregate => Ok((
645            input,
646            ValueExpression::AggregateFunction {
647                name: name.to_string(),
648                args,
649                distinct,
650            },
651        )),
652        FunctionType::Scalar => {
653            // Validate: reject DISTINCT on scalar functions at parse time
654            if distinct {
655                return Err(nom::Err::Failure(nom::error::Error::new(
656                    input,
657                    nom::error::ErrorKind::Verify,
658                )));
659            }
660            Ok((
661                input,
662                ValueExpression::ScalarFunction {
663                    name: name.to_string(),
664                    args,
665                },
666            ))
667        }
668        FunctionType::Unknown => {
669            // Default to ScalarFunction for unknown functions
670            // They'll be handled as NULL in expression conversion
671            if distinct {
672                return Err(nom::Err::Failure(nom::error::Error::new(
673                    input,
674                    nom::error::ErrorKind::Verify,
675                )));
676            }
677            Ok((
678                input,
679                ValueExpression::ScalarFunction {
680                    name: name.to_string(),
681                    args,
682                },
683            ))
684        }
685    }
686}
687
688fn value_expression_list(input: &str) -> IResult<&str, Vec<ValueExpression>> {
689    delimited(
690        tuple((char('['), multispace0)),
691        separated_list1(
692            tuple((multispace0, char(','), multispace0)),
693            value_expression,
694        ),
695        tuple((multispace0, char(']'))),
696    )(input)
697}
698
699// Parse a float32 literal for vectors
700fn float32_literal(input: &str) -> IResult<&str, f32> {
701    map_res(
702        recognize(tuple((
703            opt(char('-')),
704            alt((
705                // Scientific notation: 1e-3, 2.5e2
706                recognize(tuple((
707                    digit1,
708                    opt(tuple((char('.'), digit0))),
709                    one_of("eE"),
710                    opt(one_of("+-")),
711                    digit1,
712                ))),
713                // Regular float: 1.23 or integer: 123
714                recognize(tuple((digit1, opt(tuple((char('.'), digit0)))))),
715            )),
716        ))),
717        |s: &str| s.parse::<f32>(),
718    )(input)
719}
720
721// Parse vector literal: [0.1, 0.2, 0.3]
722fn parse_vector_literal(input: &str) -> IResult<&str, ValueExpression> {
723    let (input, _) = char('[')(input)?;
724    let (input, _) = multispace0(input)?;
725
726    let (input, values) = separated_list1(
727        tuple((multispace0, char(','), multispace0)),
728        float32_literal,
729    )(input)?;
730
731    let (input, _) = multispace0(input)?;
732    let (input, _) = char(']')(input)?;
733
734    Ok((input, ValueExpression::VectorLiteral(values)))
735}
736
737// Parse a property reference: variable.property
738fn property_reference(input: &str) -> IResult<&str, PropertyRef> {
739    let (input, variable) = identifier(input)?;
740    let (input, _) = char('.')(input)?;
741    let (input, property) = identifier(input)?;
742
743    Ok((
744        input,
745        PropertyRef {
746            variable: variable.to_string(),
747            property: property.to_string(),
748        },
749    ))
750}
751
752// Parse a WITH clause (intermediate projection/aggregation)
753fn with_clause(input: &str) -> IResult<&str, WithClause> {
754    let (input, _) = multispace0(input)?;
755    let (input, _) = tag_no_case("WITH")(input)?;
756    let (input, _) = multispace1(input)?;
757    let (input, items) = separated_list0(comma_ws, return_item)(input)?;
758    let (input, order_by) = opt(order_by_clause)(input)?;
759    let (input, limit) = opt(limit_clause)(input)?;
760
761    Ok((
762        input,
763        WithClause {
764            items,
765            order_by,
766            limit,
767        },
768    ))
769}
770
771// Parse a RETURN clause
772fn return_clause(input: &str) -> IResult<&str, ReturnClause> {
773    let (input, _) = multispace0(input)?;
774    let (input, _) = tag_no_case("RETURN")(input)?;
775    let (input, _) = multispace1(input)?;
776    let (input, distinct) = opt(tag_no_case("DISTINCT"))(input)?;
777    let (input, _) = if distinct.is_some() {
778        multispace1(input)?
779    } else {
780        (input, "")
781    };
782    let (input, items) = separated_list0(comma_ws, return_item)(input)?;
783
784    Ok((
785        input,
786        ReturnClause {
787            distinct: distinct.is_some(),
788            items,
789        },
790    ))
791}
792
793// Parse a return item
794fn return_item(input: &str) -> IResult<&str, ReturnItem> {
795    let (input, expression) = value_expression(input)?;
796    let (input, _) = multispace0(input)?;
797    let (input, alias) = opt(preceded(
798        tuple((tag_no_case("AS"), multispace1)),
799        identifier,
800    ))(input)?;
801
802    Ok((
803        input,
804        ReturnItem {
805            expression,
806            alias: alias.map(|s| s.to_string()),
807        },
808    ))
809}
810
811// Match IS NULL in WHERE clause
812fn is_null_comparison(input: &str) -> IResult<&str, ()> {
813    let (input, _) = multispace0(input)?;
814    let (input, _) = tag_no_case("IS")(input)?;
815    let (input, _) = multispace1(input)?;
816    let (input, _) = tag_no_case("NULL")(input)?;
817    let (input, _) = multispace0(input)?;
818
819    Ok((input, ()))
820}
821
822// Match IS NOT NULL in WHERE clause
823fn is_not_null_comparison(input: &str) -> IResult<&str, ()> {
824    let (input, _) = multispace0(input)?;
825    let (input, _) = tag_no_case("IS")(input)?;
826    let (input, _) = multispace1(input)?;
827    let (input, _) = tag_no_case("NOT")(input)?;
828    let (input, _) = multispace1(input)?;
829    let (input, _) = tag_no_case("NULL")(input)?;
830    let (input, _) = multispace0(input)?;
831
832    Ok((input, ()))
833}
834
835// Parse an ORDER BY clause
836fn order_by_clause(input: &str) -> IResult<&str, OrderByClause> {
837    let (input, _) = multispace0(input)?;
838    let (input, _) = tag_no_case("ORDER")(input)?;
839    let (input, _) = multispace1(input)?;
840    let (input, _) = tag_no_case("BY")(input)?;
841    let (input, _) = multispace1(input)?;
842    let (input, items) = separated_list0(comma_ws, order_by_item)(input)?;
843
844    Ok((input, OrderByClause { items }))
845}
846
847// Parse an order by item
848fn order_by_item(input: &str) -> IResult<&str, OrderByItem> {
849    let (input, expression) = value_expression(input)?;
850    let (input, _) = multispace0(input)?;
851    let (input, direction) = opt(alt((
852        map(tag_no_case("ASC"), |_| SortDirection::Ascending),
853        map(tag_no_case("DESC"), |_| SortDirection::Descending),
854    )))(input)?;
855
856    Ok((
857        input,
858        OrderByItem {
859            expression,
860            direction: direction.unwrap_or(SortDirection::Ascending),
861        },
862    ))
863}
864
865// Parse a LIMIT clause
866fn limit_clause(input: &str) -> IResult<&str, u64> {
867    let (input, _) = multispace0(input)?;
868    let (input, _) = tag_no_case("LIMIT")(input)?;
869    let (input, _) = multispace1(input)?;
870    let (input, limit) = integer_literal(input)?;
871
872    Ok((input, limit as u64))
873}
874
875// Parse a SKIP clause
876fn skip_clause(input: &str) -> IResult<&str, u64> {
877    let (input, _) = multispace0(input)?;
878    let (input, _) = tag_no_case("SKIP")(input)?;
879    let (input, _) = multispace1(input)?;
880    let (input, skip) = integer_literal(input)?;
881
882    Ok((input, skip as u64))
883}
884
885// Parse pagination clauses (SKIP and LIMIT)
886fn pagination_clauses(input: &str) -> IResult<&str, (Option<u64>, Option<u64>)> {
887    let (mut remaining, _) = multispace0(input)?;
888    let mut skip: Option<u64> = None;
889    let mut limit: Option<u64> = None;
890
891    loop {
892        let before = remaining;
893
894        if skip.is_none() {
895            if let Ok((i, s)) = skip_clause(remaining) {
896                skip = Some(s);
897                remaining = i;
898                continue;
899            }
900        }
901
902        if limit.is_none() {
903            if let Ok((i, l)) = limit_clause(remaining) {
904                limit = Some(l);
905                remaining = i;
906                continue;
907            }
908        }
909
910        if before == remaining {
911            break;
912        }
913    }
914
915    Ok((remaining, (skip, limit)))
916}
917
918// Helper parsers
919
920// Parse an identifier
921fn identifier(input: &str) -> IResult<&str, &str> {
922    take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)
923}
924
925// Parse a string literal
926fn string_literal(input: &str) -> IResult<&str, String> {
927    alt((double_quoted_string, single_quoted_string))(input)
928}
929
930fn double_quoted_string(input: &str) -> IResult<&str, String> {
931    let (input, _) = char('"')(input)?;
932    let (input, content) = take_while1(|c| c != '"')(input)?;
933    let (input, _) = char('"')(input)?;
934    Ok((input, content.to_string()))
935}
936
937fn single_quoted_string(input: &str) -> IResult<&str, String> {
938    let (input, _) = char('\'')(input)?;
939    let (input, content) = take_while1(|c| c != '\'')(input)?;
940    let (input, _) = char('\'')(input)?;
941    Ok((input, content.to_string()))
942}
943
944// Parse an integer literal
945fn integer_literal(input: &str) -> IResult<&str, i64> {
946    let (input, digits) = recognize(pair(
947        opt(char('-')),
948        take_while1(|c: char| c.is_ascii_digit()),
949    ))(input)?;
950
951    Ok((input, digits.parse().unwrap()))
952}
953
954// Parse a float literal
955fn float_literal(input: &str) -> IResult<&str, f64> {
956    let (input, number) = recognize(tuple((
957        opt(char('-')),
958        take_while1(|c: char| c.is_ascii_digit()),
959        char('.'),
960        take_while1(|c: char| c.is_ascii_digit()),
961    )))(input)?;
962
963    Ok((input, number.parse().unwrap()))
964}
965
966// Parse a boolean literal
967fn boolean_literal(input: &str) -> IResult<&str, bool> {
968    alt((
969        map(tag_no_case("true"), |_| true),
970        map(tag_no_case("false"), |_| false),
971    ))(input)
972}
973
974// Parse a parameter reference
975fn parameter(input: &str) -> IResult<&str, String> {
976    let (input, _) = char('$')(input)?;
977    let (input, name) = identifier(input)?;
978    Ok((input, name.to_string()))
979}
980
981// Parse comma with optional whitespace
982fn comma_ws(input: &str) -> IResult<&str, ()> {
983    let (input, _) = multispace0(input)?;
984    let (input, _) = char(',')(input)?;
985    let (input, _) = multispace0(input)?;
986    Ok((input, ()))
987}
988
989// Parse variable-length path syntax: *1..2, *..3, *2.., *
990fn length_range(input: &str) -> IResult<&str, LengthRange> {
991    let (input, _) = char('*')(input)?;
992    let (input, _) = multispace0(input)?;
993
994    // Parse different length patterns
995    alt((
996        // *min..max (e.g., *1..3)
997        map(
998            tuple((
999                nom::character::complete::u32,
1000                tag(".."),
1001                nom::character::complete::u32,
1002            )),
1003            |(min, _, max)| LengthRange {
1004                min: Some(min),
1005                max: Some(max),
1006            },
1007        ),
1008        // *..max (e.g., *..3)
1009        map(preceded(tag(".."), nom::character::complete::u32), |max| {
1010            LengthRange {
1011                min: None,
1012                max: Some(max),
1013            }
1014        }),
1015        // *min.. (e.g., *2..)
1016        map(
1017            tuple((nom::character::complete::u32, tag(".."))),
1018            |(min, _)| LengthRange {
1019                min: Some(min),
1020                max: None,
1021            },
1022        ),
1023        // *min (e.g., *2)
1024        map(nom::character::complete::u32, |min| LengthRange {
1025            min: Some(min),
1026            max: Some(min),
1027        }),
1028        // * (unlimited)
1029        map(multispace0, |_| LengthRange {
1030            min: None,
1031            max: None,
1032        }),
1033    ))(input)
1034}
1035
1036#[cfg(test)]
1037mod tests {
1038    use super::*;
1039    use crate::ast::{BooleanExpression, ComparisonOperator, PropertyValue, ValueExpression};
1040
1041    #[test]
1042    fn test_parse_simple_node_query() {
1043        let query = "MATCH (n:Person) RETURN n.name";
1044        let result = parse_cypher_query(query).unwrap();
1045
1046        assert_eq!(result.reading_clauses.len(), 1);
1047        assert_eq!(result.return_clause.items.len(), 1);
1048    }
1049
1050    #[test]
1051    fn test_parse_node_with_properties() {
1052        let query = r#"MATCH (n:Person {name: "John", age: 30}) RETURN n"#;
1053        let result = parse_cypher_query(query).unwrap();
1054
1055        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1056            if let GraphPattern::Node(node) = &match_clause.patterns[0] {
1057                assert_eq!(node.labels, vec!["Person"]);
1058                assert_eq!(node.properties.len(), 2);
1059            } else {
1060                panic!("Expected node pattern");
1061            }
1062        } else {
1063            panic!("Expected match clause");
1064        }
1065    }
1066
1067    #[test]
1068    fn test_parse_simple_relationship_query() {
1069        let query = "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN a.name, b.name";
1070        let result = parse_cypher_query(query).unwrap();
1071
1072        assert_eq!(result.reading_clauses.len(), 1);
1073        assert_eq!(result.return_clause.items.len(), 2);
1074
1075        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1076            if let GraphPattern::Path(path) = &match_clause.patterns[0] {
1077                assert_eq!(path.segments.len(), 1);
1078                assert_eq!(path.segments[0].relationship.types, vec!["KNOWS"]);
1079            } else {
1080                panic!("Expected path pattern");
1081            }
1082        } else {
1083            panic!("Expected match clause");
1084        }
1085    }
1086
1087    #[test]
1088    fn test_parse_variable_length_path() {
1089        let query = "MATCH (a:Person)-[:FRIEND_OF*1..2]-(b:Person) RETURN a.name, b.name";
1090        let result = parse_cypher_query(query).unwrap();
1091
1092        assert_eq!(result.reading_clauses.len(), 1);
1093
1094        if let ReadingClause::Match(match_clause) = &result.reading_clauses[0] {
1095            if let GraphPattern::Path(path) = &match_clause.patterns[0] {
1096                assert_eq!(path.segments.len(), 1);
1097                assert_eq!(path.segments[0].relationship.types, vec!["FRIEND_OF"]);
1098
1099                let length = path.segments[0].relationship.length.as_ref().unwrap();
1100                assert_eq!(length.min, Some(1));
1101                assert_eq!(length.max, Some(2));
1102            } else {
1103                panic!("Expected path pattern");
1104            }
1105        } else {
1106            panic!("Expected match clause");
1107        }
1108    }
1109
1110    #[test]
1111    fn test_parse_query_with_where_clause() {
1112        let query = "MATCH (n:Person) WHERE n.age > 30 RETURN n.name";
1113        let result = parse_cypher_query(query).unwrap();
1114
1115        assert!(result.where_clause.is_some());
1116    }
1117
1118    #[test]
1119    fn test_parse_query_with_single_quoted_literal() {
1120        let query = "MATCH (n:Person) WHERE n.name = 'Alice' RETURN n.name";
1121        let result = parse_cypher_query(query).unwrap();
1122
1123        assert!(result.where_clause.is_some());
1124    }
1125
1126    #[test]
1127    fn test_parse_query_with_and_conditions() {
1128        let query = "MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE rel.relationship_type = 'WORKS_ON' AND dst.name_lower = 'presto' RETURN src.name, src.entity_id";
1129        let result = parse_cypher_query(query).unwrap();
1130
1131        let where_clause = result.where_clause.expect("Expected WHERE clause");
1132        match where_clause.expression {
1133            BooleanExpression::And(left, right) => {
1134                match *left {
1135                    BooleanExpression::Comparison {
1136                        left: ValueExpression::Property(ref prop),
1137                        operator,
1138                        right: ValueExpression::Literal(PropertyValue::String(ref value)),
1139                    } => {
1140                        assert_eq!(prop.variable, "rel");
1141                        assert_eq!(prop.property, "relationship_type");
1142                        assert_eq!(operator, ComparisonOperator::Equal);
1143                        assert_eq!(value, "WORKS_ON");
1144                    }
1145                    _ => panic!("Expected comparison for relationship_type filter"),
1146                }
1147
1148                match *right {
1149                    BooleanExpression::Comparison {
1150                        left: ValueExpression::Property(ref prop),
1151                        operator,
1152                        right: ValueExpression::Literal(PropertyValue::String(ref value)),
1153                    } => {
1154                        assert_eq!(prop.variable, "dst");
1155                        assert_eq!(prop.property, "name_lower");
1156                        assert_eq!(operator, ComparisonOperator::Equal);
1157                        assert_eq!(value, "presto");
1158                    }
1159                    _ => panic!("Expected comparison for destination name filter"),
1160                }
1161            }
1162            other => panic!("Expected AND expression, got {:?}", other),
1163        }
1164    }
1165
1166    #[test]
1167    fn test_parse_query_with_in_clause() {
1168        let query = "MATCH (src:Entity)-[rel:RELATIONSHIP]->(dst:Entity) WHERE rel.relationship_type IN ['WORKS_FOR', 'PART_OF'] RETURN src.name";
1169        let result = parse_cypher_query(query).unwrap();
1170
1171        let where_clause = result.where_clause.expect("Expected WHERE clause");
1172        match where_clause.expression {
1173            BooleanExpression::In { expression, list } => {
1174                match expression {
1175                    ValueExpression::Property(prop_ref) => {
1176                        assert_eq!(prop_ref.variable, "rel");
1177                        assert_eq!(prop_ref.property, "relationship_type");
1178                    }
1179                    _ => panic!("Expected property reference in IN expression"),
1180                }
1181                assert_eq!(list.len(), 2);
1182                match &list[0] {
1183                    ValueExpression::Literal(PropertyValue::String(val)) => {
1184                        assert_eq!(val, "WORKS_FOR");
1185                    }
1186                    _ => panic!("Expected first list item to be a string literal"),
1187                }
1188                match &list[1] {
1189                    ValueExpression::Literal(PropertyValue::String(val)) => {
1190                        assert_eq!(val, "PART_OF");
1191                    }
1192                    _ => panic!("Expected second list item to be a string literal"),
1193                }
1194            }
1195            other => panic!("Expected IN expression, got {:?}", other),
1196        }
1197    }
1198
1199    #[test]
1200    fn test_parse_query_with_is_null() {
1201        let query = "MATCH (n:Person) WHERE n.age IS NULL RETURN n.name";
1202        let result = parse_cypher_query(query).unwrap();
1203
1204        let where_clause = result.where_clause.expect("Expected WHERE clause");
1205
1206        match where_clause.expression {
1207            BooleanExpression::IsNull(expr) => match expr {
1208                ValueExpression::Property(prop_ref) => {
1209                    assert_eq!(prop_ref.variable, "n");
1210                    assert_eq!(prop_ref.property, "age");
1211                }
1212                _ => panic!("Expected property reference in IS NULL expression"),
1213            },
1214            other => panic!("Expected IS NULL expression, got {:?}", other),
1215        }
1216    }
1217
1218    #[test]
1219    fn test_parse_query_with_is_not_null() {
1220        let query = "MATCH (n:Person) WHERE n.age IS NOT NULL RETURN n.name";
1221        let result = parse_cypher_query(query).unwrap();
1222
1223        let where_clause = result.where_clause.expect("Expected WHERE clause");
1224
1225        match where_clause.expression {
1226            BooleanExpression::IsNotNull(expr) => match expr {
1227                ValueExpression::Property(prop_ref) => {
1228                    assert_eq!(prop_ref.variable, "n");
1229                    assert_eq!(prop_ref.property, "age");
1230                }
1231                _ => panic!("Expected property reference in IS NOT NULL expression"),
1232            },
1233            other => panic!("Expected IS NOT NULL expression, got {:?}", other),
1234        }
1235    }
1236
1237    #[test]
1238    fn test_parse_query_with_limit() {
1239        let query = "MATCH (n:Person) RETURN n.name LIMIT 10";
1240        let result = parse_cypher_query(query).unwrap();
1241
1242        assert_eq!(result.limit, Some(10));
1243    }
1244
1245    #[test]
1246    fn test_parse_query_with_skip() {
1247        let query = "MATCH (n:Person) RETURN n.name SKIP 5";
1248        let result = parse_cypher_query(query).unwrap();
1249
1250        assert_eq!(result.skip, Some(5));
1251        assert_eq!(result.limit, None);
1252    }
1253
1254    #[test]
1255    fn test_parse_query_with_skip_and_limit() {
1256        let query = "MATCH (n:Person) RETURN n.name SKIP 5 LIMIT 10";
1257        let result = parse_cypher_query(query).unwrap();
1258
1259        assert_eq!(result.skip, Some(5));
1260        assert_eq!(result.limit, Some(10));
1261    }
1262
1263    #[test]
1264    fn test_parse_query_with_skip_and_order_by() {
1265        let query = "MATCH (n:Person) RETURN n.name ORDER BY n.age SKIP 5";
1266        let result = parse_cypher_query(query).unwrap();
1267
1268        assert_eq!(result.skip, Some(5));
1269        assert!(result.order_by.is_some());
1270    }
1271
1272    #[test]
1273    fn test_parse_query_with_skip_order_by_and_limit() {
1274        let query = "MATCH (n:Person) RETURN n.name ORDER BY n.age SKIP 5 LIMIT 10";
1275        let result = parse_cypher_query(query).unwrap();
1276
1277        assert_eq!(result.skip, Some(5));
1278        assert_eq!(result.limit, Some(10));
1279        assert!(result.order_by.is_some());
1280    }
1281
1282    #[test]
1283    fn test_parse_count_star() {
1284        let query = "MATCH (n:Person) RETURN count(*) AS total";
1285        let result = parse_cypher_query(query).unwrap();
1286
1287        assert_eq!(result.return_clause.items.len(), 1);
1288        let item = &result.return_clause.items[0];
1289        assert_eq!(item.alias, Some("total".to_string()));
1290
1291        match &item.expression {
1292            ValueExpression::AggregateFunction { name, args, .. } => {
1293                assert_eq!(name, "count");
1294                assert_eq!(args.len(), 1);
1295                match &args[0] {
1296                    ValueExpression::Variable(v) => assert_eq!(v, "*"),
1297                    _ => panic!("Expected Variable(*) in count(*)"),
1298                }
1299            }
1300            _ => panic!("Expected AggregateFunction expression"),
1301        }
1302    }
1303
1304    #[test]
1305    fn test_parse_count_property() {
1306        let query = "MATCH (n:Person) RETURN count(n.age)";
1307        let result = parse_cypher_query(query).unwrap();
1308
1309        assert_eq!(result.return_clause.items.len(), 1);
1310        let item = &result.return_clause.items[0];
1311
1312        match &item.expression {
1313            ValueExpression::AggregateFunction { name, args, .. } => {
1314                assert_eq!(name, "count");
1315                assert_eq!(args.len(), 1);
1316                match &args[0] {
1317                    ValueExpression::Property(prop) => {
1318                        assert_eq!(prop.variable, "n");
1319                        assert_eq!(prop.property, "age");
1320                    }
1321                    _ => panic!("Expected Property in count(n.age)"),
1322                }
1323            }
1324            _ => panic!("Expected AggregateFunction expression"),
1325        }
1326    }
1327
1328    #[test]
1329    fn test_parse_non_count_function_rejects_star() {
1330        // FOO(*) should fail to parse since * is only allowed for COUNT
1331        let query = "MATCH (n:Person) RETURN foo(*)";
1332        let result = parse_cypher_query(query);
1333        assert!(result.is_err(), "foo(*) should not parse successfully");
1334    }
1335
1336    #[test]
1337    fn test_parse_count_with_multiple_args() {
1338        // COUNT with multiple arguments parses successfully
1339        // but will be rejected during semantic validation
1340        let query = "MATCH (n:Person) RETURN count(n.age, n.name)";
1341        let result = parse_cypher_query(query);
1342        assert!(
1343            result.is_ok(),
1344            "Parser should accept multiple args (validation happens in semantic phase)"
1345        );
1346
1347        // Verify the AST structure
1348        let ast = result.unwrap();
1349        match &ast.return_clause.items[0].expression {
1350            ValueExpression::AggregateFunction { name, args, .. } => {
1351                assert_eq!(name, "count");
1352                assert_eq!(args.len(), 2);
1353            }
1354            _ => panic!("Expected AggregateFunction expression"),
1355        }
1356    }
1357
1358    #[test]
1359    fn test_parser_rejects_distinct_on_scalar() {
1360        // Parser should reject DISTINCT on scalar functions at parse time
1361        let query = "RETURN toLower(DISTINCT p.name)";
1362        let result = parse_cypher_query(query);
1363        assert!(
1364            result.is_err(),
1365            "Parser should reject DISTINCT on scalar functions"
1366        );
1367
1368        let query2 = "RETURN upper(DISTINCT p.name)";
1369        let result2 = parse_cypher_query(query2);
1370        assert!(
1371            result2.is_err(),
1372            "Parser should reject DISTINCT on scalar functions"
1373        );
1374    }
1375
1376    #[test]
1377    fn test_parse_like_pattern() {
1378        let query = "MATCH (n:Person) WHERE n.name LIKE 'A%' RETURN n.name";
1379        let result = parse_cypher_query(query);
1380        assert!(result.is_ok(), "LIKE pattern should parse successfully");
1381
1382        let ast = result.unwrap();
1383        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1384
1385        match where_clause.expression {
1386            BooleanExpression::Like {
1387                expression,
1388                pattern,
1389            } => {
1390                match expression {
1391                    ValueExpression::Property(prop) => {
1392                        assert_eq!(prop.variable, "n");
1393                        assert_eq!(prop.property, "name");
1394                    }
1395                    _ => panic!("Expected property expression"),
1396                }
1397                assert_eq!(pattern, "A%");
1398            }
1399            _ => panic!("Expected LIKE expression"),
1400        }
1401    }
1402
1403    #[test]
1404    fn test_parse_like_with_double_quotes() {
1405        let query = r#"MATCH (n:Person) WHERE n.email LIKE "%@example.com" RETURN n.email"#;
1406        let result = parse_cypher_query(query);
1407        assert!(result.is_ok(), "LIKE with double quotes should parse");
1408
1409        let ast = result.unwrap();
1410        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1411
1412        match where_clause.expression {
1413            BooleanExpression::Like { pattern, .. } => {
1414                assert_eq!(pattern, "%@example.com");
1415            }
1416            _ => panic!("Expected LIKE expression"),
1417        }
1418    }
1419
1420    #[test]
1421    fn test_parse_like_in_complex_where() {
1422        let query = "MATCH (n:Person) WHERE n.age > 20 AND n.name LIKE 'J%' RETURN n.name";
1423        let result = parse_cypher_query(query);
1424        assert!(result.is_ok(), "LIKE in complex WHERE should parse");
1425
1426        let ast = result.unwrap();
1427        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1428
1429        match where_clause.expression {
1430            BooleanExpression::And(left, right) => {
1431                // Left should be age > 20
1432                match *left {
1433                    BooleanExpression::Comparison { .. } => {}
1434                    _ => panic!("Expected comparison on left"),
1435                }
1436                // Right should be LIKE
1437                match *right {
1438                    BooleanExpression::Like { pattern, .. } => {
1439                        assert_eq!(pattern, "J%");
1440                    }
1441                    _ => panic!("Expected LIKE expression on right"),
1442                }
1443            }
1444            _ => panic!("Expected AND expression"),
1445        }
1446    }
1447
1448    #[test]
1449    fn test_parse_contains() {
1450        let query = "MATCH (n:Person) WHERE n.name CONTAINS 'Jo' RETURN n.name";
1451        let result = parse_cypher_query(query);
1452        assert!(result.is_ok());
1453
1454        let query = result.unwrap();
1455        assert!(query.where_clause.is_some());
1456
1457        match &query.where_clause.unwrap().expression {
1458            BooleanExpression::Contains {
1459                expression,
1460                substring,
1461            } => {
1462                assert_eq!(substring, "Jo");
1463                match expression {
1464                    ValueExpression::Property(prop) => {
1465                        assert_eq!(prop.variable, "n");
1466                        assert_eq!(prop.property, "name");
1467                    }
1468                    _ => panic!("Expected property reference"),
1469                }
1470            }
1471            _ => panic!("Expected CONTAINS expression"),
1472        }
1473    }
1474
1475    #[test]
1476    fn test_parse_starts_with() {
1477        let query = "MATCH (n:Person) WHERE n.name STARTS WITH 'Alice' RETURN n.name";
1478        let result = parse_cypher_query(query);
1479        assert!(result.is_ok());
1480
1481        let query = result.unwrap();
1482        assert!(query.where_clause.is_some());
1483
1484        match &query.where_clause.unwrap().expression {
1485            BooleanExpression::StartsWith { expression, prefix } => {
1486                assert_eq!(prefix, "Alice");
1487                match expression {
1488                    ValueExpression::Property(prop) => {
1489                        assert_eq!(prop.variable, "n");
1490                        assert_eq!(prop.property, "name");
1491                    }
1492                    _ => panic!("Expected property reference"),
1493                }
1494            }
1495            _ => panic!("Expected STARTS WITH expression"),
1496        }
1497    }
1498
1499    #[test]
1500    fn test_parse_ends_with() {
1501        let query = "MATCH (n:Person) WHERE n.email ENDS WITH '@example.com' RETURN n.email";
1502        let result = parse_cypher_query(query);
1503        assert!(result.is_ok());
1504
1505        let query = result.unwrap();
1506        assert!(query.where_clause.is_some());
1507
1508        match &query.where_clause.unwrap().expression {
1509            BooleanExpression::EndsWith { expression, suffix } => {
1510                assert_eq!(suffix, "@example.com");
1511                match expression {
1512                    ValueExpression::Property(prop) => {
1513                        assert_eq!(prop.variable, "n");
1514                        assert_eq!(prop.property, "email");
1515                    }
1516                    _ => panic!("Expected property reference"),
1517                }
1518            }
1519            _ => panic!("Expected ENDS WITH expression"),
1520        }
1521    }
1522
1523    #[test]
1524    fn test_parse_contains_case_insensitive_keyword() {
1525        let query = "MATCH (n:Person) WHERE n.name contains 'test' RETURN n.name";
1526        let result = parse_cypher_query(query);
1527        assert!(result.is_ok());
1528
1529        match &result.unwrap().where_clause.unwrap().expression {
1530            BooleanExpression::Contains { substring, .. } => {
1531                assert_eq!(substring, "test");
1532            }
1533            _ => panic!("Expected CONTAINS expression"),
1534        }
1535    }
1536
1537    #[test]
1538    fn test_parse_string_operators_in_complex_where() {
1539        let query =
1540            "MATCH (n:Person) WHERE n.name CONTAINS 'Jo' AND n.email ENDS WITH '.com' RETURN n";
1541        let result = parse_cypher_query(query);
1542        assert!(result.is_ok());
1543
1544        match &result.unwrap().where_clause.unwrap().expression {
1545            BooleanExpression::And(left, right) => {
1546                // Left should be CONTAINS
1547                match **left {
1548                    BooleanExpression::Contains { ref substring, .. } => {
1549                        assert_eq!(substring, "Jo");
1550                    }
1551                    _ => panic!("Expected CONTAINS expression on left"),
1552                }
1553                // Right should be ENDS WITH
1554                match **right {
1555                    BooleanExpression::EndsWith { ref suffix, .. } => {
1556                        assert_eq!(suffix, ".com");
1557                    }
1558                    _ => panic!("Expected ENDS WITH expression on right"),
1559                }
1560            }
1561            _ => panic!("Expected AND expression"),
1562        }
1563    }
1564
1565    #[test]
1566    fn test_parse_ilike_pattern() {
1567        let query = "MATCH (n:Person) WHERE n.name ILIKE 'alice%' RETURN n.name";
1568        let result = parse_cypher_query(query);
1569        assert!(result.is_ok(), "ILIKE pattern should parse successfully");
1570
1571        let ast = result.unwrap();
1572        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1573
1574        match where_clause.expression {
1575            BooleanExpression::ILike {
1576                expression,
1577                pattern,
1578            } => {
1579                match expression {
1580                    ValueExpression::Property(prop) => {
1581                        assert_eq!(prop.variable, "n");
1582                        assert_eq!(prop.property, "name");
1583                    }
1584                    _ => panic!("Expected property expression"),
1585                }
1586                assert_eq!(pattern, "alice%");
1587            }
1588            _ => panic!("Expected ILIKE expression"),
1589        }
1590    }
1591
1592    #[test]
1593    fn test_parse_like_and_ilike_together() {
1594        let query =
1595            "MATCH (n:Person) WHERE n.name LIKE 'Alice%' OR n.name ILIKE 'bob%' RETURN n.name";
1596        let result = parse_cypher_query(query);
1597        assert!(result.is_ok(), "LIKE and ILIKE together should parse");
1598
1599        let ast = result.unwrap();
1600        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1601
1602        match where_clause.expression {
1603            BooleanExpression::Or(left, right) => {
1604                // Left should be LIKE (case-sensitive)
1605                match *left {
1606                    BooleanExpression::Like { pattern, .. } => {
1607                        assert_eq!(pattern, "Alice%");
1608                    }
1609                    _ => panic!("Expected LIKE expression on left"),
1610                }
1611                // Right should be ILIKE (case-insensitive)
1612                match *right {
1613                    BooleanExpression::ILike { pattern, .. } => {
1614                        assert_eq!(pattern, "bob%");
1615                    }
1616                    _ => panic!("Expected ILIKE expression on right"),
1617                }
1618            }
1619            _ => panic!("Expected OR expression"),
1620        }
1621    }
1622
1623    #[test]
1624    fn test_parse_vector_distance() {
1625        let query = "MATCH (p:Person) WHERE vector_distance(p.embedding, $query_vec, cosine) < 0.5 RETURN p.name";
1626        let result = parse_cypher_query(query);
1627        assert!(result.is_ok(), "vector_distance should parse successfully");
1628
1629        let ast = result.unwrap();
1630        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1631
1632        // Verify it's a comparison with vector_distance
1633        match where_clause.expression {
1634            BooleanExpression::Comparison { left, operator, .. } => {
1635                match left {
1636                    ValueExpression::VectorDistance {
1637                        left,
1638                        right,
1639                        metric,
1640                    } => {
1641                        assert_eq!(metric, DistanceMetric::Cosine);
1642                        // Verify left is property reference
1643                        assert!(matches!(*left, ValueExpression::Property(_)));
1644                        // Verify right is parameter
1645                        assert!(matches!(*right, ValueExpression::Parameter(_)));
1646                    }
1647                    _ => panic!("Expected VectorDistance"),
1648                }
1649                assert_eq!(operator, ComparisonOperator::LessThan);
1650            }
1651            _ => panic!("Expected comparison"),
1652        }
1653    }
1654
1655    #[test]
1656    fn test_parse_vector_similarity() {
1657        let query =
1658            "MATCH (p:Person) WHERE vector_similarity(p.embedding, $vec, l2) > 0.8 RETURN p";
1659        let result = parse_cypher_query(query);
1660        assert!(
1661            result.is_ok(),
1662            "vector_similarity should parse successfully"
1663        );
1664
1665        let ast = result.unwrap();
1666        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1667
1668        match where_clause.expression {
1669            BooleanExpression::Comparison { left, operator, .. } => {
1670                match left {
1671                    ValueExpression::VectorSimilarity { metric, .. } => {
1672                        assert_eq!(metric, DistanceMetric::L2);
1673                    }
1674                    _ => panic!("Expected VectorSimilarity"),
1675                }
1676                assert_eq!(operator, ComparisonOperator::GreaterThan);
1677            }
1678            _ => panic!("Expected comparison"),
1679        }
1680    }
1681
1682    #[test]
1683    fn test_parse_parameter() {
1684        let query = "MATCH (p:Person) WHERE p.age = $min_age RETURN p";
1685        let result = parse_cypher_query(query);
1686        assert!(result.is_ok(), "Parameter should parse successfully");
1687
1688        let ast = result.unwrap();
1689        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1690
1691        match where_clause.expression {
1692            BooleanExpression::Comparison { right, .. } => match right {
1693                ValueExpression::Parameter(name) => {
1694                    assert_eq!(name, "min_age");
1695                }
1696                _ => panic!("Expected Parameter"),
1697            },
1698            _ => panic!("Expected comparison"),
1699        }
1700    }
1701
1702    #[test]
1703    fn test_vector_distance_metrics() {
1704        for metric in &["cosine", "l2", "dot"] {
1705            let query = format!(
1706                "MATCH (p:Person) RETURN vector_distance(p.emb, $v, {}) AS dist",
1707                metric
1708            );
1709            let result = parse_cypher_query(&query);
1710            assert!(result.is_ok(), "Failed to parse metric: {}", metric);
1711
1712            let ast = result.unwrap();
1713            let return_item = &ast.return_clause.items[0];
1714
1715            match &return_item.expression {
1716                ValueExpression::VectorDistance {
1717                    metric: parsed_metric,
1718                    ..
1719                } => {
1720                    let expected = match *metric {
1721                        "cosine" => DistanceMetric::Cosine,
1722                        "l2" => DistanceMetric::L2,
1723                        "dot" => DistanceMetric::Dot,
1724                        _ => panic!("Unexpected metric"),
1725                    };
1726                    assert_eq!(*parsed_metric, expected);
1727                }
1728                _ => panic!("Expected VectorDistance"),
1729            }
1730        }
1731    }
1732
1733    #[test]
1734    fn test_vector_search_in_order_by() {
1735        let query = "MATCH (p:Person) RETURN p.name ORDER BY vector_distance(p.embedding, $query_vec, cosine) ASC LIMIT 10";
1736        let result = parse_cypher_query(query);
1737        assert!(result.is_ok(), "vector_distance in ORDER BY should parse");
1738
1739        let ast = result.unwrap();
1740        let order_by = ast.order_by.expect("Expected ORDER BY clause");
1741
1742        assert_eq!(order_by.items.len(), 1);
1743        match &order_by.items[0].expression {
1744            ValueExpression::VectorDistance { .. } => {
1745                // Success
1746            }
1747            _ => panic!("Expected VectorDistance in ORDER BY"),
1748        }
1749    }
1750
1751    #[test]
1752    fn test_hybrid_query_with_vector_and_property_filters() {
1753        let query = "MATCH (p:Person) WHERE p.age > 25 AND vector_similarity(p.embedding, $query_vec, cosine) > 0.7 RETURN p.name";
1754        let result = parse_cypher_query(query);
1755        assert!(result.is_ok(), "Hybrid query should parse");
1756
1757        let ast = result.unwrap();
1758        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1759
1760        // Should be an AND expression
1761        match where_clause.expression {
1762            BooleanExpression::And(left, right) => {
1763                // Left should be age > 25
1764                match *left {
1765                    BooleanExpression::Comparison { .. } => {}
1766                    _ => panic!("Expected comparison on left"),
1767                }
1768                // Right should be vector_similarity > 0.7
1769                match *right {
1770                    BooleanExpression::Comparison { left, .. } => match left {
1771                        ValueExpression::VectorSimilarity { .. } => {}
1772                        _ => panic!("Expected VectorSimilarity"),
1773                    },
1774                    _ => panic!("Expected comparison on right"),
1775                }
1776            }
1777            _ => panic!("Expected AND expression"),
1778        }
1779    }
1780
1781    #[test]
1782    fn test_parse_vector_literal() {
1783        let result = parse_vector_literal("[0.1, 0.2, 0.3]");
1784        assert!(result.is_ok());
1785        let (_, expr) = result.unwrap();
1786        match expr {
1787            ValueExpression::VectorLiteral(vec) => {
1788                assert_eq!(vec.len(), 3);
1789                assert_eq!(vec[0], 0.1);
1790                assert_eq!(vec[1], 0.2);
1791                assert_eq!(vec[2], 0.3);
1792            }
1793            _ => panic!("Expected VectorLiteral"),
1794        }
1795    }
1796
1797    #[test]
1798    fn test_parse_vector_literal_with_negative_values() {
1799        let result = parse_vector_literal("[-0.1, 0.2, -0.3]");
1800        assert!(result.is_ok());
1801        let (_, expr) = result.unwrap();
1802        match expr {
1803            ValueExpression::VectorLiteral(vec) => {
1804                assert_eq!(vec.len(), 3);
1805                assert_eq!(vec[0], -0.1);
1806                assert_eq!(vec[2], -0.3);
1807            }
1808            _ => panic!("Expected VectorLiteral"),
1809        }
1810    }
1811
1812    #[test]
1813    fn test_parse_vector_literal_scientific_notation() {
1814        let result = parse_vector_literal("[1e-3, 2.5e2, -3e-1]");
1815        assert!(result.is_ok());
1816        let (_, expr) = result.unwrap();
1817        match expr {
1818            ValueExpression::VectorLiteral(vec) => {
1819                assert_eq!(vec.len(), 3);
1820                assert!((vec[0] - 0.001).abs() < 1e-6);
1821                assert!((vec[1] - 250.0).abs() < 1e-6);
1822                assert!((vec[2] - (-0.3)).abs() < 1e-6);
1823            }
1824            _ => panic!("Expected VectorLiteral"),
1825        }
1826    }
1827
1828    #[test]
1829    fn test_vector_distance_with_literal() {
1830        let query =
1831            "MATCH (p:Person) WHERE vector_distance(p.embedding, [0.1, 0.2], l2) < 0.5 RETURN p";
1832        let result = parse_cypher_query(query);
1833        assert!(result.is_ok());
1834
1835        let ast = result.unwrap();
1836        let where_clause = ast.where_clause.expect("Expected WHERE clause");
1837
1838        match where_clause.expression {
1839            BooleanExpression::Comparison { left, operator, .. } => {
1840                match left {
1841                    ValueExpression::VectorDistance {
1842                        left,
1843                        right,
1844                        metric,
1845                    } => {
1846                        // Left should be property reference
1847                        assert!(matches!(*left, ValueExpression::Property(_)));
1848                        // Right should be vector literal
1849                        match *right {
1850                            ValueExpression::VectorLiteral(vec) => {
1851                                assert_eq!(vec.len(), 2);
1852                                assert_eq!(vec[0], 0.1);
1853                                assert_eq!(vec[1], 0.2);
1854                            }
1855                            _ => panic!("Expected VectorLiteral"),
1856                        }
1857                        assert_eq!(metric, DistanceMetric::L2);
1858                    }
1859                    _ => panic!("Expected VectorDistance"),
1860                }
1861                assert_eq!(operator, ComparisonOperator::LessThan);
1862            }
1863            _ => panic!("Expected comparison"),
1864        }
1865    }
1866
1867    // UNWIND parser tests
1868    #[test]
1869    fn test_parse_unwind_simple() {
1870        let query = "UNWIND [1, 2, 3] AS num RETURN num";
1871        let ast = parse_cypher_query(query);
1872        assert!(ast.is_ok(), "Failed to parse simple UNWIND query");
1873    }
1874
1875    #[test]
1876    fn test_parse_unwind_after_match() {
1877        let query = "MATCH (n) UNWIND n.list AS item RETURN item";
1878        let ast = parse_cypher_query(query);
1879        assert!(ast.is_ok(), "Failed to parse UNWIND after MATCH");
1880    }
1881}