Skip to main content

lance_graph/
semantic.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

//! Semantic analysis for graph queries
//!
//! This module implements the semantic analysis phase of the query pipeline:
//! Parse → **Semantic Analysis** → Logical Plan → Physical Plan
//!
//! Semantic analysis validates the query and enriches the AST with type information.

11use crate::ast::*;
12use crate::case_insensitive::CaseInsensitiveLookup;
13use crate::config::GraphConfig;
14use crate::error::{GraphError, Result};
15use std::collections::{HashMap, HashSet};
16
17/// Semantic analyzer - validates and enriches the AST
18pub struct SemanticAnalyzer {
19    config: GraphConfig,
20    variables: HashMap<String, VariableInfo>,
21    current_scope: ScopeType,
22}
23
24/// Information about a variable in the query
25#[derive(Debug, Clone)]
26pub struct VariableInfo {
27    pub name: String,
28    pub variable_type: VariableType,
29    pub labels: Vec<String>,
30    pub properties: HashSet<String>,
31    pub defined_in: ScopeType,
32}
33
/// Kind of entity a query variable is bound to.
///
/// Equality is total for this fieldless enum, so `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VariableType {
    /// Variable bound by a node pattern.
    Node,
    /// Variable bound by a relationship pattern.
    Relationship,
    /// Variable bound to a path.
    Path,
    /// Plain value; used for UNWIND aliases and projection aliases.
    Property,
}
42
/// Clause scope in which a variable is registered.
///
/// Equality is total for this fieldless enum, so `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScopeType {
    /// Reading clauses (MATCH and UNWIND), including those after WITH.
    Match,
    /// WHERE clause that precedes WITH.
    Where,
    /// WITH clause.
    With,
    /// WHERE clause that follows WITH.
    PostWithWhere,
    /// RETURN clause.
    Return,
    /// ORDER BY clause.
    OrderBy,
}
53
54/// Semantic analysis result with validated and enriched AST
55#[derive(Debug, Clone)]
56pub struct SemanticResult {
57    pub variables: HashMap<String, VariableInfo>,
58    pub errors: Vec<String>,
59    pub warnings: Vec<String>,
60}
61
62impl SemanticAnalyzer {
63    pub fn new(config: GraphConfig) -> Self {
64        Self {
65            config,
66            variables: HashMap::new(),
67            current_scope: ScopeType::Match,
68        }
69    }
70
71    /// Analyze a Cypher query AST
72    pub fn analyze(&mut self, query: &CypherQuery) -> Result<SemanticResult> {
73        let mut errors = Vec::new();
74        let mut warnings = Vec::new();
75
76        // Phase 1: Variable discovery in READING clauses (MATCH/UNWIND)
77        self.current_scope = ScopeType::Match;
78        for clause in &query.reading_clauses {
79            match clause {
80                ReadingClause::Match(match_clause) => {
81                    if let Err(e) = self.analyze_match_clause(match_clause) {
82                        errors.push(format!("MATCH clause error: {}", e));
83                    }
84                }
85                ReadingClause::Unwind(unwind_clause) => {
86                    if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
87                        errors.push(format!("UNWIND clause error: {}", e));
88                    }
89                }
90            }
91        }
92
93        // Phase 2: Validate WHERE clause (before WITH)
94        if let Some(where_clause) = &query.where_clause {
95            self.current_scope = ScopeType::Where;
96            if let Err(e) = self.analyze_where_clause(where_clause) {
97                errors.push(format!("WHERE clause error: {}", e));
98            }
99        }
100
101        // Phase 3: Validate WITH clause if present
102        if let Some(with_clause) = &query.with_clause {
103            self.current_scope = ScopeType::With;
104            if let Err(e) = self.analyze_with_clause(with_clause) {
105                errors.push(format!("WITH clause error: {}", e));
106            }
107        }
108
109        // Phase 4: Variable discovery in post-WITH READING clauses (query chaining)
110        self.current_scope = ScopeType::Match;
111        for clause in &query.post_with_reading_clauses {
112            match clause {
113                ReadingClause::Match(match_clause) => {
114                    if let Err(e) = self.analyze_match_clause(match_clause) {
115                        errors.push(format!("Post-WITH MATCH clause error: {}", e));
116                    }
117                }
118                ReadingClause::Unwind(unwind_clause) => {
119                    if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
120                        errors.push(format!("Post-WITH UNWIND clause error: {}", e));
121                    }
122                }
123            }
124        }
125
126        // Phase 4: Validate post-WITH WHERE clause if present
127        if let Some(post_where) = &query.post_with_where_clause {
128            self.current_scope = ScopeType::PostWithWhere;
129            if let Err(e) = self.analyze_where_clause(post_where) {
130                errors.push(format!("Post-WITH WHERE clause error: {}", e));
131            }
132        }
133
134        // Phase 5: Validate RETURN clause
135        self.current_scope = ScopeType::Return;
136        if let Err(e) = self.analyze_return_clause(&query.return_clause) {
137            errors.push(format!("RETURN clause error: {}", e));
138        }
139
140        // Phase 6: Validate ORDER BY clause
141        if let Some(order_by) = &query.order_by {
142            self.current_scope = ScopeType::OrderBy;
143            if let Err(e) = self.analyze_order_by_clause(order_by) {
144                errors.push(format!("ORDER BY clause error: {}", e));
145            }
146        }
147
148        // Phase 7: Schema validation
149        self.validate_schema(&mut warnings);
150
151        // Phase 8: Type checking
152        self.validate_types(&mut errors);
153
154        Ok(SemanticResult {
155            variables: self.variables.clone(),
156            errors,
157            warnings,
158        })
159    }
160
161    /// Analyze MATCH clause and discover variables
162    fn analyze_match_clause(&mut self, match_clause: &MatchClause) -> Result<()> {
163        for pattern in &match_clause.patterns {
164            self.analyze_graph_pattern(pattern)?;
165        }
166        Ok(())
167    }
168
169    /// Analyze UNWIND clause and register variables
170    fn analyze_unwind_clause(&mut self, unwind_clause: &UnwindClause) -> Result<()> {
171        self.analyze_value_expression(&unwind_clause.expression)?;
172
173        // Register the aliased variable (normalize to lowercase for case-insensitive behavior)
174        let var_name = &unwind_clause.alias;
175        let var_name_lower = var_name.to_lowercase();
176        if let Some(existing) = self.variables.get_mut(&var_name_lower) {
177            // Shadowing or redefinition - in Cypher variables can be bound multiple times in some contexts
178            // But here we enforce uniqueness of types mostly.
179            // For now, treat UNWIND alias as a Property type variable.
180            if existing.variable_type != VariableType::Property {
181                return Err(GraphError::PlanError {
182                    message: format!("Variable '{}' redefined with different type", var_name),
183                    location: snafu::Location::new(file!(), line!(), column!()),
184                });
185            }
186        } else {
187            let var_info = VariableInfo {
188                name: var_name.clone(),
189                variable_type: VariableType::Property,
190                labels: vec![],
191                properties: HashSet::new(),
192                defined_in: self.current_scope.clone(),
193            };
194            self.variables.insert(var_name_lower, var_info);
195        }
196        Ok(())
197    }
198
199    /// Analyze a graph pattern and register variables
200    fn analyze_graph_pattern(&mut self, pattern: &GraphPattern) -> Result<()> {
201        match pattern {
202            GraphPattern::Node(node) => {
203                self.register_node_variable(node)?;
204            }
205            GraphPattern::Path(path) => {
206                // Register start node
207                self.register_node_variable(&path.start_node)?;
208
209                // Register variables in each segment
210                for segment in &path.segments {
211                    // Validate relationship length constraints if present
212                    self.validate_length_range(&segment.relationship)?;
213                    // Register relationship variable if present
214                    if let Some(rel_var) = &segment.relationship.variable {
215                        self.register_relationship_variable(rel_var, &segment.relationship)?;
216                    }
217
218                    // Register end node
219                    self.register_node_variable(&segment.end_node)?;
220                }
221            }
222        }
223        Ok(())
224    }
225
226    /// Register a node variable
227    fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> {
228        if let Some(var_name) = &node.variable {
229            // Normalize to lowercase for case-insensitive behavior
230            let var_name_lower = var_name.to_lowercase();
231            if let Some(existing) = self.variables.get_mut(&var_name_lower) {
232                if existing.variable_type != VariableType::Node {
233                    return Err(GraphError::PlanError {
234                        message: format!("Variable '{}' redefined with different type", var_name),
235                        location: snafu::Location::new(file!(), line!(), column!()),
236                    });
237                }
238                for label in &node.labels {
239                    if !existing.labels.contains(label) {
240                        existing.labels.push(label.clone());
241                    }
242                }
243                for prop in node.properties.keys() {
244                    existing.properties.insert(prop.clone());
245                }
246            } else {
247                let var_info = VariableInfo {
248                    name: var_name.clone(),
249                    variable_type: VariableType::Node,
250                    labels: node.labels.clone(),
251                    properties: node.properties.keys().cloned().collect(),
252                    defined_in: self.current_scope.clone(),
253                };
254                self.variables.insert(var_name_lower, var_info);
255            }
256        }
257        Ok(())
258    }
259
260    /// Register a relationship variable
261    fn register_relationship_variable(
262        &mut self,
263        var_name: &str,
264        rel: &RelationshipPattern,
265    ) -> Result<()> {
266        // Normalize to lowercase for case-insensitive behavior
267        let var_name_lower = var_name.to_lowercase();
268        if let Some(existing) = self.variables.get_mut(&var_name_lower) {
269            if existing.variable_type != VariableType::Relationship {
270                return Err(GraphError::PlanError {
271                    message: format!("Variable '{}' redefined with different type", var_name),
272                    location: snafu::Location::new(file!(), line!(), column!()),
273                });
274            }
275            for rel_type in &rel.types {
276                if !existing.labels.contains(rel_type) {
277                    existing.labels.push(rel_type.clone());
278                }
279            }
280            for prop in rel.properties.keys() {
281                existing.properties.insert(prop.clone());
282            }
283        } else {
284            let var_info = VariableInfo {
285                name: var_name.to_string(),
286                variable_type: VariableType::Relationship,
287                labels: rel.types.clone(), // Relationship types are like labels
288                properties: rel.properties.keys().cloned().collect(),
289                defined_in: self.current_scope.clone(),
290            };
291            self.variables.insert(var_name_lower, var_info);
292        }
293        Ok(())
294    }
295
296    /// Analyze WHERE clause
297    fn analyze_where_clause(&mut self, where_clause: &WhereClause) -> Result<()> {
298        self.analyze_boolean_expression(&where_clause.expression)
299    }
300
301    /// Analyze boolean expression and check variable references
302    fn analyze_boolean_expression(&mut self, expr: &BooleanExpression) -> Result<()> {
303        match expr {
304            BooleanExpression::Comparison { left, right, .. } => {
305                self.analyze_value_expression(left)?;
306                self.analyze_value_expression(right)?;
307            }
308            BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => {
309                self.analyze_boolean_expression(left)?;
310                self.analyze_boolean_expression(right)?;
311            }
312            BooleanExpression::Not(inner) => {
313                self.analyze_boolean_expression(inner)?;
314            }
315            BooleanExpression::Exists(prop_ref) => {
316                self.validate_property_reference(prop_ref)?;
317            }
318            BooleanExpression::In { expression, list } => {
319                self.analyze_value_expression(expression)?;
320                for item in list {
321                    self.analyze_value_expression(item)?;
322                }
323            }
324            BooleanExpression::Like { expression, .. } => {
325                self.analyze_value_expression(expression)?;
326            }
327            BooleanExpression::ILike { expression, .. } => {
328                self.analyze_value_expression(expression)?;
329            }
330            BooleanExpression::Contains { expression, .. } => {
331                self.analyze_value_expression(expression)?;
332            }
333            BooleanExpression::StartsWith { expression, .. } => {
334                self.analyze_value_expression(expression)?;
335            }
336            BooleanExpression::EndsWith { expression, .. } => {
337                self.analyze_value_expression(expression)?;
338            }
339            BooleanExpression::IsNull(expression) => {
340                self.analyze_value_expression(expression)?;
341            }
342            BooleanExpression::IsNotNull(expression) => {
343                self.analyze_value_expression(expression)?;
344            }
345        }
346        Ok(())
347    }
348
349    /// Analyze value expression and check variable references
350    fn analyze_value_expression(&mut self, expr: &ValueExpression) -> Result<()> {
351        match expr {
352            ValueExpression::Property(prop_ref) => {
353                self.validate_property_reference(prop_ref)?;
354            }
355            ValueExpression::Literal(_) => {
356                // Literals are always valid
357            }
358            ValueExpression::Variable(var) => {
359                // Use case-insensitive lookup
360                if !self.variables.contains_key_ci(var) {
361                    return Err(GraphError::PlanError {
362                        message: format!("Undefined variable: '{}'", var),
363                        location: snafu::Location::new(file!(), line!(), column!()),
364                    });
365                }
366            }
367            ValueExpression::ScalarFunction { name, args } => {
368                let function_name = name.to_lowercase();
369                // Validate arity and known functions
370                match function_name.as_str() {
371                    "tolower" | "lower" | "toupper" | "upper" => {
372                        if args.len() != 1 {
373                            return Err(GraphError::PlanError {
374                                message: format!(
375                                    "{} requires exactly 1 argument, got {}",
376                                    name.to_uppercase(),
377                                    args.len()
378                                ),
379                                location: snafu::Location::new(file!(), line!(), column!()),
380                            });
381                        }
382                    }
383                    _ => {
384                        // Unknown scalar function - reject early with helpful error
385                        return Err(GraphError::UnsupportedFeature {
386                            feature: format!(
387                                "Cypher function '{}' is not implemented. Supported scalar functions: toLower, lower, toUpper, upper. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
388                                name
389                            ),
390                            location: snafu::Location::new(file!(), line!(), column!()),
391                        });
392                    }
393                }
394
395                // Validate arguments recursively
396                for arg in args {
397                    self.analyze_value_expression(arg)?;
398                }
399            }
400            ValueExpression::AggregateFunction {
401                name,
402                args,
403                distinct,
404            } => {
405                let function_name = name.to_lowercase();
406                // Validate known aggregate functions
407                match function_name.as_str() {
408                    "count" | "sum" | "avg" | "min" | "max" | "collect" => {
409                        // DISTINCT is only supported for COUNT
410                        // Other aggregates silently ignore it in execution, so reject early
411                        if *distinct && function_name != "count" {
412                            return Err(GraphError::UnsupportedFeature {
413                                feature: format!(
414                                    "DISTINCT is only supported with COUNT, not {}",
415                                    function_name.to_uppercase()
416                                ),
417                                location: snafu::Location::new(file!(), line!(), column!()),
418                            });
419                        }
420
421                        // COUNT(DISTINCT *) is semantically meaningless
422                        // It would count distinct values of lit(1) which is always 1
423                        if *distinct && function_name == "count" {
424                            if let Some(ValueExpression::Variable(v)) = args.first() {
425                                if v == "*" {
426                                    return Err(GraphError::PlanError {
427                                        message: "COUNT(DISTINCT *) is not supported. \
428                                            Use COUNT(*) to count all rows, or \
429                                            COUNT(DISTINCT property) to count distinct values."
430                                            .to_string(),
431                                        location: snafu::Location::new(file!(), line!(), column!()),
432                                    });
433                                }
434                            }
435                        }
436                        // All aggregates require exactly 1 argument
437                        if args.len() != 1 {
438                            return Err(GraphError::PlanError {
439                                message: format!(
440                                    "{} requires exactly 1 argument, got {}",
441                                    function_name.to_uppercase(),
442                                    args.len()
443                                ),
444                                location: snafu::Location::new(file!(), line!(), column!()),
445                            });
446                        }
447
448                        // Additional validation for SUM, AVG, MIN, MAX: they require properties, not bare variables
449                        // Only COUNT and COLLECT allow bare variables (COUNT(*), COUNT(p), COLLECT(p))
450                        if matches!(function_name.as_str(), "sum" | "avg" | "min" | "max") {
451                            if let Some(ValueExpression::Variable(v)) = args.first() {
452                                return Err(GraphError::PlanError {
453                                    message: format!(
454                                        "{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
455                                        function_name.to_uppercase(), v, function_name.to_uppercase(), function_name.to_uppercase(), v, function_name
456                                    ),
457                                    location: snafu::Location::new(file!(), line!(), column!()),
458                                });
459                            }
460                        }
461                    }
462                    _ => {
463                        // Unknown aggregate function - reject early
464                        return Err(GraphError::UnsupportedFeature {
465                            feature: format!(
466                                "Cypher aggregate function '{}' is not implemented. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
467                                name
468                            ),
469                            location: snafu::Location::new(file!(), line!(), column!()),
470                        });
471                    }
472                }
473
474                // Validate arguments recursively.
475                // Special-case COUNT(*) where '*' isn't a real variable.
476                for arg in args {
477                    if function_name == "count"
478                        && matches!(arg, ValueExpression::Variable(v) if v == "*")
479                    {
480                        continue;
481                    }
482                    self.analyze_value_expression(arg)?;
483                }
484            }
485            ValueExpression::Arithmetic { left, right, .. } => {
486                // Validate arithmetic operands recursively
487                self.analyze_value_expression(left)?;
488                self.analyze_value_expression(right)?;
489
490                // If both sides are literals, ensure they are numeric
491                let is_numeric_literal = |pv: &PropertyValue| {
492                    matches!(pv, PropertyValue::Integer(_) | PropertyValue::Float(_))
493                };
494
495                if let (ValueExpression::Literal(l1), ValueExpression::Literal(l2)) =
496                    (&**left, &**right)
497                {
498                    if !(is_numeric_literal(l1) && is_numeric_literal(l2)) {
499                        return Err(GraphError::PlanError {
500                            message: "Arithmetic requires numeric literal operands".to_string(),
501                            location: snafu::Location::new(file!(), line!(), column!()),
502                        });
503                    }
504                }
505            }
506            ValueExpression::VectorDistance { left, right, .. } => {
507                // Validate vector distance function arguments
508                self.analyze_value_expression(left)?;
509                self.analyze_value_expression(right)?;
510
511                // Check that at least one argument references a property
512                let has_property = matches!(**left, ValueExpression::Property(_))
513                    || matches!(**right, ValueExpression::Property(_));
514
515                if !has_property {
516                    return Err(GraphError::PlanError {
517                        message: "vector_distance() requires at least one argument to be a property reference".to_string(),
518                        location: snafu::Location::new(file!(), line!(), column!()),
519                    });
520                }
521            }
522            ValueExpression::VectorSimilarity { left, right, .. } => {
523                // Validate vector similarity function arguments
524                self.analyze_value_expression(left)?;
525                self.analyze_value_expression(right)?;
526
527                // Check that at least one argument references a property
528                let has_property = matches!(**left, ValueExpression::Property(_))
529                    || matches!(**right, ValueExpression::Property(_));
530
531                if !has_property {
532                    return Err(GraphError::PlanError {
533                        message: "vector_similarity() requires at least one argument to be a property reference".to_string(),
534                        location: snafu::Location::new(file!(), line!(), column!()),
535                    });
536                }
537            }
538            ValueExpression::VectorLiteral(values) => {
539                // Validate non-empty
540                if values.is_empty() {
541                    return Err(GraphError::PlanError {
542                        message: "Vector literal cannot be empty".to_string(),
543                        location: snafu::Location::new(file!(), line!(), column!()),
544                    });
545                }
546
547                // Note: Very large vectors (>4096 dimensions) may impact performance
548                // but we don't enforce a hard limit here
549            }
550            ValueExpression::Parameter(_) => {
551                // Parameters are always valid (resolved at runtime)
552            }
553        }
554        Ok(())
555    }
556
557    fn register_projection_alias(&mut self, alias: &str) {
558        // Use case-insensitive lookup and store normalized key
559        if self.variables.contains_key_ci(alias) {
560            return;
561        }
562
563        let var_info = VariableInfo {
564            name: alias.to_string(),
565            variable_type: VariableType::Property,
566            labels: vec![],
567            properties: HashSet::new(),
568            defined_in: self.current_scope.clone(),
569        };
570        self.variables.insert(alias.to_lowercase(), var_info);
571    }
572
573    /// Validate property reference
574    fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> {
575        // Use case-insensitive lookup
576        if !self.variables.contains_key_ci(&prop_ref.variable) {
577            return Err(GraphError::PlanError {
578                message: format!("Undefined variable: '{}'", prop_ref.variable),
579                location: snafu::Location::new(file!(), line!(), column!()),
580            });
581        }
582        Ok(())
583    }
584
585    /// Analyze RETURN clause
586    fn analyze_return_clause(&mut self, return_clause: &ReturnClause) -> Result<()> {
587        for item in &return_clause.items {
588            self.analyze_value_expression(&item.expression)?;
589            if let Some(alias) = &item.alias {
590                self.register_projection_alias(alias);
591            }
592        }
593        Ok(())
594    }
595
596    /// Analyze WITH clause
597    fn analyze_with_clause(&mut self, with_clause: &WithClause) -> Result<()> {
598        // Validate WITH item expressions (similar to RETURN)
599        for item in &with_clause.items {
600            self.analyze_value_expression(&item.expression)?;
601            if let Some(alias) = &item.alias {
602                self.register_projection_alias(alias);
603            }
604        }
605        // Validate ORDER BY within WITH if present
606        if let Some(order_by) = &with_clause.order_by {
607            for item in &order_by.items {
608                self.analyze_value_expression(&item.expression)?;
609            }
610        }
611        Ok(())
612    }
613
614    /// Analyze ORDER BY clause
615    fn analyze_order_by_clause(&mut self, order_by: &OrderByClause) -> Result<()> {
616        for item in &order_by.items {
617            self.analyze_value_expression(&item.expression)?;
618        }
619        Ok(())
620    }
621
622    /// Validate schema references against configuration
623    fn validate_schema(&self, warnings: &mut Vec<String>) {
624        for var_info in self.variables.values() {
625            match var_info.variable_type {
626                VariableType::Node => {
627                    for label in &var_info.labels {
628                        if self.config.get_node_mapping(label).is_none() {
629                            warnings.push(format!("Node label '{}' not found in schema", label));
630                        }
631                    }
632                }
633                VariableType::Relationship => {
634                    for rel_type in &var_info.labels {
635                        if self.config.get_relationship_mapping(rel_type).is_none() {
636                            warnings.push(format!(
637                                "Relationship type '{}' not found in schema",
638                                rel_type
639                            ));
640                        }
641                    }
642                }
643                _ => {}
644            }
645        }
646    }
647
648    /// Validate types and operations
649    fn validate_types(&self, errors: &mut Vec<String>) {
650        // TODO: Implement type checking
651        // - Check that properties exist on nodes/relationships
652        // - Check that comparison operations are valid for data types
653        // - Check that arithmetic operations are valid
654
655        // Check that properties referenced in patterns exist in schema when property fields are defined
656        for var_info in self.variables.values() {
657            match var_info.variable_type {
658                VariableType::Node => {
659                    // Collect property_fields from all known label mappings that specify properties
660                    let mut label_property_sets: Vec<&[String]> = Vec::new();
661                    for label in &var_info.labels {
662                        if let Some(mapping) = self.config.get_node_mapping(label) {
663                            if !mapping.property_fields.is_empty() {
664                                label_property_sets.push(&mapping.property_fields);
665                            }
666                        }
667                    }
668
669                    if !label_property_sets.is_empty() {
670                        'prop: for prop in &var_info.properties {
671                            // Property is valid if present in at least one label's property_fields
672                            // Use case-insensitive comparison
673                            let prop_lower = prop.to_lowercase();
674                            for fields in &label_property_sets {
675                                if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
676                                    continue 'prop;
677                                }
678                            }
679                            errors.push(format!(
680                                "Property '{}' not found on labels {:?}",
681                                prop, var_info.labels
682                            ));
683                        }
684                    }
685                }
686                VariableType::Relationship => {
687                    // Collect property_fields from all known relationship mappings that specify properties
688                    let mut rel_property_sets: Vec<&[String]> = Vec::new();
689                    for rel_type in &var_info.labels {
690                        if let Some(mapping) = self.config.get_relationship_mapping(rel_type) {
691                            if !mapping.property_fields.is_empty() {
692                                rel_property_sets.push(&mapping.property_fields);
693                            }
694                        }
695                    }
696
697                    if !rel_property_sets.is_empty() {
698                        'prop_rel: for prop in &var_info.properties {
699                            // Use case-insensitive comparison for relationship properties
700                            let prop_lower = prop.to_lowercase();
701                            for fields in &rel_property_sets {
702                                if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
703                                    continue 'prop_rel;
704                                }
705                            }
706                            errors.push(format!(
707                                "Property '{}' not found on relationship types {:?}",
708                                prop, var_info.labels
709                            ));
710                        }
711                    }
712                }
713                _ => {}
714            }
715        }
716    }
717}
718
719impl SemanticAnalyzer {
720    fn validate_length_range(&self, rel: &RelationshipPattern) -> Result<()> {
721        if let Some(len) = &rel.length {
722            if let (Some(min), Some(max)) = (len.min, len.max) {
723                if min > max {
724                    return Err(GraphError::PlanError {
725                        message: "Invalid path length range: min > max".to_string(),
726                        location: snafu::Location::new(file!(), line!(), column!()),
727                    });
728                }
729            }
730        }
731        Ok(())
732    }
733}
734
735#[cfg(test)]
736mod tests {
737    use super::*;
738    use crate::ast::{
739        ArithmeticOperator, BooleanExpression, CypherQuery, GraphPattern, LengthRange, MatchClause,
740        NodePattern, PathPattern, PathSegment, PropertyRef, PropertyValue, RelationshipDirection,
741        RelationshipPattern, ReturnClause, ReturnItem, ValueExpression, WhereClause,
742    };
743    use crate::config::{GraphConfig, NodeMapping};
744
745    fn test_config() -> GraphConfig {
746        GraphConfig::builder()
747            .with_node_label("Person", "id")
748            .with_node_label("Employee", "id")
749            .with_node_label("Company", "id")
750            .with_relationship("KNOWS", "src_id", "dst_id")
751            .build()
752            .unwrap()
753    }
754
755    // Helper: analyze a query that only has a single RETURN expression
756    fn analyze_return_expr(expr: ValueExpression) -> Result<SemanticResult> {
757        let query = CypherQuery {
758            reading_clauses: vec![],
759            where_clause: None,
760            with_clause: None,
761            post_with_reading_clauses: vec![],
762            post_with_where_clause: None,
763            return_clause: ReturnClause {
764                distinct: false,
765                items: vec![ReturnItem {
766                    expression: expr,
767                    alias: None,
768                }],
769            },
770            limit: None,
771            order_by: None,
772            skip: None,
773        };
774        let mut analyzer = SemanticAnalyzer::new(test_config());
775        analyzer.analyze(&query)
776    }
777
778    // Helper: analyze a query with a single MATCH (var:label) and a RETURN expression
779    fn analyze_return_with_match(
780        var: &str,
781        label: &str,
782        expr: ValueExpression,
783    ) -> Result<SemanticResult> {
784        let node = NodePattern::new(Some(var.to_string())).with_label(label);
785        let query = CypherQuery {
786            reading_clauses: vec![ReadingClause::Match(MatchClause {
787                patterns: vec![GraphPattern::Node(node)],
788            })],
789            where_clause: None,
790            with_clause: None,
791            post_with_reading_clauses: vec![],
792            post_with_where_clause: None,
793            return_clause: ReturnClause {
794                distinct: false,
795                items: vec![ReturnItem {
796                    expression: expr,
797                    alias: None,
798                }],
799            },
800            limit: None,
801            order_by: None,
802            skip: None,
803        };
804        let mut analyzer = SemanticAnalyzer::new(test_config());
805        analyzer.analyze(&query)
806    }
807
808    #[test]
809    fn test_merge_node_variable_metadata() {
810        // MATCH (n:Person {age: 30}), (n:Employee {dept: "X"})
811        let node1 = NodePattern::new(Some("n".to_string()))
812            .with_label("Person")
813            .with_property("age", PropertyValue::Integer(30));
814        let node2 = NodePattern::new(Some("n".to_string()))
815            .with_label("Employee")
816            .with_property("dept", PropertyValue::String("X".to_string()));
817
818        let query = CypherQuery {
819            reading_clauses: vec![ReadingClause::Match(MatchClause {
820                patterns: vec![GraphPattern::Node(node1), GraphPattern::Node(node2)],
821            })],
822            where_clause: None,
823            with_clause: None,
824            post_with_reading_clauses: vec![],
825            post_with_where_clause: None,
826            return_clause: ReturnClause {
827                distinct: false,
828                items: vec![],
829            },
830            limit: None,
831            order_by: None,
832            skip: None,
833        };
834
835        let mut analyzer = SemanticAnalyzer::new(test_config());
836        let result = analyzer.analyze(&query).unwrap();
837        assert!(result.errors.is_empty());
838        let n = result.variables.get("n").expect("variable n present");
839        // Labels merged
840        assert!(n.labels.contains(&"Person".to_string()));
841        assert!(n.labels.contains(&"Employee".to_string()));
842        // Properties unioned
843        assert!(n.properties.contains("age"));
844        assert!(n.properties.contains("dept"));
845    }
846
847    #[test]
848    fn test_invalid_length_range_collects_error() {
849        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
850        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
851        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
852            .with_variable("r")
853            .with_type("KNOWS");
854        rel.length = Some(LengthRange {
855            min: Some(3),
856            max: Some(2),
857        });
858
859        let path = PathPattern {
860            start_node: start,
861            segments: vec![PathSegment {
862                relationship: rel,
863                end_node: end,
864            }],
865        };
866
867        let query = CypherQuery {
868            reading_clauses: vec![ReadingClause::Match(MatchClause {
869                patterns: vec![GraphPattern::Path(path)],
870            })],
871            where_clause: None,
872            with_clause: None,
873            post_with_reading_clauses: vec![],
874            post_with_where_clause: None,
875            return_clause: ReturnClause {
876                distinct: false,
877                items: vec![],
878            },
879            limit: None,
880            order_by: None,
881            skip: None,
882        };
883
884        let mut analyzer = SemanticAnalyzer::new(test_config());
885        let result = analyzer.analyze(&query).unwrap();
886        assert!(result
887            .errors
888            .iter()
889            .any(|e| e.contains("Invalid path length range")));
890    }
891
892    #[test]
893    fn test_undefined_variable_in_where() {
894        // MATCH (n:Person) WHERE EXISTS(m.name)
895        let node = NodePattern::new(Some("n".to_string())).with_label("Person");
896        let where_clause = WhereClause {
897            expression: BooleanExpression::Exists(PropertyRef::new("m", "name")),
898        };
899        let query = CypherQuery {
900            reading_clauses: vec![ReadingClause::Match(MatchClause {
901                patterns: vec![GraphPattern::Node(node)],
902            })],
903            where_clause: Some(where_clause),
904            with_clause: None,
905            post_with_reading_clauses: vec![],
906            post_with_where_clause: None,
907            return_clause: ReturnClause {
908                distinct: false,
909                items: vec![],
910            },
911            limit: None,
912            order_by: None,
913            skip: None,
914        };
915
916        let mut analyzer = SemanticAnalyzer::new(test_config());
917        let result = analyzer.analyze(&query).unwrap();
918        assert!(result
919            .errors
920            .iter()
921            .any(|e| e.contains("Undefined variable: 'm'")));
922    }
923
924    #[test]
925    fn test_variable_redefinition_between_node_and_relationship() {
926        // MATCH (n:Person)-[n:KNOWS]->(m:Person)
927        let start = NodePattern::new(Some("n".to_string())).with_label("Person");
928        let end = NodePattern::new(Some("m".to_string())).with_label("Person");
929        let rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
930            .with_variable("n")
931            .with_type("KNOWS");
932
933        let path = PathPattern {
934            start_node: start,
935            segments: vec![PathSegment {
936                relationship: rel,
937                end_node: end,
938            }],
939        };
940
941        let query = CypherQuery {
942            reading_clauses: vec![ReadingClause::Match(MatchClause {
943                patterns: vec![GraphPattern::Path(path)],
944            })],
945            where_clause: None,
946            with_clause: None,
947            post_with_reading_clauses: vec![],
948            post_with_where_clause: None,
949            return_clause: ReturnClause {
950                distinct: false,
951                items: vec![],
952            },
953            limit: None,
954            order_by: None,
955            skip: None,
956        };
957
958        let mut analyzer = SemanticAnalyzer::new(test_config());
959        let result = analyzer.analyze(&query).unwrap();
960        assert!(result
961            .errors
962            .iter()
963            .any(|e| e.contains("redefined with different type")));
964    }
965
966    #[test]
967    fn test_unknown_node_label_warns() {
968        // MATCH (x:Unknown)
969        let node = NodePattern::new(Some("x".to_string())).with_label("Unknown");
970        let query = CypherQuery {
971            reading_clauses: vec![ReadingClause::Match(MatchClause {
972                patterns: vec![GraphPattern::Node(node)],
973            })],
974            post_with_reading_clauses: vec![],
975            post_with_where_clause: None,
976            where_clause: None,
977            with_clause: None,
978            return_clause: ReturnClause {
979                distinct: false,
980                items: vec![],
981            },
982            limit: None,
983            order_by: None,
984            skip: None,
985        };
986
987        let mut analyzer = SemanticAnalyzer::new(test_config());
988        let result = analyzer.analyze(&query).unwrap();
989        assert!(result
990            .warnings
991            .iter()
992            .any(|w| w.contains("Node label 'Unknown' not found in schema")));
993    }
994
995    #[test]
996    fn test_property_not_in_schema_reports_error() {
997        // Configure Person with allowed property 'name' only
998        let custom_config = GraphConfig::builder()
999            .with_node_mapping(
1000                NodeMapping::new("Person", "id").with_properties(vec!["name".to_string()]),
1001            )
1002            .with_relationship("KNOWS", "src_id", "dst_id")
1003            .build()
1004            .unwrap();
1005
1006        // MATCH (n:Person {age: 30})
1007        let node = NodePattern::new(Some("n".to_string()))
1008            .with_label("Person")
1009            .with_property("age", PropertyValue::Integer(30));
1010        let query = CypherQuery {
1011            reading_clauses: vec![ReadingClause::Match(MatchClause {
1012                patterns: vec![GraphPattern::Node(node)],
1013            })],
1014            post_with_reading_clauses: vec![],
1015            post_with_where_clause: None,
1016            where_clause: None,
1017            with_clause: None,
1018            return_clause: ReturnClause {
1019                distinct: false,
1020                items: vec![],
1021            },
1022            limit: None,
1023            order_by: None,
1024            skip: None,
1025        };
1026
1027        let mut analyzer = SemanticAnalyzer::new(custom_config);
1028        let result = analyzer.analyze(&query).unwrap();
1029        assert!(result
1030            .errors
1031            .iter()
1032            .any(|e| e.contains("Property 'age' not found on labels [\"Person\"]")));
1033    }
1034
1035    #[test]
1036    fn test_valid_length_range_ok() {
1037        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
1038        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
1039        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
1040            .with_variable("r")
1041            .with_type("KNOWS");
1042        rel.length = Some(LengthRange {
1043            min: Some(2),
1044            max: Some(3),
1045        });
1046
1047        let path = PathPattern {
1048            start_node: start,
1049            segments: vec![PathSegment {
1050                relationship: rel,
1051                end_node: end,
1052            }],
1053        };
1054
1055        let query = CypherQuery {
1056            reading_clauses: vec![ReadingClause::Match(MatchClause {
1057                patterns: vec![GraphPattern::Path(path)],
1058            })],
1059            post_with_reading_clauses: vec![],
1060            post_with_where_clause: None,
1061            where_clause: None,
1062            with_clause: None,
1063            return_clause: ReturnClause {
1064                distinct: false,
1065                items: vec![],
1066            },
1067            limit: None,
1068            order_by: None,
1069            skip: None,
1070        };
1071
1072        let mut analyzer = SemanticAnalyzer::new(test_config());
1073        let result = analyzer.analyze(&query).unwrap();
1074        assert!(result
1075            .errors
1076            .iter()
1077            .all(|e| !e.contains("Invalid path length range")));
1078    }
1079
1080    #[test]
1081    fn test_relationship_variable_metadata_merge_across_segments() {
1082        // Path with two segments sharing the same relationship variable 'r'
1083        // (a:Person)-[r:KNOWS {since: 2020}]->(b:Person)-[r:FRIEND {level: 1}]->(c:Person)
1084        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
1085        let mid = NodePattern::new(Some("b".to_string())).with_label("Person");
1086        let end = NodePattern::new(Some("c".to_string())).with_label("Person");
1087
1088        let mut rel1 = RelationshipPattern::new(RelationshipDirection::Outgoing)
1089            .with_variable("r")
1090            .with_type("KNOWS")
1091            .with_property("since", PropertyValue::Integer(2020));
1092        rel1.length = None;
1093
1094        let mut rel2 = RelationshipPattern::new(RelationshipDirection::Outgoing)
1095            .with_variable("r")
1096            .with_type("FRIEND")
1097            .with_property("level", PropertyValue::Integer(1));
1098        rel2.length = None;
1099
1100        let path = PathPattern {
1101            start_node: start,
1102            segments: vec![
1103                PathSegment {
1104                    relationship: rel1,
1105                    end_node: mid,
1106                },
1107                PathSegment {
1108                    relationship: rel2,
1109                    end_node: end,
1110                },
1111            ],
1112        };
1113
1114        // Custom config that knows both relationship types to avoid warnings muddying the assertion
1115        let custom_config = GraphConfig::builder()
1116            .with_node_label("Person", "id")
1117            .with_relationship("KNOWS", "src_id", "dst_id")
1118            .with_relationship("FRIEND", "src_id", "dst_id")
1119            .build()
1120            .unwrap();
1121
1122        let query = CypherQuery {
1123            reading_clauses: vec![ReadingClause::Match(MatchClause {
1124                patterns: vec![GraphPattern::Path(path)],
1125            })],
1126            post_with_reading_clauses: vec![],
1127            post_with_where_clause: None,
1128            where_clause: None,
1129            with_clause: None,
1130            return_clause: ReturnClause {
1131                distinct: false,
1132                items: vec![],
1133            },
1134            limit: None,
1135            order_by: None,
1136            skip: None,
1137        };
1138
1139        let mut analyzer = SemanticAnalyzer::new(custom_config);
1140        let result = analyzer.analyze(&query).unwrap();
1141        let r = result.variables.get("r").expect("variable r present");
1142        // Types merged
1143        assert!(r.labels.contains(&"KNOWS".to_string()));
1144        assert!(r.labels.contains(&"FRIEND".to_string()));
1145        // Properties unioned
1146        assert!(r.properties.contains("since"));
1147        assert!(r.properties.contains("level"));
1148    }
1149
1150    #[test]
1151    fn test_function_argument_undefined_variable_in_return() {
1152        // RETURN toUpper(m.name)
1153        let expr = ValueExpression::ScalarFunction {
1154            name: "toUpper".to_string(),
1155            args: vec![ValueExpression::Property(PropertyRef::new("m", "name"))],
1156        };
1157        let result = analyze_return_expr(expr).unwrap();
1158        assert!(result
1159            .errors
1160            .iter()
1161            .any(|e| e.contains("Undefined variable: 'm'")));
1162    }
1163
1164    #[test]
1165    fn test_function_argument_valid_variable_ok() {
1166        // MATCH (n:Person) RETURN toUpper(n.name)
1167        let expr = ValueExpression::ScalarFunction {
1168            name: "toUpper".to_string(),
1169            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
1170        };
1171        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1172        assert!(result.errors.is_empty());
1173    }
1174
1175    #[test]
1176    fn test_arithmetic_with_undefined_variable_in_return() {
1177        // RETURN x + 1
1178        let expr = ValueExpression::Arithmetic {
1179            left: Box::new(ValueExpression::Variable("x".to_string())),
1180            operator: ArithmeticOperator::Add,
1181            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1182        };
1183        let result = analyze_return_expr(expr).unwrap();
1184        assert!(result
1185            .errors
1186            .iter()
1187            .any(|e| e.contains("Undefined variable: 'x'")));
1188    }
1189
1190    #[test]
1191    fn test_arithmetic_with_defined_property_ok() {
1192        let expr = ValueExpression::Arithmetic {
1193            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1194            operator: ArithmeticOperator::Add,
1195            right: Box::new(ValueExpression::Property(PropertyRef::new("n", "age"))),
1196        };
1197        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1198        // Should not report undefined variable 'n'
1199        assert!(result
1200            .errors
1201            .iter()
1202            .all(|e| !e.contains("Undefined variable: 'n'")));
1203    }
1204
1205    #[test]
1206    fn test_count_with_multiple_args_fails_validation() {
1207        // COUNT(n.age, n.name) should fail semantic validation
1208        let expr = ValueExpression::AggregateFunction {
1209            name: "count".to_string(),
1210            args: vec![
1211                ValueExpression::Property(PropertyRef::new("n", "age")),
1212                ValueExpression::Property(PropertyRef::new("n", "name")),
1213            ],
1214            distinct: false,
1215        };
1216        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1217        assert!(
1218            result
1219                .errors
1220                .iter()
1221                .any(|e| e.contains("COUNT requires exactly 1 argument")),
1222            "Expected error about COUNT arity, got: {:?}",
1223            result.errors
1224        );
1225    }
1226
1227    #[test]
1228    fn test_count_with_zero_args_fails_validation() {
1229        // COUNT() with no arguments should fail
1230        let expr = ValueExpression::AggregateFunction {
1231            name: "count".to_string(),
1232            args: vec![],
1233            distinct: false,
1234        };
1235        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1236        assert!(
1237            result
1238                .errors
1239                .iter()
1240                .any(|e| e.contains("COUNT requires exactly 1 argument")),
1241            "Expected error about COUNT arity, got: {:?}",
1242            result.errors
1243        );
1244    }
1245
1246    #[test]
1247    fn test_count_with_one_arg_passes_validation() {
1248        // COUNT(n.age) should pass validation
1249        let expr = ValueExpression::AggregateFunction {
1250            name: "count".to_string(),
1251            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1252            distinct: false,
1253        };
1254        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1255        assert!(
1256            result
1257                .errors
1258                .iter()
1259                .all(|e| !e.contains("COUNT requires exactly 1 argument")),
1260            "COUNT with 1 arg should not produce arity error, got: {:?}",
1261            result.errors
1262        );
1263    }
1264
1265    #[test]
1266    fn test_count_star_passes_validation() {
1267        // COUNT(*) should be allowed (special-cased in semantic analysis)
1268        let expr = ValueExpression::AggregateFunction {
1269            name: "count".to_string(),
1270            args: vec![ValueExpression::Variable("*".to_string())],
1271            distinct: false,
1272        };
1273        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1274        assert!(
1275            result.errors.is_empty(),
1276            "Expected COUNT(*) to pass semantic validation, got: {:?}",
1277            result.errors
1278        );
1279    }
1280
1281    #[test]
1282    fn test_unimplemented_scalar_function_fails_validation() {
1283        let expr = ValueExpression::ScalarFunction {
1284            name: "replace".to_string(),
1285            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
1286        };
1287        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1288        // ScalarFunction with unknown name collects an error
1289        assert!(
1290            result
1291                .errors
1292                .iter()
1293                .any(|e| e.to_lowercase().contains("not implemented")),
1294            "Expected semantic validation to reject unimplemented function, got: {:?}",
1295            result.errors
1296        );
1297    }
1298
1299    #[test]
1300    fn test_sum_with_variable_fails_validation() {
1301        let expr = ValueExpression::AggregateFunction {
1302            name: "sum".to_string(),
1303            args: vec![ValueExpression::Variable("n".to_string())],
1304            distinct: false,
1305        };
1306        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1307        assert!(
1308            !result.errors.is_empty(),
1309            "Expected SUM(variable) to produce validation errors"
1310        );
1311        let has_sum_error = result
1312            .errors
1313            .iter()
1314            .any(|e| e.contains("SUM(n) is invalid") && e.contains("requires a property"));
1315        assert!(
1316            has_sum_error,
1317            "Expected error about SUM requiring property, got: {:?}",
1318            result.errors
1319        );
1320    }
1321
1322    #[test]
1323    fn test_avg_with_variable_fails_validation() {
1324        let expr = ValueExpression::AggregateFunction {
1325            name: "avg".to_string(),
1326            args: vec![ValueExpression::Variable("n".to_string())],
1327            distinct: false,
1328        };
1329        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1330        assert!(
1331            !result.errors.is_empty(),
1332            "Expected AVG(variable) to produce validation errors"
1333        );
1334        let has_avg_error = result
1335            .errors
1336            .iter()
1337            .any(|e| e.contains("AVG(n) is invalid") && e.contains("requires a property"));
1338        assert!(
1339            has_avg_error,
1340            "Expected error about AVG requiring property, got: {:?}",
1341            result.errors
1342        );
1343    }
1344
1345    #[test]
1346    fn test_sum_with_property_passes_validation() {
1347        let expr = ValueExpression::AggregateFunction {
1348            name: "sum".to_string(),
1349            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1350            distinct: false,
1351        };
1352        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1353        assert!(
1354            result.errors.is_empty(),
1355            "SUM with property should pass validation, got errors: {:?}",
1356            result.errors
1357        );
1358    }
1359
1360    #[test]
1361    fn test_min_with_variable_fails_validation() {
1362        let expr = ValueExpression::AggregateFunction {
1363            name: "min".to_string(),
1364            args: vec![ValueExpression::Variable("n".to_string())],
1365            distinct: false,
1366        };
1367        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1368        assert!(
1369            !result.errors.is_empty(),
1370            "Expected MIN(variable) to produce validation errors"
1371        );
1372        let has_min_error = result
1373            .errors
1374            .iter()
1375            .any(|e| e.contains("MIN(n) is invalid") && e.contains("requires a property"));
1376        assert!(
1377            has_min_error,
1378            "Expected error about MIN requiring property, got: {:?}",
1379            result.errors
1380        );
1381    }
1382
1383    #[test]
1384    fn test_max_with_variable_fails_validation() {
1385        let expr = ValueExpression::AggregateFunction {
1386            name: "max".to_string(),
1387            args: vec![ValueExpression::Variable("n".to_string())],
1388            distinct: false,
1389        };
1390        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1391        assert!(
1392            !result.errors.is_empty(),
1393            "Expected MAX(variable) to produce validation errors"
1394        );
1395        let has_max_error = result
1396            .errors
1397            .iter()
1398            .any(|e| e.contains("MAX(n) is invalid") && e.contains("requires a property"));
1399        assert!(
1400            has_max_error,
1401            "Expected error about MAX requiring property, got: {:?}",
1402            result.errors
1403        );
1404    }
1405
1406    #[test]
1407    fn test_min_with_property_passes_validation() {
1408        let expr = ValueExpression::AggregateFunction {
1409            name: "min".to_string(),
1410            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1411            distinct: false,
1412        };
1413        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1414        assert!(
1415            result.errors.is_empty(),
1416            "MIN with property should pass validation, got errors: {:?}",
1417            result.errors
1418        );
1419    }
1420
1421    #[test]
1422    fn test_max_with_property_passes_validation() {
1423        let expr = ValueExpression::AggregateFunction {
1424            name: "max".to_string(),
1425            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1426            distinct: false,
1427        };
1428        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1429        assert!(
1430            result.errors.is_empty(),
1431            "MAX with property should pass validation, got errors: {:?}",
1432            result.errors
1433        );
1434    }
1435
1436    #[test]
1437    fn test_distinct_only_supported_on_count() {
1438        // SUM(DISTINCT n.age) should fail - DISTINCT only supported for COUNT
1439        let expr = ValueExpression::AggregateFunction {
1440            name: "sum".to_string(),
1441            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1442            distinct: true,
1443        };
1444        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1445        assert!(
1446            result
1447                .errors
1448                .iter()
1449                .any(|e| e.contains("DISTINCT is only supported with COUNT")),
1450            "Expected error about DISTINCT only for COUNT, got: {:?}",
1451            result.errors
1452        );
1453    }
1454
1455    #[test]
1456    fn test_count_distinct_star_rejected() {
1457        // COUNT(DISTINCT *) is semantically meaningless - should be rejected
1458        let expr = ValueExpression::AggregateFunction {
1459            name: "count".to_string(),
1460            args: vec![ValueExpression::Variable("*".to_string())],
1461            distinct: true,
1462        };
1463        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1464        assert!(
1465            result
1466                .errors
1467                .iter()
1468                .any(|e| e.contains("COUNT(DISTINCT *)")),
1469            "Expected error about COUNT(DISTINCT *), got: {:?}",
1470            result.errors
1471        );
1472    }
1473
1474    #[test]
1475    fn test_count_distinct_passes_validation() {
1476        // COUNT(DISTINCT n.age) should pass
1477        let expr = ValueExpression::AggregateFunction {
1478            name: "count".to_string(),
1479            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1480            distinct: true,
1481        };
1482        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1483        assert!(
1484            result.errors.is_empty(),
1485            "COUNT(DISTINCT) should pass validation, got errors: {:?}",
1486            result.errors
1487        );
1488    }
1489
1490    #[test]
1491    fn test_arithmetic_with_non_numeric_literal_error() {
1492        // RETURN "x" + 1
1493        let expr = ValueExpression::Arithmetic {
1494            left: Box::new(ValueExpression::Literal(PropertyValue::String(
1495                "x".to_string(),
1496            ))),
1497            operator: ArithmeticOperator::Add,
1498            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1499        };
1500        let result = analyze_return_expr(expr).unwrap();
1501        // The semantic analyzer returns Ok with errors collected in the result
1502        assert!(result
1503            .errors
1504            .iter()
1505            .any(|e| e.contains("Arithmetic requires numeric literal operands")));
1506    }
1507
#[test]
fn test_arithmetic_with_numeric_literals_ok() {
    // RETURN 1 + 2.0
    // Integer and float literal operands together should yield no errors.
    let lhs = ValueExpression::Literal(PropertyValue::Integer(1));
    let rhs = ValueExpression::Literal(PropertyValue::Float(2.0));
    let sum = ValueExpression::Arithmetic {
        left: Box::new(lhs),
        operator: ArithmeticOperator::Add,
        right: Box::new(rhs),
    };

    let result = analyze_return_expr(sum);
    assert!(result.is_ok(), "Expected Ok but got {:?}", result);

    let analysis = result.unwrap();
    assert!(analysis.errors.is_empty());
}
1520
#[test]
fn test_vector_distance_with_property() {
    use crate::ast::DistanceMetric;

    // MATCH (p:Person) RETURN vector_distance(p.embedding, p.embedding, l2)
    // Both operands are the same property reference, so one closure builds each side.
    let embedding_of_p = || {
        Box::new(ValueExpression::Property(PropertyRef {
            variable: String::from("p"),
            property: String::from("embedding"),
        }))
    };
    let expr = ValueExpression::VectorDistance {
        left: embedding_of_p(),
        right: embedding_of_p(),
        metric: DistanceMetric::L2,
    };

    let result = analyze_return_with_match("p", "Person", expr);
    assert!(result.is_ok(), "Expected Ok but got {:?}", result);

    let analysis = result.unwrap();
    assert!(analysis.errors.is_empty());
}
1542
#[test]
fn test_vector_distance_without_property_fails() {
    use crate::ast::DistanceMetric;

    // MATCH (p:Person) RETURN vector_distance(0.5, 0.3, l2)
    // Neither operand is a property reference, so validation should complain.
    let lhs = ValueExpression::Literal(PropertyValue::Float(0.5));
    let rhs = ValueExpression::Literal(PropertyValue::Float(0.3));
    let expr = ValueExpression::VectorDistance {
        left: Box::new(lhs),
        right: Box::new(rhs),
        metric: DistanceMetric::L2,
    };

    // The analyzer is expected to return Ok and report the problem through `errors`.
    let result = analyze_return_with_match("p", "Person", expr);
    assert!(
        result.is_ok(),
        "Analyzer should return Ok with errors, got {:?}",
        result
    );

    let errors = result.unwrap().errors;
    assert!(!errors.is_empty(), "Expected validation errors");

    let expected_fragment = "requires at least one argument to be a property";
    assert!(errors.iter().any(|e| e.contains(expected_fragment)));
}
1571
#[test]
fn test_vector_similarity_with_property() {
    use crate::ast::DistanceMetric;

    // MATCH (p:Person) RETURN vector_similarity(p.embedding, p.embedding, cosine)
    // Both operands are the same property reference, so one closure builds each side.
    let embedding_of_p = || {
        Box::new(ValueExpression::Property(PropertyRef {
            variable: String::from("p"),
            property: String::from("embedding"),
        }))
    };
    let expr = ValueExpression::VectorSimilarity {
        left: embedding_of_p(),
        right: embedding_of_p(),
        metric: DistanceMetric::Cosine,
    };

    let result = analyze_return_with_match("p", "Person", expr);
    assert!(result.is_ok(), "Expected Ok but got {:?}", result);

    let analysis = result.unwrap();
    assert!(analysis.errors.is_empty());
}
1593
#[test]
fn test_vector_similarity_one_literal_ok() {
    use crate::ast::DistanceMetric;

    // MATCH (p:Person) RETURN vector_similarity(p.embedding, 0.5, cosine)
    // A single property reference among the operands is sufficient.
    let property_side = ValueExpression::Property(PropertyRef {
        variable: String::from("p"),
        property: String::from("embedding"),
    });
    let literal_side = ValueExpression::Literal(PropertyValue::Float(0.5));
    let expr = ValueExpression::VectorSimilarity {
        left: Box::new(property_side),
        right: Box::new(literal_side),
        metric: DistanceMetric::Cosine,
    };

    let result = analyze_return_with_match("p", "Person", expr);
    assert!(result.is_ok(), "Expected Ok but got {:?}", result);

    let analysis = result.unwrap();
    assert!(analysis.errors.is_empty());
}
1613
#[test]
fn test_vector_distance_all_metrics() {
    use crate::ast::DistanceMetric;

    // Run the same vector_distance(p.embedding, p.embedding, <metric>) expression
    // through the analyzer once per listed metric; each must be accepted.
    for metric in [
        DistanceMetric::L2,
        DistanceMetric::Cosine,
        DistanceMetric::Dot,
    ] {
        let expr = ValueExpression::VectorDistance {
            left: Box::new(ValueExpression::Property(PropertyRef {
                variable: "p".to_string(),
                property: "embedding".to_string(),
            })),
            right: Box::new(ValueExpression::Property(PropertyRef {
                variable: "p".to_string(),
                property: "embedding".to_string(),
            })),
            // Cloned because `metric` is still needed for the failure messages below.
            metric: metric.clone(),
        };

        let result = analyze_return_with_match("p", "Person", expr);
        assert!(
            result.is_ok(),
            "Expected Ok for metric {:?} but got {:?}",
            metric,
            result
        );

        // Name the metric and dump the collected errors so a failing loop
        // iteration can be identified from the test output alone.
        let errors = result.unwrap().errors;
        assert!(
            errors.is_empty(),
            "Expected no validation errors for metric {:?}, got: {:?}",
            metric,
            errors
        );
    }
}
1646}