Skip to main content

lance_graph/
semantic.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Semantic analysis for graph queries
5//!
6//! This module implements the semantic analysis phase of the query pipeline:
7//! Parse → **Semantic Analysis** → Logical Plan → Physical Plan
8//!
9//! Semantic analysis validates the query and enriches the AST with type information.
10
11use crate::ast::*;
12use crate::case_insensitive::CaseInsensitiveLookup;
13use crate::config::GraphConfig;
14use crate::error::{GraphError, Result};
15use std::collections::{HashMap, HashSet};
16
17/// Semantic analyzer - validates and enriches the AST
18pub struct SemanticAnalyzer {
19    config: GraphConfig,
20    variables: HashMap<String, VariableInfo>,
21    current_scope: ScopeType,
22}
23
24/// Information about a variable in the query
25#[derive(Debug, Clone)]
26pub struct VariableInfo {
27    pub name: String,
28    pub variable_type: VariableType,
29    pub labels: Vec<String>,
30    pub properties: HashSet<String>,
31    pub defined_in: ScopeType,
32}
33
/// Kind of entity a query variable refers to.
#[derive(Debug, Clone, PartialEq)]
pub enum VariableType {
    /// Bound by a node pattern.
    Node,
    /// Bound by a relationship pattern.
    Relationship,
    /// A path value.
    Path,
    /// A plain value, such as an UNWIND alias or a projection alias.
    Property,
}
42
/// Clause of the query in which a variable was registered.
#[derive(Debug, Clone, PartialEq)]
pub enum ScopeType {
    /// Reading clauses (the analyzer uses this scope for MATCH and UNWIND).
    Match,
    /// WHERE clause that precedes any WITH.
    Where,
    /// WITH projection.
    With,
    /// WHERE clause that follows a WITH.
    PostWithWhere,
    /// RETURN projection.
    Return,
    /// ORDER BY clause.
    OrderBy,
}
53
54/// Semantic analysis result with validated and enriched AST
55#[derive(Debug, Clone)]
56pub struct SemanticResult {
57    /// The AST with parameters substituted and validated
58    pub ast: CypherQuery,
59    pub variables: HashMap<String, VariableInfo>,
60    pub errors: Vec<String>,
61    pub warnings: Vec<String>,
62}
63
64impl SemanticAnalyzer {
65    pub fn new(config: GraphConfig) -> Self {
66        Self {
67            config,
68            variables: HashMap::new(),
69            current_scope: ScopeType::Match,
70        }
71    }
72
73    /// Analyze a Cypher query AST
74    pub fn analyze(
75        &mut self,
76        query: &CypherQuery,
77        parameters: &HashMap<String, serde_json::Value>,
78    ) -> Result<SemanticResult> {
79        // Clone the query to perform parameter substitution
80        let mut analyzed_query = query.clone();
81
82        // Perform parameter substitution
83        self.substitute_parameters(&mut analyzed_query, parameters)?;
84
85        let mut errors = Vec::new();
86        let mut warnings = Vec::new();
87
88        // Phase 1: Variable discovery in READING clauses (MATCH/UNWIND)
89        self.current_scope = ScopeType::Match;
90        for clause in &analyzed_query.reading_clauses {
91            match clause {
92                ReadingClause::Match(match_clause) => {
93                    if let Err(e) = self.analyze_match_clause(match_clause) {
94                        errors.push(format!("MATCH clause error: {}", e));
95                    }
96                }
97                ReadingClause::Unwind(unwind_clause) => {
98                    if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
99                        errors.push(format!("UNWIND clause error: {}", e));
100                    }
101                }
102            }
103        }
104
105        // Phase 2: Validate WHERE clause (before WITH)
106        if let Some(where_clause) = &analyzed_query.where_clause {
107            self.current_scope = ScopeType::Where;
108            if let Err(e) = self.analyze_where_clause(where_clause) {
109                errors.push(format!("WHERE clause error: {}", e));
110            }
111        }
112
113        // Phase 3: Validate WITH clause if present
114        if let Some(with_clause) = &analyzed_query.with_clause {
115            self.current_scope = ScopeType::With;
116            if let Err(e) = self.analyze_with_clause(with_clause) {
117                errors.push(format!("WITH clause error: {}", e));
118            }
119        }
120
121        // Phase 4: Variable discovery in post-WITH READING clauses (query chaining)
122        self.current_scope = ScopeType::Match;
123        for clause in &analyzed_query.post_with_reading_clauses {
124            match clause {
125                ReadingClause::Match(match_clause) => {
126                    if let Err(e) = self.analyze_match_clause(match_clause) {
127                        errors.push(format!("Post-WITH MATCH clause error: {}", e));
128                    }
129                }
130                ReadingClause::Unwind(unwind_clause) => {
131                    if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
132                        errors.push(format!("Post-WITH UNWIND clause error: {}", e));
133                    }
134                }
135            }
136        }
137
138        // Phase 4: Validate post-WITH WHERE clause if present
139        if let Some(post_where) = &analyzed_query.post_with_where_clause {
140            self.current_scope = ScopeType::PostWithWhere;
141            if let Err(e) = self.analyze_where_clause(post_where) {
142                errors.push(format!("Post-WITH WHERE clause error: {}", e));
143            }
144        }
145
146        // Phase 5: Validate RETURN clause
147        self.current_scope = ScopeType::Return;
148        if let Err(e) = self.analyze_return_clause(&analyzed_query.return_clause) {
149            errors.push(format!("RETURN clause error: {}", e));
150        }
151
152        // Phase 6: Validate ORDER BY clause
153        if let Some(order_by) = &analyzed_query.order_by {
154            self.current_scope = ScopeType::OrderBy;
155            if let Err(e) = self.analyze_order_by_clause(order_by) {
156                errors.push(format!("ORDER BY clause error: {}", e));
157            }
158        }
159
160        // Phase 7: Schema validation
161        self.validate_schema(&mut warnings);
162
163        // Phase 8: Type checking
164        self.validate_types(&mut errors);
165
166        Ok(SemanticResult {
167            ast: analyzed_query,
168            variables: self.variables.clone(),
169            errors,
170            warnings,
171        })
172    }
173
174    /// Analyze MATCH clause and discover variables
175    fn analyze_match_clause(&mut self, match_clause: &MatchClause) -> Result<()> {
176        for pattern in &match_clause.patterns {
177            self.analyze_graph_pattern(pattern)?;
178        }
179        Ok(())
180    }
181
182    /// Analyze UNWIND clause and register variables
183    fn analyze_unwind_clause(&mut self, unwind_clause: &UnwindClause) -> Result<()> {
184        self.analyze_value_expression(&unwind_clause.expression)?;
185
186        // Register the aliased variable (normalize to lowercase for case-insensitive behavior)
187        let var_name = &unwind_clause.alias;
188        let var_name_lower = var_name.to_lowercase();
189        if let Some(existing) = self.variables.get_mut(&var_name_lower) {
190            // Shadowing or redefinition - in Cypher variables can be bound multiple times in some contexts
191            // But here we enforce uniqueness of types mostly.
192            // For now, treat UNWIND alias as a Property type variable.
193            if existing.variable_type != VariableType::Property {
194                return Err(GraphError::PlanError {
195                    message: format!("Variable '{}' redefined with different type", var_name),
196                    location: snafu::Location::new(file!(), line!(), column!()),
197                });
198            }
199        } else {
200            let var_info = VariableInfo {
201                name: var_name.clone(),
202                variable_type: VariableType::Property,
203                labels: vec![],
204                properties: HashSet::new(),
205                defined_in: self.current_scope.clone(),
206            };
207            self.variables.insert(var_name_lower, var_info);
208        }
209        Ok(())
210    }
211
212    /// Analyze a graph pattern and register variables
213    fn analyze_graph_pattern(&mut self, pattern: &GraphPattern) -> Result<()> {
214        match pattern {
215            GraphPattern::Node(node) => {
216                self.register_node_variable(node)?;
217            }
218            GraphPattern::Path(path) => {
219                // Register start node
220                self.register_node_variable(&path.start_node)?;
221
222                // Register variables in each segment
223                for segment in &path.segments {
224                    // Validate relationship length constraints if present
225                    self.validate_length_range(&segment.relationship)?;
226                    // Register relationship variable if present
227                    if let Some(rel_var) = &segment.relationship.variable {
228                        self.register_relationship_variable(rel_var, &segment.relationship)?;
229                    }
230
231                    // Register end node
232                    self.register_node_variable(&segment.end_node)?;
233                }
234            }
235        }
236        Ok(())
237    }
238
239    /// Register a node variable
240    fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> {
241        if let Some(var_name) = &node.variable {
242            // Normalize to lowercase for case-insensitive behavior
243            let var_name_lower = var_name.to_lowercase();
244            if let Some(existing) = self.variables.get_mut(&var_name_lower) {
245                if existing.variable_type != VariableType::Node {
246                    return Err(GraphError::PlanError {
247                        message: format!("Variable '{}' redefined with different type", var_name),
248                        location: snafu::Location::new(file!(), line!(), column!()),
249                    });
250                }
251                for label in &node.labels {
252                    if !existing.labels.contains(label) {
253                        existing.labels.push(label.clone());
254                    }
255                }
256                for prop in node.properties.keys() {
257                    existing.properties.insert(prop.clone());
258                }
259            } else {
260                let var_info = VariableInfo {
261                    name: var_name.clone(),
262                    variable_type: VariableType::Node,
263                    labels: node.labels.clone(),
264                    properties: node.properties.keys().cloned().collect(),
265                    defined_in: self.current_scope.clone(),
266                };
267                self.variables.insert(var_name_lower, var_info);
268            }
269        }
270        Ok(())
271    }
272
273    /// Register a relationship variable
274    fn register_relationship_variable(
275        &mut self,
276        var_name: &str,
277        rel: &RelationshipPattern,
278    ) -> Result<()> {
279        // Normalize to lowercase for case-insensitive behavior
280        let var_name_lower = var_name.to_lowercase();
281        if let Some(existing) = self.variables.get_mut(&var_name_lower) {
282            if existing.variable_type != VariableType::Relationship {
283                return Err(GraphError::PlanError {
284                    message: format!("Variable '{}' redefined with different type", var_name),
285                    location: snafu::Location::new(file!(), line!(), column!()),
286                });
287            }
288            for rel_type in &rel.types {
289                if !existing.labels.contains(rel_type) {
290                    existing.labels.push(rel_type.clone());
291                }
292            }
293            for prop in rel.properties.keys() {
294                existing.properties.insert(prop.clone());
295            }
296        } else {
297            let var_info = VariableInfo {
298                name: var_name.to_string(),
299                variable_type: VariableType::Relationship,
300                labels: rel.types.clone(), // Relationship types are like labels
301                properties: rel.properties.keys().cloned().collect(),
302                defined_in: self.current_scope.clone(),
303            };
304            self.variables.insert(var_name_lower, var_info);
305        }
306        Ok(())
307    }
308
309    /// Analyze WHERE clause
310    fn analyze_where_clause(&mut self, where_clause: &WhereClause) -> Result<()> {
311        self.analyze_boolean_expression(&where_clause.expression)
312    }
313
314    /// Analyze boolean expression and check variable references
315    fn analyze_boolean_expression(&mut self, expr: &BooleanExpression) -> Result<()> {
316        match expr {
317            BooleanExpression::Comparison { left, right, .. } => {
318                self.analyze_value_expression(left)?;
319                self.analyze_value_expression(right)?;
320            }
321            BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => {
322                self.analyze_boolean_expression(left)?;
323                self.analyze_boolean_expression(right)?;
324            }
325            BooleanExpression::Not(inner) => {
326                self.analyze_boolean_expression(inner)?;
327            }
328            BooleanExpression::Exists(prop_ref) => {
329                self.validate_property_reference(prop_ref)?;
330            }
331            BooleanExpression::In { expression, list } => {
332                self.analyze_value_expression(expression)?;
333                for item in list {
334                    self.analyze_value_expression(item)?;
335                }
336            }
337            BooleanExpression::Like { expression, .. } => {
338                self.analyze_value_expression(expression)?;
339            }
340            BooleanExpression::ILike { expression, .. } => {
341                self.analyze_value_expression(expression)?;
342            }
343            BooleanExpression::Contains { expression, .. } => {
344                self.analyze_value_expression(expression)?;
345            }
346            BooleanExpression::StartsWith { expression, .. } => {
347                self.analyze_value_expression(expression)?;
348            }
349            BooleanExpression::EndsWith { expression, .. } => {
350                self.analyze_value_expression(expression)?;
351            }
352            BooleanExpression::IsNull(expression) => {
353                self.analyze_value_expression(expression)?;
354            }
355            BooleanExpression::IsNotNull(expression) => {
356                self.analyze_value_expression(expression)?;
357            }
358        }
359        Ok(())
360    }
361
362    /// Analyze value expression and check variable references
363    fn analyze_value_expression(&mut self, expr: &ValueExpression) -> Result<()> {
364        match expr {
365            ValueExpression::Property(prop_ref) => {
366                self.validate_property_reference(prop_ref)?;
367            }
368            ValueExpression::Literal(_) => {
369                // Literals are always valid
370            }
371            ValueExpression::Variable(var) => {
372                // Use case-insensitive lookup
373                if !self.variables.contains_key_ci(var) {
374                    return Err(GraphError::PlanError {
375                        message: format!("Undefined variable: '{}'", var),
376                        location: snafu::Location::new(file!(), line!(), column!()),
377                    });
378                }
379            }
380            ValueExpression::ScalarFunction { name, args } => {
381                let function_name = name.to_lowercase();
382                // Validate arity and known functions
383                match function_name.as_str() {
384                    "tolower" | "lower" | "toupper" | "upper" => {
385                        if args.len() != 1 {
386                            return Err(GraphError::PlanError {
387                                message: format!(
388                                    "{} requires exactly 1 argument, got {}",
389                                    name.to_uppercase(),
390                                    args.len()
391                                ),
392                                location: snafu::Location::new(file!(), line!(), column!()),
393                            });
394                        }
395                    }
396                    _ => {
397                        // Unknown scalar function - reject early with helpful error
398                        return Err(GraphError::UnsupportedFeature {
399                            feature: format!(
400                                "Cypher function '{}' is not implemented. Supported scalar functions: toLower, lower, toUpper, upper. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
401                                name
402                            ),
403                            location: snafu::Location::new(file!(), line!(), column!()),
404                        });
405                    }
406                }
407
408                // Validate arguments recursively
409                for arg in args {
410                    self.analyze_value_expression(arg)?;
411                }
412            }
413            ValueExpression::AggregateFunction {
414                name,
415                args,
416                distinct,
417            } => {
418                let function_name = name.to_lowercase();
419                // Validate known aggregate functions
420                match function_name.as_str() {
421                    "count" | "sum" | "avg" | "min" | "max" | "collect" => {
422                        // DISTINCT is only supported for COUNT
423                        // Other aggregates silently ignore it in execution, so reject early
424                        if *distinct && function_name != "count" {
425                            return Err(GraphError::UnsupportedFeature {
426                                feature: format!(
427                                    "DISTINCT is only supported with COUNT, not {}",
428                                    function_name.to_uppercase()
429                                ),
430                                location: snafu::Location::new(file!(), line!(), column!()),
431                            });
432                        }
433
434                        // COUNT(DISTINCT *) is semantically meaningless
435                        // It would count distinct values of lit(1) which is always 1
436                        if *distinct && function_name == "count" {
437                            if let Some(ValueExpression::Variable(v)) = args.first() {
438                                if v == "*" {
439                                    return Err(GraphError::PlanError {
440                                        message: "COUNT(DISTINCT *) is not supported. \
441                                            Use COUNT(*) to count all rows, or \
442                                            COUNT(DISTINCT property) to count distinct values."
443                                            .to_string(),
444                                        location: snafu::Location::new(file!(), line!(), column!()),
445                                    });
446                                }
447                            }
448                        }
449                        // All aggregates require exactly 1 argument
450                        if args.len() != 1 {
451                            return Err(GraphError::PlanError {
452                                message: format!(
453                                    "{} requires exactly 1 argument, got {}",
454                                    function_name.to_uppercase(),
455                                    args.len()
456                                ),
457                                location: snafu::Location::new(file!(), line!(), column!()),
458                            });
459                        }
460
461                        // Additional validation for SUM, AVG, MIN, MAX: they require properties, not bare variables
462                        // Only COUNT and COLLECT allow bare variables (COUNT(*), COUNT(p), COLLECT(p))
463                        if matches!(function_name.as_str(), "sum" | "avg" | "min" | "max") {
464                            if let Some(ValueExpression::Variable(v)) = args.first() {
465                                return Err(GraphError::PlanError {
466                                    message: format!(
467                                        "{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
468                                        function_name.to_uppercase(), v, function_name.to_uppercase(), function_name.to_uppercase(), v, function_name
469                                    ),
470                                    location: snafu::Location::new(file!(), line!(), column!()),
471                                });
472                            }
473                        }
474                    }
475                    _ => {
476                        // Unknown aggregate function - reject early
477                        return Err(GraphError::UnsupportedFeature {
478                            feature: format!(
479                                "Cypher aggregate function '{}' is not implemented. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
480                                name
481                            ),
482                            location: snafu::Location::new(file!(), line!(), column!()),
483                        });
484                    }
485                }
486
487                // Validate arguments recursively.
488                // Special-case COUNT(*) where '*' isn't a real variable.
489                for arg in args {
490                    if function_name == "count"
491                        && matches!(arg, ValueExpression::Variable(v) if v == "*")
492                    {
493                        continue;
494                    }
495                    self.analyze_value_expression(arg)?;
496                }
497            }
498            ValueExpression::Arithmetic { left, right, .. } => {
499                // Validate arithmetic operands recursively
500                self.analyze_value_expression(left)?;
501                self.analyze_value_expression(right)?;
502
503                // If both sides are literals, ensure they are numeric
504                let is_numeric_literal = |pv: &PropertyValue| {
505                    matches!(pv, PropertyValue::Integer(_) | PropertyValue::Float(_))
506                };
507
508                if let (ValueExpression::Literal(l1), ValueExpression::Literal(l2)) =
509                    (&**left, &**right)
510                {
511                    if !(is_numeric_literal(l1) && is_numeric_literal(l2)) {
512                        return Err(GraphError::PlanError {
513                            message: "Arithmetic requires numeric literal operands".to_string(),
514                            location: snafu::Location::new(file!(), line!(), column!()),
515                        });
516                    }
517                }
518            }
519            ValueExpression::VectorDistance { left, right, .. } => {
520                // Validate vector distance function arguments
521                self.analyze_value_expression(left)?;
522                self.analyze_value_expression(right)?;
523
524                // Check that at least one argument references a property
525                let has_property = matches!(**left, ValueExpression::Property(_))
526                    || matches!(**right, ValueExpression::Property(_));
527
528                if !has_property {
529                    return Err(GraphError::PlanError {
530                        message: "vector_distance() requires at least one argument to be a property reference".to_string(),
531                        location: snafu::Location::new(file!(), line!(), column!()),
532                    });
533                }
534            }
535            ValueExpression::VectorSimilarity { left, right, .. } => {
536                // Validate vector similarity function arguments
537                self.analyze_value_expression(left)?;
538                self.analyze_value_expression(right)?;
539
540                // Check that at least one argument references a property
541                let has_property = matches!(**left, ValueExpression::Property(_))
542                    || matches!(**right, ValueExpression::Property(_));
543
544                if !has_property {
545                    return Err(GraphError::PlanError {
546                        message: "vector_similarity() requires at least one argument to be a property reference".to_string(),
547                        location: snafu::Location::new(file!(), line!(), column!()),
548                    });
549                }
550            }
551            ValueExpression::VectorLiteral(values) => {
552                // Validate non-empty
553                if values.is_empty() {
554                    return Err(GraphError::PlanError {
555                        message: "Vector literal cannot be empty".to_string(),
556                        location: snafu::Location::new(file!(), line!(), column!()),
557                    });
558                }
559
560                // Note: Very large vectors (>4096 dimensions) may impact performance
561                // but we don't enforce a hard limit here
562            }
563            ValueExpression::Parameter(_) => {
564                // Parameters are always valid (resolved at runtime)
565            }
566        }
567        Ok(())
568    }
569
570    fn register_projection_alias(&mut self, alias: &str) {
571        // Use case-insensitive lookup and store normalized key
572        if self.variables.contains_key_ci(alias) {
573            return;
574        }
575
576        let var_info = VariableInfo {
577            name: alias.to_string(),
578            variable_type: VariableType::Property,
579            labels: vec![],
580            properties: HashSet::new(),
581            defined_in: self.current_scope.clone(),
582        };
583        self.variables.insert(alias.to_lowercase(), var_info);
584    }
585
586    /// Validate property reference
587    fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> {
588        // Use case-insensitive lookup
589        if !self.variables.contains_key_ci(&prop_ref.variable) {
590            return Err(GraphError::PlanError {
591                message: format!("Undefined variable: '{}'", prop_ref.variable),
592                location: snafu::Location::new(file!(), line!(), column!()),
593            });
594        }
595        Ok(())
596    }
597
598    /// Analyze RETURN clause
599    fn analyze_return_clause(&mut self, return_clause: &ReturnClause) -> Result<()> {
600        for item in &return_clause.items {
601            self.analyze_value_expression(&item.expression)?;
602            if let Some(alias) = &item.alias {
603                self.register_projection_alias(alias);
604            }
605        }
606        Ok(())
607    }
608
609    /// Analyze WITH clause
610    fn analyze_with_clause(&mut self, with_clause: &WithClause) -> Result<()> {
611        // Validate WITH item expressions (similar to RETURN)
612        for item in &with_clause.items {
613            self.analyze_value_expression(&item.expression)?;
614            if let Some(alias) = &item.alias {
615                self.register_projection_alias(alias);
616            }
617        }
618        // Validate ORDER BY within WITH if present
619        if let Some(order_by) = &with_clause.order_by {
620            for item in &order_by.items {
621                self.analyze_value_expression(&item.expression)?;
622            }
623        }
624        Ok(())
625    }
626
627    /// Analyze ORDER BY clause
628    fn analyze_order_by_clause(&mut self, order_by: &OrderByClause) -> Result<()> {
629        for item in &order_by.items {
630            self.analyze_value_expression(&item.expression)?;
631        }
632        Ok(())
633    }
634
635    /// Validate schema references against configuration
636    fn validate_schema(&self, warnings: &mut Vec<String>) {
637        for var_info in self.variables.values() {
638            match var_info.variable_type {
639                VariableType::Node => {
640                    for label in &var_info.labels {
641                        if self.config.get_node_mapping(label).is_none() {
642                            warnings.push(format!("Node label '{}' not found in schema", label));
643                        }
644                    }
645                }
646                VariableType::Relationship => {
647                    for rel_type in &var_info.labels {
648                        if self.config.get_relationship_mapping(rel_type).is_none() {
649                            warnings.push(format!(
650                                "Relationship type '{}' not found in schema",
651                                rel_type
652                            ));
653                        }
654                    }
655                }
656                _ => {}
657            }
658        }
659    }
660
661    /// Validate types and operations
662    fn validate_types(&self, errors: &mut Vec<String>) {
663        // TODO: Implement type checking
664        // - Check that properties exist on nodes/relationships
665        // - Check that comparison operations are valid for data types
666        // - Check that arithmetic operations are valid
667
668        // Check that properties referenced in patterns exist in schema when property fields are defined
669        for var_info in self.variables.values() {
670            match var_info.variable_type {
671                VariableType::Node => {
672                    // Collect property_fields from all known label mappings that specify properties
673                    let mut label_property_sets: Vec<&[String]> = Vec::new();
674                    for label in &var_info.labels {
675                        if let Some(mapping) = self.config.get_node_mapping(label) {
676                            if !mapping.property_fields.is_empty() {
677                                label_property_sets.push(&mapping.property_fields);
678                            }
679                        }
680                    }
681
682                    if !label_property_sets.is_empty() {
683                        'prop: for prop in &var_info.properties {
684                            // Property is valid if present in at least one label's property_fields
685                            // Use case-insensitive comparison
686                            let prop_lower = prop.to_lowercase();
687                            for fields in &label_property_sets {
688                                if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
689                                    continue 'prop;
690                                }
691                            }
692                            errors.push(format!(
693                                "Property '{}' not found on labels {:?}",
694                                prop, var_info.labels
695                            ));
696                        }
697                    }
698                }
699                VariableType::Relationship => {
700                    // Collect property_fields from all known relationship mappings that specify properties
701                    let mut rel_property_sets: Vec<&[String]> = Vec::new();
702                    for rel_type in &var_info.labels {
703                        if let Some(mapping) = self.config.get_relationship_mapping(rel_type) {
704                            if !mapping.property_fields.is_empty() {
705                                rel_property_sets.push(&mapping.property_fields);
706                            }
707                        }
708                    }
709
710                    if !rel_property_sets.is_empty() {
711                        'prop_rel: for prop in &var_info.properties {
712                            // Use case-insensitive comparison for relationship properties
713                            let prop_lower = prop.to_lowercase();
714                            for fields in &rel_property_sets {
715                                if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
716                                    continue 'prop_rel;
717                                }
718                            }
719                            errors.push(format!(
720                                "Property '{}' not found on relationship types {:?}",
721                                prop, var_info.labels
722                            ));
723                        }
724                    }
725                }
726                _ => {}
727            }
728        }
729    }
730}
731
732impl SemanticAnalyzer {
733    fn validate_length_range(&self, rel: &RelationshipPattern) -> Result<()> {
734        if let Some(len) = &rel.length {
735            if let (Some(min), Some(max)) = (len.min, len.max) {
736                if min > max {
737                    return Err(GraphError::PlanError {
738                        message: "Invalid path length range: min > max".to_string(),
739                        location: snafu::Location::new(file!(), line!(), column!()),
740                    });
741                }
742            }
743        }
744        Ok(())
745    }
746    /// Substitute parameters with literal values in the AST
747    fn substitute_parameters(
748        &self,
749        query: &mut CypherQuery,
750        parameters: &HashMap<String, serde_json::Value>,
751    ) -> Result<()> {
752        crate::parameter_substitution::substitute_parameters(query, parameters)
753    }
754}
755
756#[cfg(test)]
757mod tests {
758    use super::*;
759    use crate::ast::{
760        ArithmeticOperator, BooleanExpression, CypherQuery, GraphPattern, LengthRange, MatchClause,
761        NodePattern, PathPattern, PathSegment, PropertyRef, PropertyValue, RelationshipDirection,
762        RelationshipPattern, ReturnClause, ReturnItem, ValueExpression, WhereClause,
763    };
764    use crate::config::{GraphConfig, NodeMapping};
765
766    fn test_config() -> GraphConfig {
767        GraphConfig::builder()
768            .with_node_label("Person", "id")
769            .with_node_label("Employee", "id")
770            .with_node_label("Company", "id")
771            .with_relationship("KNOWS", "src_id", "dst_id")
772            .build()
773            .unwrap()
774    }
775
776    // Helper: analyze a query that only has a single RETURN expression
777    fn analyze_return_expr(expr: ValueExpression) -> Result<SemanticResult> {
778        let query = CypherQuery {
779            reading_clauses: vec![],
780            where_clause: None,
781            with_clause: None,
782            post_with_reading_clauses: vec![],
783            post_with_where_clause: None,
784            return_clause: ReturnClause {
785                distinct: false,
786                items: vec![ReturnItem {
787                    expression: expr,
788                    alias: None,
789                }],
790            },
791            limit: None,
792            order_by: None,
793            skip: None,
794        };
795        let mut analyzer = SemanticAnalyzer::new(test_config());
796        analyzer.analyze(&query, &HashMap::new())
797    }
798
799    // Helper: analyze a query with a single MATCH (var:label) and a RETURN expression
800    fn analyze_return_with_match(
801        var: &str,
802        label: &str,
803        expr: ValueExpression,
804    ) -> Result<SemanticResult> {
805        let node = NodePattern::new(Some(var.to_string())).with_label(label);
806        let query = CypherQuery {
807            reading_clauses: vec![ReadingClause::Match(MatchClause {
808                patterns: vec![GraphPattern::Node(node)],
809            })],
810            where_clause: None,
811            with_clause: None,
812            post_with_reading_clauses: vec![],
813            post_with_where_clause: None,
814            return_clause: ReturnClause {
815                distinct: false,
816                items: vec![ReturnItem {
817                    expression: expr,
818                    alias: None,
819                }],
820            },
821            limit: None,
822            order_by: None,
823            skip: None,
824        };
825        let mut analyzer = SemanticAnalyzer::new(test_config());
826        analyzer.analyze(&query, &HashMap::new())
827    }
828
829    #[test]
830    fn test_merge_node_variable_metadata() {
831        // MATCH (n:Person {age: 30}), (n:Employee {dept: "X"})
832        let node1 = NodePattern::new(Some("n".to_string()))
833            .with_label("Person")
834            .with_property("age", PropertyValue::Integer(30));
835        let node2 = NodePattern::new(Some("n".to_string()))
836            .with_label("Employee")
837            .with_property("dept", PropertyValue::String("X".to_string()));
838
839        let query = CypherQuery {
840            reading_clauses: vec![ReadingClause::Match(MatchClause {
841                patterns: vec![GraphPattern::Node(node1), GraphPattern::Node(node2)],
842            })],
843            where_clause: None,
844            with_clause: None,
845            post_with_reading_clauses: vec![],
846            post_with_where_clause: None,
847            return_clause: ReturnClause {
848                distinct: false,
849                items: vec![],
850            },
851            limit: None,
852            order_by: None,
853            skip: None,
854        };
855
856        let mut analyzer = SemanticAnalyzer::new(test_config());
857        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
858        assert!(result.errors.is_empty());
859        let n = result.variables.get("n").expect("variable n present");
860        // Labels merged
861        assert!(n.labels.contains(&"Person".to_string()));
862        assert!(n.labels.contains(&"Employee".to_string()));
863        // Properties unioned
864        assert!(n.properties.contains("age"));
865        assert!(n.properties.contains("dept"));
866    }
867
868    #[test]
869    fn test_invalid_length_range_collects_error() {
870        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
871        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
872        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
873            .with_variable("r")
874            .with_type("KNOWS");
875        rel.length = Some(LengthRange {
876            min: Some(3),
877            max: Some(2),
878        });
879
880        let path = PathPattern {
881            start_node: start,
882            segments: vec![PathSegment {
883                relationship: rel,
884                end_node: end,
885            }],
886        };
887
888        let query = CypherQuery {
889            reading_clauses: vec![ReadingClause::Match(MatchClause {
890                patterns: vec![GraphPattern::Path(path)],
891            })],
892            where_clause: None,
893            with_clause: None,
894            post_with_reading_clauses: vec![],
895            post_with_where_clause: None,
896            return_clause: ReturnClause {
897                distinct: false,
898                items: vec![],
899            },
900            limit: None,
901            order_by: None,
902            skip: None,
903        };
904
905        let mut analyzer = SemanticAnalyzer::new(test_config());
906        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
907        assert!(result
908            .errors
909            .iter()
910            .any(|e| e.contains("Invalid path length range")));
911    }
912
913    #[test]
914    fn test_undefined_variable_in_where() {
915        // MATCH (n:Person) WHERE EXISTS(m.name)
916        let node = NodePattern::new(Some("n".to_string())).with_label("Person");
917        let where_clause = WhereClause {
918            expression: BooleanExpression::Exists(PropertyRef::new("m", "name")),
919        };
920        let query = CypherQuery {
921            reading_clauses: vec![ReadingClause::Match(MatchClause {
922                patterns: vec![GraphPattern::Node(node)],
923            })],
924            where_clause: Some(where_clause),
925            with_clause: None,
926            post_with_reading_clauses: vec![],
927            post_with_where_clause: None,
928            return_clause: ReturnClause {
929                distinct: false,
930                items: vec![],
931            },
932            limit: None,
933            order_by: None,
934            skip: None,
935        };
936
937        let mut analyzer = SemanticAnalyzer::new(test_config());
938        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
939        assert!(result
940            .errors
941            .iter()
942            .any(|e| e.contains("Undefined variable: 'm'")));
943    }
944
945    #[test]
946    fn test_variable_redefinition_between_node_and_relationship() {
947        // MATCH (n:Person)-[n:KNOWS]->(m:Person)
948        let start = NodePattern::new(Some("n".to_string())).with_label("Person");
949        let end = NodePattern::new(Some("m".to_string())).with_label("Person");
950        let rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
951            .with_variable("n")
952            .with_type("KNOWS");
953
954        let path = PathPattern {
955            start_node: start,
956            segments: vec![PathSegment {
957                relationship: rel,
958                end_node: end,
959            }],
960        };
961
962        let query = CypherQuery {
963            reading_clauses: vec![ReadingClause::Match(MatchClause {
964                patterns: vec![GraphPattern::Path(path)],
965            })],
966            where_clause: None,
967            with_clause: None,
968            post_with_reading_clauses: vec![],
969            post_with_where_clause: None,
970            return_clause: ReturnClause {
971                distinct: false,
972                items: vec![],
973            },
974            limit: None,
975            order_by: None,
976            skip: None,
977        };
978
979        let mut analyzer = SemanticAnalyzer::new(test_config());
980        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
981        assert!(result
982            .errors
983            .iter()
984            .any(|e| e.contains("redefined with different type")));
985    }
986
987    #[test]
988    fn test_unknown_node_label_warns() {
989        // MATCH (x:Unknown)
990        let node = NodePattern::new(Some("x".to_string())).with_label("Unknown");
991        let query = CypherQuery {
992            reading_clauses: vec![ReadingClause::Match(MatchClause {
993                patterns: vec![GraphPattern::Node(node)],
994            })],
995            post_with_reading_clauses: vec![],
996            post_with_where_clause: None,
997            where_clause: None,
998            with_clause: None,
999            return_clause: ReturnClause {
1000                distinct: false,
1001                items: vec![],
1002            },
1003            limit: None,
1004            order_by: None,
1005            skip: None,
1006        };
1007
1008        let mut analyzer = SemanticAnalyzer::new(test_config());
1009        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
1010        assert!(result
1011            .warnings
1012            .iter()
1013            .any(|w| w.contains("Node label 'Unknown' not found in schema")));
1014    }
1015
1016    #[test]
1017    fn test_property_not_in_schema_reports_error() {
1018        // Configure Person with allowed property 'name' only
1019        let custom_config = GraphConfig::builder()
1020            .with_node_mapping(
1021                NodeMapping::new("Person", "id").with_properties(vec!["name".to_string()]),
1022            )
1023            .with_relationship("KNOWS", "src_id", "dst_id")
1024            .build()
1025            .unwrap();
1026
1027        // MATCH (n:Person {age: 30})
1028        let node = NodePattern::new(Some("n".to_string()))
1029            .with_label("Person")
1030            .with_property("age", PropertyValue::Integer(30));
1031        let query = CypherQuery {
1032            reading_clauses: vec![ReadingClause::Match(MatchClause {
1033                patterns: vec![GraphPattern::Node(node)],
1034            })],
1035            post_with_reading_clauses: vec![],
1036            post_with_where_clause: None,
1037            where_clause: None,
1038            with_clause: None,
1039            return_clause: ReturnClause {
1040                distinct: false,
1041                items: vec![],
1042            },
1043            limit: None,
1044            order_by: None,
1045            skip: None,
1046        };
1047
1048        let mut analyzer = SemanticAnalyzer::new(custom_config);
1049        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
1050        assert!(result
1051            .errors
1052            .iter()
1053            .any(|e| e.contains("Property 'age' not found on labels [\"Person\"]")));
1054    }
1055
1056    #[test]
1057    fn test_valid_length_range_ok() {
1058        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
1059        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
1060        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
1061            .with_variable("r")
1062            .with_type("KNOWS");
1063        rel.length = Some(LengthRange {
1064            min: Some(2),
1065            max: Some(3),
1066        });
1067
1068        let path = PathPattern {
1069            start_node: start,
1070            segments: vec![PathSegment {
1071                relationship: rel,
1072                end_node: end,
1073            }],
1074        };
1075
1076        let query = CypherQuery {
1077            reading_clauses: vec![ReadingClause::Match(MatchClause {
1078                patterns: vec![GraphPattern::Path(path)],
1079            })],
1080            post_with_reading_clauses: vec![],
1081            post_with_where_clause: None,
1082            where_clause: None,
1083            with_clause: None,
1084            return_clause: ReturnClause {
1085                distinct: false,
1086                items: vec![],
1087            },
1088            limit: None,
1089            order_by: None,
1090            skip: None,
1091        };
1092
1093        let mut analyzer = SemanticAnalyzer::new(test_config());
1094        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
1095        assert!(result
1096            .errors
1097            .iter()
1098            .all(|e| !e.contains("Invalid path length range")));
1099    }
1100
1101    #[test]
1102    fn test_relationship_variable_metadata_merge_across_segments() {
1103        // Path with two segments sharing the same relationship variable 'r'
1104        // (a:Person)-[r:KNOWS {since: 2020}]->(b:Person)-[r:FRIEND {level: 1}]->(c:Person)
1105        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
1106        let mid = NodePattern::new(Some("b".to_string())).with_label("Person");
1107        let end = NodePattern::new(Some("c".to_string())).with_label("Person");
1108
1109        let mut rel1 = RelationshipPattern::new(RelationshipDirection::Outgoing)
1110            .with_variable("r")
1111            .with_type("KNOWS")
1112            .with_property("since", PropertyValue::Integer(2020));
1113        rel1.length = None;
1114
1115        let mut rel2 = RelationshipPattern::new(RelationshipDirection::Outgoing)
1116            .with_variable("r")
1117            .with_type("FRIEND")
1118            .with_property("level", PropertyValue::Integer(1));
1119        rel2.length = None;
1120
1121        let path = PathPattern {
1122            start_node: start,
1123            segments: vec![
1124                PathSegment {
1125                    relationship: rel1,
1126                    end_node: mid,
1127                },
1128                PathSegment {
1129                    relationship: rel2,
1130                    end_node: end,
1131                },
1132            ],
1133        };
1134
1135        // Custom config that knows both relationship types to avoid warnings muddying the assertion
1136        let custom_config = GraphConfig::builder()
1137            .with_node_label("Person", "id")
1138            .with_relationship("KNOWS", "src_id", "dst_id")
1139            .with_relationship("FRIEND", "src_id", "dst_id")
1140            .build()
1141            .unwrap();
1142
1143        let query = CypherQuery {
1144            reading_clauses: vec![ReadingClause::Match(MatchClause {
1145                patterns: vec![GraphPattern::Path(path)],
1146            })],
1147            post_with_reading_clauses: vec![],
1148            post_with_where_clause: None,
1149            where_clause: None,
1150            with_clause: None,
1151            return_clause: ReturnClause {
1152                distinct: false,
1153                items: vec![],
1154            },
1155            limit: None,
1156            order_by: None,
1157            skip: None,
1158        };
1159
1160        let mut analyzer = SemanticAnalyzer::new(custom_config);
1161        let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
1162        let r = result.variables.get("r").expect("variable r present");
1163        // Types merged
1164        assert!(r.labels.contains(&"KNOWS".to_string()));
1165        assert!(r.labels.contains(&"FRIEND".to_string()));
1166        // Properties unioned
1167        assert!(r.properties.contains("since"));
1168        assert!(r.properties.contains("level"));
1169    }
1170
1171    #[test]
1172    fn test_parameter_substitution() {
1173        // MATCH (n:Person) WHERE n.age > $min_age RETURN n
1174        let node = NodePattern::new(Some("n".to_string())).with_label("Person");
1175        let where_clause = WhereClause {
1176            expression: BooleanExpression::Comparison {
1177                left: ValueExpression::Property(PropertyRef::new("n", "age")),
1178                operator: crate::ast::ComparisonOperator::GreaterThan,
1179                right: ValueExpression::Parameter("min_age".to_string()),
1180            },
1181        };
1182        let query = CypherQuery {
1183            reading_clauses: vec![ReadingClause::Match(MatchClause {
1184                patterns: vec![GraphPattern::Node(node)],
1185            })],
1186            where_clause: Some(where_clause),
1187            with_clause: None,
1188            post_with_reading_clauses: vec![],
1189            post_with_where_clause: None,
1190            return_clause: ReturnClause {
1191                distinct: false,
1192                items: vec![ReturnItem {
1193                    expression: ValueExpression::Variable("n".to_string()),
1194                    alias: None,
1195                }],
1196            },
1197            limit: None,
1198            order_by: None,
1199            skip: None,
1200        };
1201
1202        let mut parameters = HashMap::new();
1203        parameters.insert("min_age".to_string(), serde_json::json!(18));
1204
1205        let mut analyzer = SemanticAnalyzer::new(test_config());
1206        let result = analyzer
1207            .analyze(&query, &parameters)
1208            .expect("Analysis failed");
1209
1210        // Verify substitution in AST
1211        let where_clause = result.ast.where_clause.as_ref().unwrap();
1212        match &where_clause.expression {
1213            BooleanExpression::Comparison { right, .. } => match right {
1214                ValueExpression::Literal(PropertyValue::Integer(val)) => {
1215                    assert_eq!(*val, 18);
1216                }
1217                _ => panic!("Expected Integer literal, got {:?}", right),
1218            },
1219            _ => panic!("Expected Comparison expression"),
1220        }
1221    }
1222
1223    #[test]
1224    fn test_function_argument_undefined_variable_in_return() {
1225        // RETURN toUpper(m.name)
1226        let expr = ValueExpression::ScalarFunction {
1227            name: "toUpper".to_string(),
1228            args: vec![ValueExpression::Property(PropertyRef::new("m", "name"))],
1229        };
1230        let result = analyze_return_expr(expr).unwrap();
1231        assert!(result
1232            .errors
1233            .iter()
1234            .any(|e| e.contains("Undefined variable: 'm'")));
1235    }
1236
1237    #[test]
1238    fn test_function_argument_valid_variable_ok() {
1239        // MATCH (n:Person) RETURN toUpper(n.name)
1240        let expr = ValueExpression::ScalarFunction {
1241            name: "toUpper".to_string(),
1242            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
1243        };
1244        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1245        assert!(result.errors.is_empty());
1246    }
1247
1248    #[test]
1249    fn test_arithmetic_with_undefined_variable_in_return() {
1250        // RETURN x + 1
1251        let expr = ValueExpression::Arithmetic {
1252            left: Box::new(ValueExpression::Variable("x".to_string())),
1253            operator: ArithmeticOperator::Add,
1254            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1255        };
1256        let result = analyze_return_expr(expr).unwrap();
1257        assert!(result
1258            .errors
1259            .iter()
1260            .any(|e| e.contains("Undefined variable: 'x'")));
1261    }
1262
1263    #[test]
1264    fn test_arithmetic_with_defined_property_ok() {
1265        let expr = ValueExpression::Arithmetic {
1266            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1267            operator: ArithmeticOperator::Add,
1268            right: Box::new(ValueExpression::Property(PropertyRef::new("n", "age"))),
1269        };
1270        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1271        // Should not report undefined variable 'n'
1272        assert!(result
1273            .errors
1274            .iter()
1275            .all(|e| !e.contains("Undefined variable: 'n'")));
1276    }
1277
1278    #[test]
1279    fn test_count_with_multiple_args_fails_validation() {
1280        // COUNT(n.age, n.name) should fail semantic validation
1281        let expr = ValueExpression::AggregateFunction {
1282            name: "count".to_string(),
1283            args: vec![
1284                ValueExpression::Property(PropertyRef::new("n", "age")),
1285                ValueExpression::Property(PropertyRef::new("n", "name")),
1286            ],
1287            distinct: false,
1288        };
1289        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1290        assert!(
1291            result
1292                .errors
1293                .iter()
1294                .any(|e| e.contains("COUNT requires exactly 1 argument")),
1295            "Expected error about COUNT arity, got: {:?}",
1296            result.errors
1297        );
1298    }
1299
1300    #[test]
1301    fn test_count_with_zero_args_fails_validation() {
1302        // COUNT() with no arguments should fail
1303        let expr = ValueExpression::AggregateFunction {
1304            name: "count".to_string(),
1305            args: vec![],
1306            distinct: false,
1307        };
1308        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1309        assert!(
1310            result
1311                .errors
1312                .iter()
1313                .any(|e| e.contains("COUNT requires exactly 1 argument")),
1314            "Expected error about COUNT arity, got: {:?}",
1315            result.errors
1316        );
1317    }
1318
1319    #[test]
1320    fn test_count_with_one_arg_passes_validation() {
1321        // COUNT(n.age) should pass validation
1322        let expr = ValueExpression::AggregateFunction {
1323            name: "count".to_string(),
1324            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1325            distinct: false,
1326        };
1327        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1328        assert!(
1329            result
1330                .errors
1331                .iter()
1332                .all(|e| !e.contains("COUNT requires exactly 1 argument")),
1333            "COUNT with 1 arg should not produce arity error, got: {:?}",
1334            result.errors
1335        );
1336    }
1337
1338    #[test]
1339    fn test_count_star_passes_validation() {
1340        // COUNT(*) should be allowed (special-cased in semantic analysis)
1341        let expr = ValueExpression::AggregateFunction {
1342            name: "count".to_string(),
1343            args: vec![ValueExpression::Variable("*".to_string())],
1344            distinct: false,
1345        };
1346        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1347        assert!(
1348            result.errors.is_empty(),
1349            "Expected COUNT(*) to pass semantic validation, got: {:?}",
1350            result.errors
1351        );
1352    }
1353
1354    #[test]
1355    fn test_unimplemented_scalar_function_fails_validation() {
1356        let expr = ValueExpression::ScalarFunction {
1357            name: "replace".to_string(),
1358            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
1359        };
1360        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1361        // ScalarFunction with unknown name collects an error
1362        assert!(
1363            result
1364                .errors
1365                .iter()
1366                .any(|e| e.to_lowercase().contains("not implemented")),
1367            "Expected semantic validation to reject unimplemented function, got: {:?}",
1368            result.errors
1369        );
1370    }
1371
1372    #[test]
1373    fn test_sum_with_variable_fails_validation() {
1374        let expr = ValueExpression::AggregateFunction {
1375            name: "sum".to_string(),
1376            args: vec![ValueExpression::Variable("n".to_string())],
1377            distinct: false,
1378        };
1379        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1380        assert!(
1381            !result.errors.is_empty(),
1382            "Expected SUM(variable) to produce validation errors"
1383        );
1384        let has_sum_error = result
1385            .errors
1386            .iter()
1387            .any(|e| e.contains("SUM(n) is invalid") && e.contains("requires a property"));
1388        assert!(
1389            has_sum_error,
1390            "Expected error about SUM requiring property, got: {:?}",
1391            result.errors
1392        );
1393    }
1394
1395    #[test]
1396    fn test_avg_with_variable_fails_validation() {
1397        let expr = ValueExpression::AggregateFunction {
1398            name: "avg".to_string(),
1399            args: vec![ValueExpression::Variable("n".to_string())],
1400            distinct: false,
1401        };
1402        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1403        assert!(
1404            !result.errors.is_empty(),
1405            "Expected AVG(variable) to produce validation errors"
1406        );
1407        let has_avg_error = result
1408            .errors
1409            .iter()
1410            .any(|e| e.contains("AVG(n) is invalid") && e.contains("requires a property"));
1411        assert!(
1412            has_avg_error,
1413            "Expected error about AVG requiring property, got: {:?}",
1414            result.errors
1415        );
1416    }
1417
1418    #[test]
1419    fn test_sum_with_property_passes_validation() {
1420        let expr = ValueExpression::AggregateFunction {
1421            name: "sum".to_string(),
1422            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1423            distinct: false,
1424        };
1425        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1426        assert!(
1427            result.errors.is_empty(),
1428            "SUM with property should pass validation, got errors: {:?}",
1429            result.errors
1430        );
1431    }
1432
1433    #[test]
1434    fn test_min_with_variable_fails_validation() {
1435        let expr = ValueExpression::AggregateFunction {
1436            name: "min".to_string(),
1437            args: vec![ValueExpression::Variable("n".to_string())],
1438            distinct: false,
1439        };
1440        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1441        assert!(
1442            !result.errors.is_empty(),
1443            "Expected MIN(variable) to produce validation errors"
1444        );
1445        let has_min_error = result
1446            .errors
1447            .iter()
1448            .any(|e| e.contains("MIN(n) is invalid") && e.contains("requires a property"));
1449        assert!(
1450            has_min_error,
1451            "Expected error about MIN requiring property, got: {:?}",
1452            result.errors
1453        );
1454    }
1455
1456    #[test]
1457    fn test_max_with_variable_fails_validation() {
1458        let expr = ValueExpression::AggregateFunction {
1459            name: "max".to_string(),
1460            args: vec![ValueExpression::Variable("n".to_string())],
1461            distinct: false,
1462        };
1463        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1464        assert!(
1465            !result.errors.is_empty(),
1466            "Expected MAX(variable) to produce validation errors"
1467        );
1468        let has_max_error = result
1469            .errors
1470            .iter()
1471            .any(|e| e.contains("MAX(n) is invalid") && e.contains("requires a property"));
1472        assert!(
1473            has_max_error,
1474            "Expected error about MAX requiring property, got: {:?}",
1475            result.errors
1476        );
1477    }
1478
1479    #[test]
1480    fn test_min_with_property_passes_validation() {
1481        let expr = ValueExpression::AggregateFunction {
1482            name: "min".to_string(),
1483            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1484            distinct: false,
1485        };
1486        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1487        assert!(
1488            result.errors.is_empty(),
1489            "MIN with property should pass validation, got errors: {:?}",
1490            result.errors
1491        );
1492    }
1493
1494    #[test]
1495    fn test_max_with_property_passes_validation() {
1496        let expr = ValueExpression::AggregateFunction {
1497            name: "max".to_string(),
1498            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1499            distinct: false,
1500        };
1501        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1502        assert!(
1503            result.errors.is_empty(),
1504            "MAX with property should pass validation, got errors: {:?}",
1505            result.errors
1506        );
1507    }
1508
1509    #[test]
1510    fn test_distinct_only_supported_on_count() {
1511        // SUM(DISTINCT n.age) should fail - DISTINCT only supported for COUNT
1512        let expr = ValueExpression::AggregateFunction {
1513            name: "sum".to_string(),
1514            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1515            distinct: true,
1516        };
1517        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1518        assert!(
1519            result
1520                .errors
1521                .iter()
1522                .any(|e| e.contains("DISTINCT is only supported with COUNT")),
1523            "Expected error about DISTINCT only for COUNT, got: {:?}",
1524            result.errors
1525        );
1526    }
1527
1528    #[test]
1529    fn test_count_distinct_star_rejected() {
1530        // COUNT(DISTINCT *) is semantically meaningless - should be rejected
1531        let expr = ValueExpression::AggregateFunction {
1532            name: "count".to_string(),
1533            args: vec![ValueExpression::Variable("*".to_string())],
1534            distinct: true,
1535        };
1536        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1537        assert!(
1538            result
1539                .errors
1540                .iter()
1541                .any(|e| e.contains("COUNT(DISTINCT *)")),
1542            "Expected error about COUNT(DISTINCT *), got: {:?}",
1543            result.errors
1544        );
1545    }
1546
1547    #[test]
1548    fn test_count_distinct_passes_validation() {
1549        // COUNT(DISTINCT n.age) should pass
1550        let expr = ValueExpression::AggregateFunction {
1551            name: "count".to_string(),
1552            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1553            distinct: true,
1554        };
1555        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1556        assert!(
1557            result.errors.is_empty(),
1558            "COUNT(DISTINCT) should pass validation, got errors: {:?}",
1559            result.errors
1560        );
1561    }
1562
1563    #[test]
1564    fn test_arithmetic_with_non_numeric_literal_error() {
1565        // RETURN "x" + 1
1566        let expr = ValueExpression::Arithmetic {
1567            left: Box::new(ValueExpression::Literal(PropertyValue::String(
1568                "x".to_string(),
1569            ))),
1570            operator: ArithmeticOperator::Add,
1571            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1572        };
1573        let result = analyze_return_expr(expr).unwrap();
1574        // The semantic analyzer returns Ok with errors collected in the result
1575        assert!(result
1576            .errors
1577            .iter()
1578            .any(|e| e.contains("Arithmetic requires numeric literal operands")));
1579    }
1580
1581    #[test]
1582    fn test_arithmetic_with_numeric_literals_ok() {
1583        // RETURN 1 + 2.0
1584        let expr = ValueExpression::Arithmetic {
1585            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1586            operator: ArithmeticOperator::Add,
1587            right: Box::new(ValueExpression::Literal(PropertyValue::Float(2.0))),
1588        };
1589        let result = analyze_return_expr(expr);
1590        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1591        assert!(result.unwrap().errors.is_empty());
1592    }
1593
1594    #[test]
1595    fn test_vector_distance_with_property() {
1596        use crate::ast::DistanceMetric;
1597
1598        // MATCH (p:Person) RETURN vector_distance(p.embedding, p.embedding, l2)
1599        let expr = ValueExpression::VectorDistance {
1600            left: Box::new(ValueExpression::Property(PropertyRef {
1601                variable: "p".to_string(),
1602                property: "embedding".to_string(),
1603            })),
1604            right: Box::new(ValueExpression::Property(PropertyRef {
1605                variable: "p".to_string(),
1606                property: "embedding".to_string(),
1607            })),
1608            metric: DistanceMetric::L2,
1609        };
1610
1611        let result = analyze_return_with_match("p", "Person", expr);
1612        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1613        assert!(result.unwrap().errors.is_empty());
1614    }
1615
1616    #[test]
1617    fn test_vector_distance_without_property_fails() {
1618        use crate::ast::DistanceMetric;
1619
1620        // MATCH (p:Person) RETURN vector_distance(0.5, 0.3, l2) - both literals, should fail
1621        let expr = ValueExpression::VectorDistance {
1622            left: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
1623            right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.3))),
1624            metric: DistanceMetric::L2,
1625        };
1626
1627        let result = analyze_return_with_match("p", "Person", expr);
1628        // Semantic analyzer returns Ok but with errors in the result
1629        assert!(
1630            result.is_ok(),
1631            "Analyzer should return Ok with errors, got {:?}",
1632            result
1633        );
1634        let semantic_result = result.unwrap();
1635        assert!(
1636            !semantic_result.errors.is_empty(),
1637            "Expected validation errors"
1638        );
1639        assert!(semantic_result
1640            .errors
1641            .iter()
1642            .any(|e| e.contains("requires at least one argument to be a property")));
1643    }
1644
1645    #[test]
1646    fn test_vector_similarity_with_property() {
1647        use crate::ast::DistanceMetric;
1648
1649        // MATCH (p:Person) RETURN vector_similarity(p.embedding, p.embedding, cosine)
1650        let expr = ValueExpression::VectorSimilarity {
1651            left: Box::new(ValueExpression::Property(PropertyRef {
1652                variable: "p".to_string(),
1653                property: "embedding".to_string(),
1654            })),
1655            right: Box::new(ValueExpression::Property(PropertyRef {
1656                variable: "p".to_string(),
1657                property: "embedding".to_string(),
1658            })),
1659            metric: DistanceMetric::Cosine,
1660        };
1661
1662        let result = analyze_return_with_match("p", "Person", expr);
1663        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1664        assert!(result.unwrap().errors.is_empty());
1665    }
1666
1667    #[test]
1668    fn test_vector_similarity_one_literal_ok() {
1669        use crate::ast::DistanceMetric;
1670
1671        // MATCH (p:Person) RETURN vector_similarity(p.embedding, 0.5, cosine)
1672        // One property reference is sufficient
1673        let expr = ValueExpression::VectorSimilarity {
1674            left: Box::new(ValueExpression::Property(PropertyRef {
1675                variable: "p".to_string(),
1676                property: "embedding".to_string(),
1677            })),
1678            right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
1679            metric: DistanceMetric::Cosine,
1680        };
1681
1682        let result = analyze_return_with_match("p", "Person", expr);
1683        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1684        assert!(result.unwrap().errors.is_empty());
1685    }
1686
1687    #[test]
1688    fn test_vector_distance_all_metrics() {
1689        use crate::ast::DistanceMetric;
1690
1691        // Test all distance metrics are accepted
1692        for metric in [
1693            DistanceMetric::L2,
1694            DistanceMetric::Cosine,
1695            DistanceMetric::Dot,
1696        ] {
1697            let expr = ValueExpression::VectorDistance {
1698                left: Box::new(ValueExpression::Property(PropertyRef {
1699                    variable: "p".to_string(),
1700                    property: "embedding".to_string(),
1701                })),
1702                right: Box::new(ValueExpression::Property(PropertyRef {
1703                    variable: "p".to_string(),
1704                    property: "embedding".to_string(),
1705                })),
1706                metric: metric.clone(),
1707            };
1708
1709            let result = analyze_return_with_match("p", "Person", expr);
1710            assert!(
1711                result.is_ok(),
1712                "Expected Ok for metric {:?} but got {:?}",
1713                metric,
1714                result
1715            );
1716            assert!(result.unwrap().errors.is_empty());
1717        }
1718    }
1719}