lance_graph/
semantic.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Semantic analysis for graph queries
5//!
6//! This module implements the semantic analysis phase of the query pipeline:
7//! Parse → **Semantic Analysis** → Logical Plan → Physical Plan
8//!
9//! Semantic analysis validates the query and enriches the AST with type information.
10
11use crate::ast::*;
12use crate::config::GraphConfig;
13use crate::error::{GraphError, Result};
14use std::collections::{HashMap, HashSet};
15
16/// Semantic analyzer - validates and enriches the AST
17pub struct SemanticAnalyzer {
18    config: GraphConfig,
19    variables: HashMap<String, VariableInfo>,
20    current_scope: ScopeType,
21}
22
23/// Information about a variable in the query
24#[derive(Debug, Clone)]
25pub struct VariableInfo {
26    pub name: String,
27    pub variable_type: VariableType,
28    pub labels: Vec<String>,
29    pub properties: HashSet<String>,
30    pub defined_in: ScopeType,
31}
32
/// Type of a variable.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// set / map keys and satisfy `Eq` bounds; equality is total for this unit-only enum.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum VariableType {
    /// Variable bound to a node pattern.
    Node,
    /// Variable bound to a relationship pattern.
    Relationship,
    /// Variable bound to a path.
    Path,
    /// Variable bound to a property.
    Property,
}
41
/// Scope (query clause) where a variable is defined.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// set / map keys and satisfy `Eq` bounds; equality is total for this unit-only enum.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScopeType {
    /// The `MATCH` clause.
    Match,
    /// The `WHERE` clause.
    Where,
    /// The `RETURN` clause.
    Return,
    /// The `ORDER BY` clause.
    OrderBy,
}
50
51/// Semantic analysis result with validated and enriched AST
52#[derive(Debug, Clone)]
53pub struct SemanticResult {
54    pub query: CypherQuery,
55    pub variables: HashMap<String, VariableInfo>,
56    pub errors: Vec<String>,
57    pub warnings: Vec<String>,
58}
59
60impl SemanticAnalyzer {
61    pub fn new(config: GraphConfig) -> Self {
62        Self {
63            config,
64            variables: HashMap::new(),
65            current_scope: ScopeType::Match,
66        }
67    }
68
69    /// Analyze a Cypher query AST
70    pub fn analyze(&mut self, query: &CypherQuery) -> Result<SemanticResult> {
71        let mut errors = Vec::new();
72        let mut warnings = Vec::new();
73
74        // Phase 1: Variable discovery in MATCH clauses
75        self.current_scope = ScopeType::Match;
76        for match_clause in &query.match_clauses {
77            if let Err(e) = self.analyze_match_clause(match_clause) {
78                errors.push(format!("MATCH clause error: {}", e));
79            }
80        }
81
82        // Phase 2: Validate WHERE clause
83        if let Some(where_clause) = &query.where_clause {
84            self.current_scope = ScopeType::Where;
85            if let Err(e) = self.analyze_where_clause(where_clause) {
86                errors.push(format!("WHERE clause error: {}", e));
87            }
88        }
89
90        // Phase 3: Validate RETURN clause
91        self.current_scope = ScopeType::Return;
92        if let Err(e) = self.analyze_return_clause(&query.return_clause) {
93            errors.push(format!("RETURN clause error: {}", e));
94        }
95
96        // Phase 4: Validate ORDER BY clause
97        if let Some(order_by) = &query.order_by {
98            self.current_scope = ScopeType::OrderBy;
99            if let Err(e) = self.analyze_order_by_clause(order_by) {
100                errors.push(format!("ORDER BY clause error: {}", e));
101            }
102        }
103
104        // Phase 5: Schema validation
105        self.validate_schema(&mut warnings);
106
107        // Phase 6: Type checking
108        self.validate_types(&mut errors);
109
110        Ok(SemanticResult {
111            query: query.clone(),
112            variables: self.variables.clone(),
113            errors,
114            warnings,
115        })
116    }
117
118    /// Analyze MATCH clause and discover variables
119    fn analyze_match_clause(&mut self, match_clause: &MatchClause) -> Result<()> {
120        for pattern in &match_clause.patterns {
121            self.analyze_graph_pattern(pattern)?;
122        }
123        Ok(())
124    }
125
126    /// Analyze a graph pattern and register variables
127    fn analyze_graph_pattern(&mut self, pattern: &GraphPattern) -> Result<()> {
128        match pattern {
129            GraphPattern::Node(node) => {
130                self.register_node_variable(node)?;
131            }
132            GraphPattern::Path(path) => {
133                // Register start node
134                self.register_node_variable(&path.start_node)?;
135
136                // Register variables in each segment
137                for segment in &path.segments {
138                    // Validate relationship length constraints if present
139                    self.validate_length_range(&segment.relationship)?;
140                    // Register relationship variable if present
141                    if let Some(rel_var) = &segment.relationship.variable {
142                        self.register_relationship_variable(rel_var, &segment.relationship)?;
143                    }
144
145                    // Register end node
146                    self.register_node_variable(&segment.end_node)?;
147                }
148            }
149        }
150        Ok(())
151    }
152
153    /// Register a node variable
154    fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> {
155        if let Some(var_name) = &node.variable {
156            if let Some(existing) = self.variables.get_mut(var_name) {
157                if existing.variable_type != VariableType::Node {
158                    return Err(GraphError::PlanError {
159                        message: format!("Variable '{}' redefined with different type", var_name),
160                        location: snafu::Location::new(file!(), line!(), column!()),
161                    });
162                }
163                for label in &node.labels {
164                    if !existing.labels.contains(label) {
165                        existing.labels.push(label.clone());
166                    }
167                }
168                for prop in node.properties.keys() {
169                    existing.properties.insert(prop.clone());
170                }
171            } else {
172                let var_info = VariableInfo {
173                    name: var_name.clone(),
174                    variable_type: VariableType::Node,
175                    labels: node.labels.clone(),
176                    properties: node.properties.keys().cloned().collect(),
177                    defined_in: self.current_scope.clone(),
178                };
179                self.variables.insert(var_name.clone(), var_info);
180            }
181        }
182        Ok(())
183    }
184
185    /// Register a relationship variable
186    fn register_relationship_variable(
187        &mut self,
188        var_name: &str,
189        rel: &RelationshipPattern,
190    ) -> Result<()> {
191        if let Some(existing) = self.variables.get_mut(var_name) {
192            if existing.variable_type != VariableType::Relationship {
193                return Err(GraphError::PlanError {
194                    message: format!("Variable '{}' redefined with different type", var_name),
195                    location: snafu::Location::new(file!(), line!(), column!()),
196                });
197            }
198            for rel_type in &rel.types {
199                if !existing.labels.contains(rel_type) {
200                    existing.labels.push(rel_type.clone());
201                }
202            }
203            for prop in rel.properties.keys() {
204                existing.properties.insert(prop.clone());
205            }
206        } else {
207            let var_info = VariableInfo {
208                name: var_name.to_string(),
209                variable_type: VariableType::Relationship,
210                labels: rel.types.clone(), // Relationship types are like labels
211                properties: rel.properties.keys().cloned().collect(),
212                defined_in: self.current_scope.clone(),
213            };
214            self.variables.insert(var_name.to_string(), var_info);
215        }
216        Ok(())
217    }
218
219    /// Analyze WHERE clause
220    fn analyze_where_clause(&mut self, where_clause: &WhereClause) -> Result<()> {
221        self.analyze_boolean_expression(&where_clause.expression)
222    }
223
224    /// Analyze boolean expression and check variable references
225    fn analyze_boolean_expression(&mut self, expr: &BooleanExpression) -> Result<()> {
226        match expr {
227            BooleanExpression::Comparison { left, right, .. } => {
228                self.analyze_value_expression(left)?;
229                self.analyze_value_expression(right)?;
230            }
231            BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => {
232                self.analyze_boolean_expression(left)?;
233                self.analyze_boolean_expression(right)?;
234            }
235            BooleanExpression::Not(inner) => {
236                self.analyze_boolean_expression(inner)?;
237            }
238            BooleanExpression::Exists(prop_ref) => {
239                self.validate_property_reference(prop_ref)?;
240            }
241            BooleanExpression::In { expression, list } => {
242                self.analyze_value_expression(expression)?;
243                for item in list {
244                    self.analyze_value_expression(item)?;
245                }
246            }
247            BooleanExpression::Like { expression, .. } => {
248                self.analyze_value_expression(expression)?;
249            }
250            BooleanExpression::IsNull(expression) => {
251                self.analyze_value_expression(expression)?;
252            }
253            BooleanExpression::IsNotNull(expression) => {
254                self.analyze_value_expression(expression)?;
255            }
256        }
257        Ok(())
258    }
259
260    /// Analyze value expression and check variable references
261    fn analyze_value_expression(&mut self, expr: &ValueExpression) -> Result<()> {
262        match expr {
263            ValueExpression::Property(prop_ref) => {
264                self.validate_property_reference(prop_ref)?;
265            }
266            ValueExpression::Literal(_) => {
267                // Literals are always valid
268            }
269            ValueExpression::Variable(var) => {
270                if !self.variables.contains_key(var) {
271                    return Err(GraphError::PlanError {
272                        message: format!("Undefined variable: '{}'", var),
273                        location: snafu::Location::new(file!(), line!(), column!()),
274                    });
275                }
276            }
277            ValueExpression::Function { name, args } => {
278                // Validate function-specific arity and signature rules
279                match name.to_lowercase().as_str() {
280                    "count" | "sum" | "avg" | "min" | "max" => {
281                        if args.len() != 1 {
282                            return Err(GraphError::PlanError {
283                                message: format!(
284                                    "{} requires exactly 1 argument, got {}",
285                                    name.to_uppercase(),
286                                    args.len()
287                                ),
288                                location: snafu::Location::new(file!(), line!(), column!()),
289                            });
290                        }
291
292                        // Additional validation for SUM, AVG, MIN, MAX: they require properties, not bare variables
293                        // Only COUNT allows bare variables (COUNT(*) or COUNT(p))
294                        if matches!(name.to_lowercase().as_str(), "sum" | "avg" | "min" | "max") {
295                            if let Some(ValueExpression::Variable(v)) = args.first() {
296                                return Err(GraphError::PlanError {
297                                    message: format!(
298                                        "{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
299                                        name.to_uppercase(), v, name.to_uppercase(), name.to_uppercase(), v, name.to_lowercase()
300                                    ),
301                                    location: snafu::Location::new(file!(), line!(), column!()),
302                                });
303                            }
304                        }
305                    }
306                    _ => {
307                        // Other functions - no validation yet
308                    }
309                }
310
311                // Validate arguments recursively
312                for arg in args {
313                    self.analyze_value_expression(arg)?;
314                }
315            }
316            ValueExpression::Arithmetic { left, right, .. } => {
317                // Validate arithmetic operands recursively
318                self.analyze_value_expression(left)?;
319                self.analyze_value_expression(right)?;
320
321                // If both sides are literals, ensure they are numeric
322                let is_numeric_literal = |pv: &PropertyValue| {
323                    matches!(pv, PropertyValue::Integer(_) | PropertyValue::Float(_))
324                };
325
326                if let (ValueExpression::Literal(l1), ValueExpression::Literal(l2)) =
327                    (&**left, &**right)
328                {
329                    if !(is_numeric_literal(l1) && is_numeric_literal(l2)) {
330                        return Err(GraphError::PlanError {
331                            message: "Arithmetic requires numeric literal operands".to_string(),
332                            location: snafu::Location::new(file!(), line!(), column!()),
333                        });
334                    }
335                }
336            }
337        }
338        Ok(())
339    }
340
341    /// Validate property reference
342    fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> {
343        if !self.variables.contains_key(&prop_ref.variable) {
344            return Err(GraphError::PlanError {
345                message: format!("Undefined variable: '{}'", prop_ref.variable),
346                location: snafu::Location::new(file!(), line!(), column!()),
347            });
348        }
349        Ok(())
350    }
351
352    /// Analyze RETURN clause
353    fn analyze_return_clause(&mut self, return_clause: &ReturnClause) -> Result<()> {
354        for item in &return_clause.items {
355            self.analyze_value_expression(&item.expression)?;
356        }
357        Ok(())
358    }
359
360    /// Analyze ORDER BY clause
361    fn analyze_order_by_clause(&mut self, order_by: &OrderByClause) -> Result<()> {
362        for item in &order_by.items {
363            self.analyze_value_expression(&item.expression)?;
364        }
365        Ok(())
366    }
367
368    /// Validate schema references against configuration
369    fn validate_schema(&self, warnings: &mut Vec<String>) {
370        for var_info in self.variables.values() {
371            match var_info.variable_type {
372                VariableType::Node => {
373                    for label in &var_info.labels {
374                        if self.config.get_node_mapping(label).is_none() {
375                            warnings.push(format!("Node label '{}' not found in schema", label));
376                        }
377                    }
378                }
379                VariableType::Relationship => {
380                    for rel_type in &var_info.labels {
381                        if self.config.get_relationship_mapping(rel_type).is_none() {
382                            warnings.push(format!(
383                                "Relationship type '{}' not found in schema",
384                                rel_type
385                            ));
386                        }
387                    }
388                }
389                _ => {}
390            }
391        }
392    }
393
394    /// Validate types and operations
395    fn validate_types(&self, errors: &mut Vec<String>) {
396        // TODO: Implement type checking
397        // - Check that properties exist on nodes/relationships
398        // - Check that comparison operations are valid for data types
399        // - Check that arithmetic operations are valid
400
401        // Check that properties referenced in patterns exist in schema when property fields are defined
402        for var_info in self.variables.values() {
403            match var_info.variable_type {
404                VariableType::Node => {
405                    // Collect property_fields from all known label mappings that specify properties
406                    let mut label_property_sets: Vec<&[String]> = Vec::new();
407                    for label in &var_info.labels {
408                        if let Some(mapping) = self.config.get_node_mapping(label) {
409                            if !mapping.property_fields.is_empty() {
410                                label_property_sets.push(&mapping.property_fields);
411                            }
412                        }
413                    }
414
415                    if !label_property_sets.is_empty() {
416                        'prop: for prop in &var_info.properties {
417                            // Property is valid if present in at least one label's property_fields
418                            for fields in &label_property_sets {
419                                if fields.iter().any(|f| f == prop) {
420                                    continue 'prop;
421                                }
422                            }
423                            errors.push(format!(
424                                "Property '{}' not found on labels {:?}",
425                                prop, var_info.labels
426                            ));
427                        }
428                    }
429                }
430                VariableType::Relationship => {
431                    // Collect property_fields from all known relationship mappings that specify properties
432                    let mut rel_property_sets: Vec<&[String]> = Vec::new();
433                    for rel_type in &var_info.labels {
434                        if let Some(mapping) = self.config.get_relationship_mapping(rel_type) {
435                            if !mapping.property_fields.is_empty() {
436                                rel_property_sets.push(&mapping.property_fields);
437                            }
438                        }
439                    }
440
441                    if !rel_property_sets.is_empty() {
442                        'prop_rel: for prop in &var_info.properties {
443                            for fields in &rel_property_sets {
444                                if fields.iter().any(|f| f == prop) {
445                                    continue 'prop_rel;
446                                }
447                            }
448                            errors.push(format!(
449                                "Property '{}' not found on relationship types {:?}",
450                                prop, var_info.labels
451                            ));
452                        }
453                    }
454                }
455                _ => {}
456            }
457        }
458    }
459}
460
461impl SemanticAnalyzer {
462    fn validate_length_range(&self, rel: &RelationshipPattern) -> Result<()> {
463        if let Some(len) = &rel.length {
464            if let (Some(min), Some(max)) = (len.min, len.max) {
465                if min > max {
466                    return Err(GraphError::PlanError {
467                        message: "Invalid path length range: min > max".to_string(),
468                        location: snafu::Location::new(file!(), line!(), column!()),
469                    });
470                }
471            }
472        }
473        Ok(())
474    }
475}
476
477#[cfg(test)]
478mod tests {
479    use super::*;
480    use crate::ast::{
481        ArithmeticOperator, BooleanExpression, CypherQuery, GraphPattern, LengthRange, MatchClause,
482        NodePattern, PathPattern, PathSegment, PropertyRef, PropertyValue, RelationshipDirection,
483        RelationshipPattern, ReturnClause, ReturnItem, ValueExpression, WhereClause,
484    };
485    use crate::config::{GraphConfig, NodeMapping};
486
487    fn test_config() -> GraphConfig {
488        GraphConfig::builder()
489            .with_node_label("Person", "id")
490            .with_node_label("Employee", "id")
491            .with_node_label("Company", "id")
492            .with_relationship("KNOWS", "src_id", "dst_id")
493            .build()
494            .unwrap()
495    }
496
497    // Helper: analyze a query that only has a single RETURN expression
498    fn analyze_return_expr(expr: ValueExpression) -> Result<SemanticResult> {
499        let query = CypherQuery {
500            match_clauses: vec![],
501            where_clause: None,
502            return_clause: ReturnClause {
503                distinct: false,
504                items: vec![ReturnItem {
505                    expression: expr,
506                    alias: None,
507                }],
508            },
509            limit: None,
510            order_by: None,
511            skip: None,
512        };
513        let mut analyzer = SemanticAnalyzer::new(test_config());
514        analyzer.analyze(&query)
515    }
516
517    // Helper: analyze a query with a single MATCH (var:label) and a RETURN expression
518    fn analyze_return_with_match(
519        var: &str,
520        label: &str,
521        expr: ValueExpression,
522    ) -> Result<SemanticResult> {
523        let node = NodePattern::new(Some(var.to_string())).with_label(label);
524        let query = CypherQuery {
525            match_clauses: vec![MatchClause {
526                patterns: vec![GraphPattern::Node(node)],
527            }],
528            where_clause: None,
529            return_clause: ReturnClause {
530                distinct: false,
531                items: vec![ReturnItem {
532                    expression: expr,
533                    alias: None,
534                }],
535            },
536            limit: None,
537            order_by: None,
538            skip: None,
539        };
540        let mut analyzer = SemanticAnalyzer::new(test_config());
541        analyzer.analyze(&query)
542    }
543
544    #[test]
545    fn test_merge_node_variable_metadata() {
546        // MATCH (n:Person {age: 30}), (n:Employee {dept: "X"})
547        let node1 = NodePattern::new(Some("n".to_string()))
548            .with_label("Person")
549            .with_property("age", PropertyValue::Integer(30));
550        let node2 = NodePattern::new(Some("n".to_string()))
551            .with_label("Employee")
552            .with_property("dept", PropertyValue::String("X".to_string()));
553
554        let query = CypherQuery {
555            match_clauses: vec![MatchClause {
556                patterns: vec![GraphPattern::Node(node1), GraphPattern::Node(node2)],
557            }],
558            where_clause: None,
559            return_clause: ReturnClause {
560                distinct: false,
561                items: vec![],
562            },
563            limit: None,
564            order_by: None,
565            skip: None,
566        };
567
568        let mut analyzer = SemanticAnalyzer::new(test_config());
569        let result = analyzer.analyze(&query).unwrap();
570        assert!(result.errors.is_empty());
571        let n = result.variables.get("n").expect("variable n present");
572        // Labels merged
573        assert!(n.labels.contains(&"Person".to_string()));
574        assert!(n.labels.contains(&"Employee".to_string()));
575        // Properties unioned
576        assert!(n.properties.contains("age"));
577        assert!(n.properties.contains("dept"));
578    }
579
580    #[test]
581    fn test_invalid_length_range_collects_error() {
582        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
583        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
584        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
585            .with_variable("r")
586            .with_type("KNOWS");
587        rel.length = Some(LengthRange {
588            min: Some(3),
589            max: Some(2),
590        });
591
592        let path = PathPattern {
593            start_node: start,
594            segments: vec![PathSegment {
595                relationship: rel,
596                end_node: end,
597            }],
598        };
599
600        let query = CypherQuery {
601            match_clauses: vec![MatchClause {
602                patterns: vec![GraphPattern::Path(path)],
603            }],
604            where_clause: None,
605            return_clause: ReturnClause {
606                distinct: false,
607                items: vec![],
608            },
609            limit: None,
610            order_by: None,
611            skip: None,
612        };
613
614        let mut analyzer = SemanticAnalyzer::new(test_config());
615        let result = analyzer.analyze(&query).unwrap();
616        assert!(result
617            .errors
618            .iter()
619            .any(|e| e.contains("Invalid path length range")));
620    }
621
622    #[test]
623    fn test_undefined_variable_in_where() {
624        // MATCH (n:Person) WHERE EXISTS(m.name)
625        let node = NodePattern::new(Some("n".to_string())).with_label("Person");
626        let where_clause = WhereClause {
627            expression: BooleanExpression::Exists(PropertyRef::new("m", "name")),
628        };
629        let query = CypherQuery {
630            match_clauses: vec![MatchClause {
631                patterns: vec![GraphPattern::Node(node)],
632            }],
633            where_clause: Some(where_clause),
634            return_clause: ReturnClause {
635                distinct: false,
636                items: vec![],
637            },
638            limit: None,
639            order_by: None,
640            skip: None,
641        };
642
643        let mut analyzer = SemanticAnalyzer::new(test_config());
644        let result = analyzer.analyze(&query).unwrap();
645        assert!(result
646            .errors
647            .iter()
648            .any(|e| e.contains("Undefined variable: 'm'")));
649    }
650
651    #[test]
652    fn test_variable_redefinition_between_node_and_relationship() {
653        // MATCH (n:Person)-[n:KNOWS]->(m:Person)
654        let start = NodePattern::new(Some("n".to_string())).with_label("Person");
655        let end = NodePattern::new(Some("m".to_string())).with_label("Person");
656        let rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
657            .with_variable("n")
658            .with_type("KNOWS");
659
660        let path = PathPattern {
661            start_node: start,
662            segments: vec![PathSegment {
663                relationship: rel,
664                end_node: end,
665            }],
666        };
667
668        let query = CypherQuery {
669            match_clauses: vec![MatchClause {
670                patterns: vec![GraphPattern::Path(path)],
671            }],
672            where_clause: None,
673            return_clause: ReturnClause {
674                distinct: false,
675                items: vec![],
676            },
677            limit: None,
678            order_by: None,
679            skip: None,
680        };
681
682        let mut analyzer = SemanticAnalyzer::new(test_config());
683        let result = analyzer.analyze(&query).unwrap();
684        assert!(result
685            .errors
686            .iter()
687            .any(|e| e.contains("redefined with different type")));
688    }
689
690    #[test]
691    fn test_unknown_node_label_warns() {
692        // MATCH (x:Unknown)
693        let node = NodePattern::new(Some("x".to_string())).with_label("Unknown");
694        let query = CypherQuery {
695            match_clauses: vec![MatchClause {
696                patterns: vec![GraphPattern::Node(node)],
697            }],
698            where_clause: None,
699            return_clause: ReturnClause {
700                distinct: false,
701                items: vec![],
702            },
703            limit: None,
704            order_by: None,
705            skip: None,
706        };
707
708        let mut analyzer = SemanticAnalyzer::new(test_config());
709        let result = analyzer.analyze(&query).unwrap();
710        assert!(result
711            .warnings
712            .iter()
713            .any(|w| w.contains("Node label 'Unknown' not found in schema")));
714    }
715
716    #[test]
717    fn test_property_not_in_schema_reports_error() {
718        // Configure Person with allowed property 'name' only
719        let custom_config = GraphConfig::builder()
720            .with_node_mapping(
721                NodeMapping::new("Person", "id").with_properties(vec!["name".to_string()]),
722            )
723            .with_relationship("KNOWS", "src_id", "dst_id")
724            .build()
725            .unwrap();
726
727        // MATCH (n:Person {age: 30})
728        let node = NodePattern::new(Some("n".to_string()))
729            .with_label("Person")
730            .with_property("age", PropertyValue::Integer(30));
731        let query = CypherQuery {
732            match_clauses: vec![MatchClause {
733                patterns: vec![GraphPattern::Node(node)],
734            }],
735            where_clause: None,
736            return_clause: ReturnClause {
737                distinct: false,
738                items: vec![],
739            },
740            limit: None,
741            order_by: None,
742            skip: None,
743        };
744
745        let mut analyzer = SemanticAnalyzer::new(custom_config);
746        let result = analyzer.analyze(&query).unwrap();
747        assert!(result
748            .errors
749            .iter()
750            .any(|e| e.contains("Property 'age' not found on labels [\"Person\"]")));
751    }
752
753    #[test]
754    fn test_valid_length_range_ok() {
755        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
756        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
757        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
758            .with_variable("r")
759            .with_type("KNOWS");
760        rel.length = Some(LengthRange {
761            min: Some(2),
762            max: Some(3),
763        });
764
765        let path = PathPattern {
766            start_node: start,
767            segments: vec![PathSegment {
768                relationship: rel,
769                end_node: end,
770            }],
771        };
772
773        let query = CypherQuery {
774            match_clauses: vec![MatchClause {
775                patterns: vec![GraphPattern::Path(path)],
776            }],
777            where_clause: None,
778            return_clause: ReturnClause {
779                distinct: false,
780                items: vec![],
781            },
782            limit: None,
783            order_by: None,
784            skip: None,
785        };
786
787        let mut analyzer = SemanticAnalyzer::new(test_config());
788        let result = analyzer.analyze(&query).unwrap();
789        assert!(result
790            .errors
791            .iter()
792            .all(|e| !e.contains("Invalid path length range")));
793    }
794
795    #[test]
796    fn test_relationship_variable_metadata_merge_across_segments() {
797        // Path with two segments sharing the same relationship variable 'r'
798        // (a:Person)-[r:KNOWS {since: 2020}]->(b:Person)-[r:FRIEND {level: 1}]->(c:Person)
799        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
800        let mid = NodePattern::new(Some("b".to_string())).with_label("Person");
801        let end = NodePattern::new(Some("c".to_string())).with_label("Person");
802
803        let mut rel1 = RelationshipPattern::new(RelationshipDirection::Outgoing)
804            .with_variable("r")
805            .with_type("KNOWS")
806            .with_property("since", PropertyValue::Integer(2020));
807        rel1.length = None;
808
809        let mut rel2 = RelationshipPattern::new(RelationshipDirection::Outgoing)
810            .with_variable("r")
811            .with_type("FRIEND")
812            .with_property("level", PropertyValue::Integer(1));
813        rel2.length = None;
814
815        let path = PathPattern {
816            start_node: start,
817            segments: vec![
818                PathSegment {
819                    relationship: rel1,
820                    end_node: mid,
821                },
822                PathSegment {
823                    relationship: rel2,
824                    end_node: end,
825                },
826            ],
827        };
828
829        // Custom config that knows both relationship types to avoid warnings muddying the assertion
830        let custom_config = GraphConfig::builder()
831            .with_node_label("Person", "id")
832            .with_relationship("KNOWS", "src_id", "dst_id")
833            .with_relationship("FRIEND", "src_id", "dst_id")
834            .build()
835            .unwrap();
836
837        let query = CypherQuery {
838            match_clauses: vec![MatchClause {
839                patterns: vec![GraphPattern::Path(path)],
840            }],
841            where_clause: None,
842            return_clause: ReturnClause {
843                distinct: false,
844                items: vec![],
845            },
846            limit: None,
847            order_by: None,
848            skip: None,
849        };
850
851        let mut analyzer = SemanticAnalyzer::new(custom_config);
852        let result = analyzer.analyze(&query).unwrap();
853        let r = result.variables.get("r").expect("variable r present");
854        // Types merged
855        assert!(r.labels.contains(&"KNOWS".to_string()));
856        assert!(r.labels.contains(&"FRIEND".to_string()));
857        // Properties unioned
858        assert!(r.properties.contains("since"));
859        assert!(r.properties.contains("level"));
860    }
861
862    #[test]
863    fn test_function_argument_undefined_variable_in_return() {
864        // RETURN toUpper(m.name)
865        let expr = ValueExpression::Function {
866            name: "toUpper".to_string(),
867            args: vec![ValueExpression::Property(PropertyRef::new("m", "name"))],
868        };
869        let result = analyze_return_expr(expr).unwrap();
870        assert!(result
871            .errors
872            .iter()
873            .any(|e| e.contains("Undefined variable: 'm'")));
874    }
875
876    #[test]
877    fn test_function_argument_valid_variable_ok() {
878        // MATCH (n:Person) RETURN toUpper(n.name)
879        let expr = ValueExpression::Function {
880            name: "toUpper".to_string(),
881            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
882        };
883        let result = analyze_return_with_match("n", "Person", expr).unwrap();
884        assert!(result.errors.is_empty());
885    }
886
887    #[test]
888    fn test_arithmetic_with_undefined_variable_in_return() {
889        // RETURN x + 1
890        let expr = ValueExpression::Arithmetic {
891            left: Box::new(ValueExpression::Variable("x".to_string())),
892            operator: ArithmeticOperator::Add,
893            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
894        };
895        let result = analyze_return_expr(expr).unwrap();
896        assert!(result
897            .errors
898            .iter()
899            .any(|e| e.contains("Undefined variable: 'x'")));
900    }
901
902    #[test]
903    fn test_arithmetic_with_defined_property_ok() {
904        let expr = ValueExpression::Arithmetic {
905            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
906            operator: ArithmeticOperator::Add,
907            right: Box::new(ValueExpression::Property(PropertyRef::new("n", "age"))),
908        };
909        let result = analyze_return_with_match("n", "Person", expr).unwrap();
910        // Should not report undefined variable 'n'
911        assert!(result
912            .errors
913            .iter()
914            .all(|e| !e.contains("Undefined variable: 'n'")));
915    }
916
917    #[test]
918    fn test_count_with_multiple_args_fails_validation() {
919        // COUNT(n.age, n.name) should fail semantic validation
920        let expr = ValueExpression::Function {
921            name: "count".to_string(),
922            args: vec![
923                ValueExpression::Property(PropertyRef::new("n", "age")),
924                ValueExpression::Property(PropertyRef::new("n", "name")),
925            ],
926        };
927        let result = analyze_return_with_match("n", "Person", expr).unwrap();
928        assert!(
929            result
930                .errors
931                .iter()
932                .any(|e| e.contains("COUNT requires exactly 1 argument")),
933            "Expected error about COUNT arity, got: {:?}",
934            result.errors
935        );
936    }
937
938    #[test]
939    fn test_count_with_zero_args_fails_validation() {
940        // COUNT() with no arguments should fail
941        let expr = ValueExpression::Function {
942            name: "count".to_string(),
943            args: vec![],
944        };
945        let result = analyze_return_with_match("n", "Person", expr).unwrap();
946        assert!(
947            result
948                .errors
949                .iter()
950                .any(|e| e.contains("COUNT requires exactly 1 argument")),
951            "Expected error about COUNT arity, got: {:?}",
952            result.errors
953        );
954    }
955
956    #[test]
957    fn test_count_with_one_arg_passes_validation() {
958        // COUNT(n.age) should pass validation
959        let expr = ValueExpression::Function {
960            name: "count".to_string(),
961            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
962        };
963        let result = analyze_return_with_match("n", "Person", expr).unwrap();
964        assert!(
965            result
966                .errors
967                .iter()
968                .all(|e| !e.contains("COUNT requires exactly 1 argument")),
969            "COUNT with 1 arg should not produce arity error, got: {:?}",
970            result.errors
971        );
972    }
973
974    #[test]
975    fn test_sum_with_variable_fails_validation() {
976        let expr = ValueExpression::Function {
977            name: "sum".to_string(),
978            args: vec![ValueExpression::Variable("n".to_string())],
979        };
980        let result = analyze_return_with_match("n", "Person", expr).unwrap();
981        assert!(
982            !result.errors.is_empty(),
983            "Expected SUM(variable) to produce validation errors"
984        );
985        let has_sum_error = result
986            .errors
987            .iter()
988            .any(|e| e.contains("SUM(n) is invalid") && e.contains("requires a property"));
989        assert!(
990            has_sum_error,
991            "Expected error about SUM requiring property, got: {:?}",
992            result.errors
993        );
994    }
995
996    #[test]
997    fn test_avg_with_variable_fails_validation() {
998        let expr = ValueExpression::Function {
999            name: "avg".to_string(),
1000            args: vec![ValueExpression::Variable("n".to_string())],
1001        };
1002        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1003        assert!(
1004            !result.errors.is_empty(),
1005            "Expected AVG(variable) to produce validation errors"
1006        );
1007        let has_avg_error = result
1008            .errors
1009            .iter()
1010            .any(|e| e.contains("AVG(n) is invalid") && e.contains("requires a property"));
1011        assert!(
1012            has_avg_error,
1013            "Expected error about AVG requiring property, got: {:?}",
1014            result.errors
1015        );
1016    }
1017
1018    #[test]
1019    fn test_sum_with_property_passes_validation() {
1020        let expr = ValueExpression::Function {
1021            name: "sum".to_string(),
1022            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1023        };
1024        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1025        assert!(
1026            result.errors.is_empty(),
1027            "SUM with property should pass validation, got errors: {:?}",
1028            result.errors
1029        );
1030    }
1031
1032    #[test]
1033    fn test_min_with_variable_fails_validation() {
1034        let expr = ValueExpression::Function {
1035            name: "min".to_string(),
1036            args: vec![ValueExpression::Variable("n".to_string())],
1037        };
1038        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1039        assert!(
1040            !result.errors.is_empty(),
1041            "Expected MIN(variable) to produce validation errors"
1042        );
1043        let has_min_error = result
1044            .errors
1045            .iter()
1046            .any(|e| e.contains("MIN(n) is invalid") && e.contains("requires a property"));
1047        assert!(
1048            has_min_error,
1049            "Expected error about MIN requiring property, got: {:?}",
1050            result.errors
1051        );
1052    }
1053
1054    #[test]
1055    fn test_max_with_variable_fails_validation() {
1056        let expr = ValueExpression::Function {
1057            name: "max".to_string(),
1058            args: vec![ValueExpression::Variable("n".to_string())],
1059        };
1060        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1061        assert!(
1062            !result.errors.is_empty(),
1063            "Expected MAX(variable) to produce validation errors"
1064        );
1065        let has_max_error = result
1066            .errors
1067            .iter()
1068            .any(|e| e.contains("MAX(n) is invalid") && e.contains("requires a property"));
1069        assert!(
1070            has_max_error,
1071            "Expected error about MAX requiring property, got: {:?}",
1072            result.errors
1073        );
1074    }
1075
1076    #[test]
1077    fn test_min_with_property_passes_validation() {
1078        let expr = ValueExpression::Function {
1079            name: "min".to_string(),
1080            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1081        };
1082        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1083        assert!(
1084            result.errors.is_empty(),
1085            "MIN with property should pass validation, got errors: {:?}",
1086            result.errors
1087        );
1088    }
1089
1090    #[test]
1091    fn test_max_with_property_passes_validation() {
1092        let expr = ValueExpression::Function {
1093            name: "max".to_string(),
1094            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1095        };
1096        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1097        assert!(
1098            result.errors.is_empty(),
1099            "MAX with property should pass validation, got errors: {:?}",
1100            result.errors
1101        );
1102    }
1103
1104    #[test]
1105    fn test_arithmetic_with_non_numeric_literal_error() {
1106        // RETURN "x" + 1
1107        let expr = ValueExpression::Arithmetic {
1108            left: Box::new(ValueExpression::Literal(PropertyValue::String(
1109                "x".to_string(),
1110            ))),
1111            operator: ArithmeticOperator::Add,
1112            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1113        };
1114        let result = analyze_return_expr(expr).unwrap();
1115        // The semantic analyzer returns Ok with errors collected in the result
1116        assert!(result
1117            .errors
1118            .iter()
1119            .any(|e| e.contains("Arithmetic requires numeric literal operands")));
1120    }
1121
1122    #[test]
1123    fn test_arithmetic_with_numeric_literals_ok() {
1124        // RETURN 1 + 2.0
1125        let expr = ValueExpression::Arithmetic {
1126            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1127            operator: ArithmeticOperator::Add,
1128            right: Box::new(ValueExpression::Literal(PropertyValue::Float(2.0))),
1129        };
1130        let result = analyze_return_expr(expr);
1131        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1132        assert!(result.unwrap().errors.is_empty());
1133    }
1134}