lance_graph/
semantic.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Semantic analysis for graph queries
5//!
6//! This module implements the semantic analysis phase of the query pipeline:
7//! Parse → **Semantic Analysis** → Logical Plan → Physical Plan
8//!
9//! Semantic analysis validates the query and enriches the AST with type information.
10
11use crate::ast::*;
12use crate::config::GraphConfig;
13use crate::error::{GraphError, Result};
14use std::collections::{HashMap, HashSet};
15
/// Semantic analyzer - validates and enriches the AST
///
/// The analyzer is stateful: variables discovered while walking MATCH patterns
/// are stored on the instance and consulted when later clauses are checked.
pub struct SemanticAnalyzer {
    /// Graph configuration; queried for node and relationship mappings during
    /// schema and property validation.
    config: GraphConfig,
    /// Variables registered so far, keyed by variable name.
    variables: HashMap<String, VariableInfo>,
    /// Clause currently being analyzed; cloned into `VariableInfo::defined_in`
    /// when a variable is first registered.
    current_scope: ScopeType,
}
22
/// Information about a variable in the query
#[derive(Debug, Clone)]
pub struct VariableInfo {
    /// Variable name as written in the query.
    pub name: String,
    /// Kind of entity the variable is bound to.
    pub variable_type: VariableType,
    /// Node labels, or relationship types for relationship variables. Values
    /// from repeated occurrences of the same variable are merged without
    /// duplicates.
    pub labels: Vec<String>,
    /// Property keys used in the patterns that bind this variable.
    pub properties: HashSet<String>,
    /// Scope that was active when the variable was first registered.
    pub defined_in: ScopeType,
}
32
/// Type of a variable
#[derive(Debug, Clone, PartialEq)]
pub enum VariableType {
    /// Bound by a node pattern.
    Node,
    /// Bound by a relationship pattern.
    Relationship,
    /// NOTE(review): not constructed anywhere in this module's visible code;
    /// presumably reserved for named path variables - confirm.
    Path,
    /// NOTE(review): not constructed anywhere in this module's visible code;
    /// presumably reserved for property-valued bindings - confirm.
    Property,
}
41
/// Scope where a variable is defined
///
/// The analyzer switches its current scope as it moves from one clause of the
/// query to the next.
#[derive(Debug, Clone, PartialEq)]
pub enum ScopeType {
    /// MATCH clauses (also the scope a new analyzer starts in).
    Match,
    /// WHERE clause.
    Where,
    /// RETURN clause.
    Return,
    /// ORDER BY clause.
    OrderBy,
}
50
/// Semantic analysis result with validated and enriched AST
#[derive(Debug, Clone)]
pub struct SemanticResult {
    /// Clone of the query that was analyzed.
    pub query: CypherQuery,
    /// Snapshot of the analyzer's variable table after analysis.
    pub variables: HashMap<String, VariableInfo>,
    /// Problems found during analysis; clause-level failures are prefixed with
    /// the clause name (e.g. `"MATCH clause error: ..."`).
    pub errors: Vec<String>,
    /// Non-fatal findings, such as labels or relationship types that have no
    /// mapping in the configuration.
    pub warnings: Vec<String>,
}
59
60impl SemanticAnalyzer {
61    pub fn new(config: GraphConfig) -> Self {
62        Self {
63            config,
64            variables: HashMap::new(),
65            current_scope: ScopeType::Match,
66        }
67    }
68
69    /// Analyze a Cypher query AST
70    pub fn analyze(&mut self, query: &CypherQuery) -> Result<SemanticResult> {
71        let mut errors = Vec::new();
72        let mut warnings = Vec::new();
73
74        // Phase 1: Variable discovery in MATCH clauses
75        self.current_scope = ScopeType::Match;
76        for match_clause in &query.match_clauses {
77            if let Err(e) = self.analyze_match_clause(match_clause) {
78                errors.push(format!("MATCH clause error: {}", e));
79            }
80        }
81
82        // Phase 2: Validate WHERE clause
83        if let Some(where_clause) = &query.where_clause {
84            self.current_scope = ScopeType::Where;
85            if let Err(e) = self.analyze_where_clause(where_clause) {
86                errors.push(format!("WHERE clause error: {}", e));
87            }
88        }
89
90        // Phase 3: Validate RETURN clause
91        self.current_scope = ScopeType::Return;
92        if let Err(e) = self.analyze_return_clause(&query.return_clause) {
93            errors.push(format!("RETURN clause error: {}", e));
94        }
95
96        // Phase 4: Validate ORDER BY clause
97        if let Some(order_by) = &query.order_by {
98            self.current_scope = ScopeType::OrderBy;
99            if let Err(e) = self.analyze_order_by_clause(order_by) {
100                errors.push(format!("ORDER BY clause error: {}", e));
101            }
102        }
103
104        // Phase 5: Schema validation
105        self.validate_schema(&mut warnings);
106
107        // Phase 6: Type checking
108        self.validate_types(&mut errors);
109
110        Ok(SemanticResult {
111            query: query.clone(),
112            variables: self.variables.clone(),
113            errors,
114            warnings,
115        })
116    }
117
118    /// Analyze MATCH clause and discover variables
119    fn analyze_match_clause(&mut self, match_clause: &MatchClause) -> Result<()> {
120        for pattern in &match_clause.patterns {
121            self.analyze_graph_pattern(pattern)?;
122        }
123        Ok(())
124    }
125
126    /// Analyze a graph pattern and register variables
127    fn analyze_graph_pattern(&mut self, pattern: &GraphPattern) -> Result<()> {
128        match pattern {
129            GraphPattern::Node(node) => {
130                self.register_node_variable(node)?;
131            }
132            GraphPattern::Path(path) => {
133                // Register start node
134                self.register_node_variable(&path.start_node)?;
135
136                // Register variables in each segment
137                for segment in &path.segments {
138                    // Validate relationship length constraints if present
139                    self.validate_length_range(&segment.relationship)?;
140                    // Register relationship variable if present
141                    if let Some(rel_var) = &segment.relationship.variable {
142                        self.register_relationship_variable(rel_var, &segment.relationship)?;
143                    }
144
145                    // Register end node
146                    self.register_node_variable(&segment.end_node)?;
147                }
148            }
149        }
150        Ok(())
151    }
152
153    /// Register a node variable
154    fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> {
155        if let Some(var_name) = &node.variable {
156            if let Some(existing) = self.variables.get_mut(var_name) {
157                if existing.variable_type != VariableType::Node {
158                    return Err(GraphError::PlanError {
159                        message: format!("Variable '{}' redefined with different type", var_name),
160                        location: snafu::Location::new(file!(), line!(), column!()),
161                    });
162                }
163                for label in &node.labels {
164                    if !existing.labels.contains(label) {
165                        existing.labels.push(label.clone());
166                    }
167                }
168                for prop in node.properties.keys() {
169                    existing.properties.insert(prop.clone());
170                }
171            } else {
172                let var_info = VariableInfo {
173                    name: var_name.clone(),
174                    variable_type: VariableType::Node,
175                    labels: node.labels.clone(),
176                    properties: node.properties.keys().cloned().collect(),
177                    defined_in: self.current_scope.clone(),
178                };
179                self.variables.insert(var_name.clone(), var_info);
180            }
181        }
182        Ok(())
183    }
184
185    /// Register a relationship variable
186    fn register_relationship_variable(
187        &mut self,
188        var_name: &str,
189        rel: &RelationshipPattern,
190    ) -> Result<()> {
191        if let Some(existing) = self.variables.get_mut(var_name) {
192            if existing.variable_type != VariableType::Relationship {
193                return Err(GraphError::PlanError {
194                    message: format!("Variable '{}' redefined with different type", var_name),
195                    location: snafu::Location::new(file!(), line!(), column!()),
196                });
197            }
198            for rel_type in &rel.types {
199                if !existing.labels.contains(rel_type) {
200                    existing.labels.push(rel_type.clone());
201                }
202            }
203            for prop in rel.properties.keys() {
204                existing.properties.insert(prop.clone());
205            }
206        } else {
207            let var_info = VariableInfo {
208                name: var_name.to_string(),
209                variable_type: VariableType::Relationship,
210                labels: rel.types.clone(), // Relationship types are like labels
211                properties: rel.properties.keys().cloned().collect(),
212                defined_in: self.current_scope.clone(),
213            };
214            self.variables.insert(var_name.to_string(), var_info);
215        }
216        Ok(())
217    }
218
219    /// Analyze WHERE clause
220    fn analyze_where_clause(&mut self, where_clause: &WhereClause) -> Result<()> {
221        self.analyze_boolean_expression(&where_clause.expression)
222    }
223
224    /// Analyze boolean expression and check variable references
225    fn analyze_boolean_expression(&mut self, expr: &BooleanExpression) -> Result<()> {
226        match expr {
227            BooleanExpression::Comparison { left, right, .. } => {
228                self.analyze_value_expression(left)?;
229                self.analyze_value_expression(right)?;
230            }
231            BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => {
232                self.analyze_boolean_expression(left)?;
233                self.analyze_boolean_expression(right)?;
234            }
235            BooleanExpression::Not(inner) => {
236                self.analyze_boolean_expression(inner)?;
237            }
238            BooleanExpression::Exists(prop_ref) => {
239                self.validate_property_reference(prop_ref)?;
240            }
241            BooleanExpression::In { expression, list } => {
242                self.analyze_value_expression(expression)?;
243                for item in list {
244                    self.analyze_value_expression(item)?;
245                }
246            }
247            BooleanExpression::Like { expression, .. } => {
248                self.analyze_value_expression(expression)?;
249            }
250            BooleanExpression::ILike { expression, .. } => {
251                self.analyze_value_expression(expression)?;
252            }
253            BooleanExpression::Contains { expression, .. } => {
254                self.analyze_value_expression(expression)?;
255            }
256            BooleanExpression::StartsWith { expression, .. } => {
257                self.analyze_value_expression(expression)?;
258            }
259            BooleanExpression::EndsWith { expression, .. } => {
260                self.analyze_value_expression(expression)?;
261            }
262            BooleanExpression::IsNull(expression) => {
263                self.analyze_value_expression(expression)?;
264            }
265            BooleanExpression::IsNotNull(expression) => {
266                self.analyze_value_expression(expression)?;
267            }
268        }
269        Ok(())
270    }
271
272    /// Analyze value expression and check variable references
273    fn analyze_value_expression(&mut self, expr: &ValueExpression) -> Result<()> {
274        match expr {
275            ValueExpression::Property(prop_ref) => {
276                self.validate_property_reference(prop_ref)?;
277            }
278            ValueExpression::Literal(_) => {
279                // Literals are always valid
280            }
281            ValueExpression::Variable(var) => {
282                if !self.variables.contains_key(var) {
283                    return Err(GraphError::PlanError {
284                        message: format!("Undefined variable: '{}'", var),
285                        location: snafu::Location::new(file!(), line!(), column!()),
286                    });
287                }
288            }
289            ValueExpression::Function { name, args } => {
290                // Validate function-specific arity and signature rules
291                match name.to_lowercase().as_str() {
292                    "count" | "sum" | "avg" | "min" | "max" => {
293                        if args.len() != 1 {
294                            return Err(GraphError::PlanError {
295                                message: format!(
296                                    "{} requires exactly 1 argument, got {}",
297                                    name.to_uppercase(),
298                                    args.len()
299                                ),
300                                location: snafu::Location::new(file!(), line!(), column!()),
301                            });
302                        }
303
304                        // Additional validation for SUM, AVG, MIN, MAX: they require properties, not bare variables
305                        // Only COUNT allows bare variables (COUNT(*) or COUNT(p))
306                        if matches!(name.to_lowercase().as_str(), "sum" | "avg" | "min" | "max") {
307                            if let Some(ValueExpression::Variable(v)) = args.first() {
308                                return Err(GraphError::PlanError {
309                                    message: format!(
310                                        "{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
311                                        name.to_uppercase(), v, name.to_uppercase(), name.to_uppercase(), v, name.to_lowercase()
312                                    ),
313                                    location: snafu::Location::new(file!(), line!(), column!()),
314                                });
315                            }
316                        }
317                    }
318                    _ => {
319                        // Other functions - no validation yet
320                    }
321                }
322
323                // Validate arguments recursively
324                for arg in args {
325                    self.analyze_value_expression(arg)?;
326                }
327            }
328            ValueExpression::Arithmetic { left, right, .. } => {
329                // Validate arithmetic operands recursively
330                self.analyze_value_expression(left)?;
331                self.analyze_value_expression(right)?;
332
333                // If both sides are literals, ensure they are numeric
334                let is_numeric_literal = |pv: &PropertyValue| {
335                    matches!(pv, PropertyValue::Integer(_) | PropertyValue::Float(_))
336                };
337
338                if let (ValueExpression::Literal(l1), ValueExpression::Literal(l2)) =
339                    (&**left, &**right)
340                {
341                    if !(is_numeric_literal(l1) && is_numeric_literal(l2)) {
342                        return Err(GraphError::PlanError {
343                            message: "Arithmetic requires numeric literal operands".to_string(),
344                            location: snafu::Location::new(file!(), line!(), column!()),
345                        });
346                    }
347                }
348            }
349            ValueExpression::VectorDistance { left, right, .. } => {
350                // Validate vector distance function arguments
351                self.analyze_value_expression(left)?;
352                self.analyze_value_expression(right)?;
353
354                // Check that at least one argument references a property
355                let has_property = matches!(**left, ValueExpression::Property(_))
356                    || matches!(**right, ValueExpression::Property(_));
357
358                if !has_property {
359                    return Err(GraphError::PlanError {
360                        message: "vector_distance() requires at least one argument to be a property reference".to_string(),
361                        location: snafu::Location::new(file!(), line!(), column!()),
362                    });
363                }
364            }
365            ValueExpression::VectorSimilarity { left, right, .. } => {
366                // Validate vector similarity function arguments
367                self.analyze_value_expression(left)?;
368                self.analyze_value_expression(right)?;
369
370                // Check that at least one argument references a property
371                let has_property = matches!(**left, ValueExpression::Property(_))
372                    || matches!(**right, ValueExpression::Property(_));
373
374                if !has_property {
375                    return Err(GraphError::PlanError {
376                        message: "vector_similarity() requires at least one argument to be a property reference".to_string(),
377                        location: snafu::Location::new(file!(), line!(), column!()),
378                    });
379                }
380            }
381            ValueExpression::VectorLiteral(values) => {
382                // Validate non-empty
383                if values.is_empty() {
384                    return Err(GraphError::PlanError {
385                        message: "Vector literal cannot be empty".to_string(),
386                        location: snafu::Location::new(file!(), line!(), column!()),
387                    });
388                }
389
390                // Note: Very large vectors (>4096 dimensions) may impact performance
391                // but we don't enforce a hard limit here
392            }
393            ValueExpression::Parameter(_) => {
394                // Parameters are always valid (resolved at runtime)
395            }
396        }
397        Ok(())
398    }
399
400    /// Validate property reference
401    fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> {
402        if !self.variables.contains_key(&prop_ref.variable) {
403            return Err(GraphError::PlanError {
404                message: format!("Undefined variable: '{}'", prop_ref.variable),
405                location: snafu::Location::new(file!(), line!(), column!()),
406            });
407        }
408        Ok(())
409    }
410
411    /// Analyze RETURN clause
412    fn analyze_return_clause(&mut self, return_clause: &ReturnClause) -> Result<()> {
413        for item in &return_clause.items {
414            self.analyze_value_expression(&item.expression)?;
415        }
416        Ok(())
417    }
418
419    /// Analyze ORDER BY clause
420    fn analyze_order_by_clause(&mut self, order_by: &OrderByClause) -> Result<()> {
421        for item in &order_by.items {
422            self.analyze_value_expression(&item.expression)?;
423        }
424        Ok(())
425    }
426
427    /// Validate schema references against configuration
428    fn validate_schema(&self, warnings: &mut Vec<String>) {
429        for var_info in self.variables.values() {
430            match var_info.variable_type {
431                VariableType::Node => {
432                    for label in &var_info.labels {
433                        if self.config.get_node_mapping(label).is_none() {
434                            warnings.push(format!("Node label '{}' not found in schema", label));
435                        }
436                    }
437                }
438                VariableType::Relationship => {
439                    for rel_type in &var_info.labels {
440                        if self.config.get_relationship_mapping(rel_type).is_none() {
441                            warnings.push(format!(
442                                "Relationship type '{}' not found in schema",
443                                rel_type
444                            ));
445                        }
446                    }
447                }
448                _ => {}
449            }
450        }
451    }
452
453    /// Validate types and operations
454    fn validate_types(&self, errors: &mut Vec<String>) {
455        // TODO: Implement type checking
456        // - Check that properties exist on nodes/relationships
457        // - Check that comparison operations are valid for data types
458        // - Check that arithmetic operations are valid
459
460        // Check that properties referenced in patterns exist in schema when property fields are defined
461        for var_info in self.variables.values() {
462            match var_info.variable_type {
463                VariableType::Node => {
464                    // Collect property_fields from all known label mappings that specify properties
465                    let mut label_property_sets: Vec<&[String]> = Vec::new();
466                    for label in &var_info.labels {
467                        if let Some(mapping) = self.config.get_node_mapping(label) {
468                            if !mapping.property_fields.is_empty() {
469                                label_property_sets.push(&mapping.property_fields);
470                            }
471                        }
472                    }
473
474                    if !label_property_sets.is_empty() {
475                        'prop: for prop in &var_info.properties {
476                            // Property is valid if present in at least one label's property_fields
477                            for fields in &label_property_sets {
478                                if fields.iter().any(|f| f == prop) {
479                                    continue 'prop;
480                                }
481                            }
482                            errors.push(format!(
483                                "Property '{}' not found on labels {:?}",
484                                prop, var_info.labels
485                            ));
486                        }
487                    }
488                }
489                VariableType::Relationship => {
490                    // Collect property_fields from all known relationship mappings that specify properties
491                    let mut rel_property_sets: Vec<&[String]> = Vec::new();
492                    for rel_type in &var_info.labels {
493                        if let Some(mapping) = self.config.get_relationship_mapping(rel_type) {
494                            if !mapping.property_fields.is_empty() {
495                                rel_property_sets.push(&mapping.property_fields);
496                            }
497                        }
498                    }
499
500                    if !rel_property_sets.is_empty() {
501                        'prop_rel: for prop in &var_info.properties {
502                            for fields in &rel_property_sets {
503                                if fields.iter().any(|f| f == prop) {
504                                    continue 'prop_rel;
505                                }
506                            }
507                            errors.push(format!(
508                                "Property '{}' not found on relationship types {:?}",
509                                prop, var_info.labels
510                            ));
511                        }
512                    }
513                }
514                _ => {}
515            }
516        }
517    }
518}
519
520impl SemanticAnalyzer {
521    fn validate_length_range(&self, rel: &RelationshipPattern) -> Result<()> {
522        if let Some(len) = &rel.length {
523            if let (Some(min), Some(max)) = (len.min, len.max) {
524                if min > max {
525                    return Err(GraphError::PlanError {
526                        message: "Invalid path length range: min > max".to_string(),
527                        location: snafu::Location::new(file!(), line!(), column!()),
528                    });
529                }
530            }
531        }
532        Ok(())
533    }
534}
535
536#[cfg(test)]
537mod tests {
538    use super::*;
539    use crate::ast::{
540        ArithmeticOperator, BooleanExpression, CypherQuery, GraphPattern, LengthRange, MatchClause,
541        NodePattern, PathPattern, PathSegment, PropertyRef, PropertyValue, RelationshipDirection,
542        RelationshipPattern, ReturnClause, ReturnItem, ValueExpression, WhereClause,
543    };
544    use crate::config::{GraphConfig, NodeMapping};
545
546    fn test_config() -> GraphConfig {
547        GraphConfig::builder()
548            .with_node_label("Person", "id")
549            .with_node_label("Employee", "id")
550            .with_node_label("Company", "id")
551            .with_relationship("KNOWS", "src_id", "dst_id")
552            .build()
553            .unwrap()
554    }
555
556    // Helper: analyze a query that only has a single RETURN expression
557    fn analyze_return_expr(expr: ValueExpression) -> Result<SemanticResult> {
558        let query = CypherQuery {
559            match_clauses: vec![],
560            where_clause: None,
561            return_clause: ReturnClause {
562                distinct: false,
563                items: vec![ReturnItem {
564                    expression: expr,
565                    alias: None,
566                }],
567            },
568            limit: None,
569            order_by: None,
570            skip: None,
571        };
572        let mut analyzer = SemanticAnalyzer::new(test_config());
573        analyzer.analyze(&query)
574    }
575
576    // Helper: analyze a query with a single MATCH (var:label) and a RETURN expression
577    fn analyze_return_with_match(
578        var: &str,
579        label: &str,
580        expr: ValueExpression,
581    ) -> Result<SemanticResult> {
582        let node = NodePattern::new(Some(var.to_string())).with_label(label);
583        let query = CypherQuery {
584            match_clauses: vec![MatchClause {
585                patterns: vec![GraphPattern::Node(node)],
586            }],
587            where_clause: None,
588            return_clause: ReturnClause {
589                distinct: false,
590                items: vec![ReturnItem {
591                    expression: expr,
592                    alias: None,
593                }],
594            },
595            limit: None,
596            order_by: None,
597            skip: None,
598        };
599        let mut analyzer = SemanticAnalyzer::new(test_config());
600        analyzer.analyze(&query)
601    }
602
603    #[test]
604    fn test_merge_node_variable_metadata() {
605        // MATCH (n:Person {age: 30}), (n:Employee {dept: "X"})
606        let node1 = NodePattern::new(Some("n".to_string()))
607            .with_label("Person")
608            .with_property("age", PropertyValue::Integer(30));
609        let node2 = NodePattern::new(Some("n".to_string()))
610            .with_label("Employee")
611            .with_property("dept", PropertyValue::String("X".to_string()));
612
613        let query = CypherQuery {
614            match_clauses: vec![MatchClause {
615                patterns: vec![GraphPattern::Node(node1), GraphPattern::Node(node2)],
616            }],
617            where_clause: None,
618            return_clause: ReturnClause {
619                distinct: false,
620                items: vec![],
621            },
622            limit: None,
623            order_by: None,
624            skip: None,
625        };
626
627        let mut analyzer = SemanticAnalyzer::new(test_config());
628        let result = analyzer.analyze(&query).unwrap();
629        assert!(result.errors.is_empty());
630        let n = result.variables.get("n").expect("variable n present");
631        // Labels merged
632        assert!(n.labels.contains(&"Person".to_string()));
633        assert!(n.labels.contains(&"Employee".to_string()));
634        // Properties unioned
635        assert!(n.properties.contains("age"));
636        assert!(n.properties.contains("dept"));
637    }
638
639    #[test]
640    fn test_invalid_length_range_collects_error() {
641        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
642        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
643        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
644            .with_variable("r")
645            .with_type("KNOWS");
646        rel.length = Some(LengthRange {
647            min: Some(3),
648            max: Some(2),
649        });
650
651        let path = PathPattern {
652            start_node: start,
653            segments: vec![PathSegment {
654                relationship: rel,
655                end_node: end,
656            }],
657        };
658
659        let query = CypherQuery {
660            match_clauses: vec![MatchClause {
661                patterns: vec![GraphPattern::Path(path)],
662            }],
663            where_clause: None,
664            return_clause: ReturnClause {
665                distinct: false,
666                items: vec![],
667            },
668            limit: None,
669            order_by: None,
670            skip: None,
671        };
672
673        let mut analyzer = SemanticAnalyzer::new(test_config());
674        let result = analyzer.analyze(&query).unwrap();
675        assert!(result
676            .errors
677            .iter()
678            .any(|e| e.contains("Invalid path length range")));
679    }
680
681    #[test]
682    fn test_undefined_variable_in_where() {
683        // MATCH (n:Person) WHERE EXISTS(m.name)
684        let node = NodePattern::new(Some("n".to_string())).with_label("Person");
685        let where_clause = WhereClause {
686            expression: BooleanExpression::Exists(PropertyRef::new("m", "name")),
687        };
688        let query = CypherQuery {
689            match_clauses: vec![MatchClause {
690                patterns: vec![GraphPattern::Node(node)],
691            }],
692            where_clause: Some(where_clause),
693            return_clause: ReturnClause {
694                distinct: false,
695                items: vec![],
696            },
697            limit: None,
698            order_by: None,
699            skip: None,
700        };
701
702        let mut analyzer = SemanticAnalyzer::new(test_config());
703        let result = analyzer.analyze(&query).unwrap();
704        assert!(result
705            .errors
706            .iter()
707            .any(|e| e.contains("Undefined variable: 'm'")));
708    }
709
710    #[test]
711    fn test_variable_redefinition_between_node_and_relationship() {
712        // MATCH (n:Person)-[n:KNOWS]->(m:Person)
713        let start = NodePattern::new(Some("n".to_string())).with_label("Person");
714        let end = NodePattern::new(Some("m".to_string())).with_label("Person");
715        let rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
716            .with_variable("n")
717            .with_type("KNOWS");
718
719        let path = PathPattern {
720            start_node: start,
721            segments: vec![PathSegment {
722                relationship: rel,
723                end_node: end,
724            }],
725        };
726
727        let query = CypherQuery {
728            match_clauses: vec![MatchClause {
729                patterns: vec![GraphPattern::Path(path)],
730            }],
731            where_clause: None,
732            return_clause: ReturnClause {
733                distinct: false,
734                items: vec![],
735            },
736            limit: None,
737            order_by: None,
738            skip: None,
739        };
740
741        let mut analyzer = SemanticAnalyzer::new(test_config());
742        let result = analyzer.analyze(&query).unwrap();
743        assert!(result
744            .errors
745            .iter()
746            .any(|e| e.contains("redefined with different type")));
747    }
748
749    #[test]
750    fn test_unknown_node_label_warns() {
751        // MATCH (x:Unknown)
752        let node = NodePattern::new(Some("x".to_string())).with_label("Unknown");
753        let query = CypherQuery {
754            match_clauses: vec![MatchClause {
755                patterns: vec![GraphPattern::Node(node)],
756            }],
757            where_clause: None,
758            return_clause: ReturnClause {
759                distinct: false,
760                items: vec![],
761            },
762            limit: None,
763            order_by: None,
764            skip: None,
765        };
766
767        let mut analyzer = SemanticAnalyzer::new(test_config());
768        let result = analyzer.analyze(&query).unwrap();
769        assert!(result
770            .warnings
771            .iter()
772            .any(|w| w.contains("Node label 'Unknown' not found in schema")));
773    }
774
775    #[test]
776    fn test_property_not_in_schema_reports_error() {
777        // Configure Person with allowed property 'name' only
778        let custom_config = GraphConfig::builder()
779            .with_node_mapping(
780                NodeMapping::new("Person", "id").with_properties(vec!["name".to_string()]),
781            )
782            .with_relationship("KNOWS", "src_id", "dst_id")
783            .build()
784            .unwrap();
785
786        // MATCH (n:Person {age: 30})
787        let node = NodePattern::new(Some("n".to_string()))
788            .with_label("Person")
789            .with_property("age", PropertyValue::Integer(30));
790        let query = CypherQuery {
791            match_clauses: vec![MatchClause {
792                patterns: vec![GraphPattern::Node(node)],
793            }],
794            where_clause: None,
795            return_clause: ReturnClause {
796                distinct: false,
797                items: vec![],
798            },
799            limit: None,
800            order_by: None,
801            skip: None,
802        };
803
804        let mut analyzer = SemanticAnalyzer::new(custom_config);
805        let result = analyzer.analyze(&query).unwrap();
806        assert!(result
807            .errors
808            .iter()
809            .any(|e| e.contains("Property 'age' not found on labels [\"Person\"]")));
810    }
811
812    #[test]
813    fn test_valid_length_range_ok() {
814        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
815        let end = NodePattern::new(Some("b".to_string())).with_label("Person");
816        let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
817            .with_variable("r")
818            .with_type("KNOWS");
819        rel.length = Some(LengthRange {
820            min: Some(2),
821            max: Some(3),
822        });
823
824        let path = PathPattern {
825            start_node: start,
826            segments: vec![PathSegment {
827                relationship: rel,
828                end_node: end,
829            }],
830        };
831
832        let query = CypherQuery {
833            match_clauses: vec![MatchClause {
834                patterns: vec![GraphPattern::Path(path)],
835            }],
836            where_clause: None,
837            return_clause: ReturnClause {
838                distinct: false,
839                items: vec![],
840            },
841            limit: None,
842            order_by: None,
843            skip: None,
844        };
845
846        let mut analyzer = SemanticAnalyzer::new(test_config());
847        let result = analyzer.analyze(&query).unwrap();
848        assert!(result
849            .errors
850            .iter()
851            .all(|e| !e.contains("Invalid path length range")));
852    }
853
854    #[test]
855    fn test_relationship_variable_metadata_merge_across_segments() {
856        // Path with two segments sharing the same relationship variable 'r'
857        // (a:Person)-[r:KNOWS {since: 2020}]->(b:Person)-[r:FRIEND {level: 1}]->(c:Person)
858        let start = NodePattern::new(Some("a".to_string())).with_label("Person");
859        let mid = NodePattern::new(Some("b".to_string())).with_label("Person");
860        let end = NodePattern::new(Some("c".to_string())).with_label("Person");
861
862        let mut rel1 = RelationshipPattern::new(RelationshipDirection::Outgoing)
863            .with_variable("r")
864            .with_type("KNOWS")
865            .with_property("since", PropertyValue::Integer(2020));
866        rel1.length = None;
867
868        let mut rel2 = RelationshipPattern::new(RelationshipDirection::Outgoing)
869            .with_variable("r")
870            .with_type("FRIEND")
871            .with_property("level", PropertyValue::Integer(1));
872        rel2.length = None;
873
874        let path = PathPattern {
875            start_node: start,
876            segments: vec![
877                PathSegment {
878                    relationship: rel1,
879                    end_node: mid,
880                },
881                PathSegment {
882                    relationship: rel2,
883                    end_node: end,
884                },
885            ],
886        };
887
888        // Custom config that knows both relationship types to avoid warnings muddying the assertion
889        let custom_config = GraphConfig::builder()
890            .with_node_label("Person", "id")
891            .with_relationship("KNOWS", "src_id", "dst_id")
892            .with_relationship("FRIEND", "src_id", "dst_id")
893            .build()
894            .unwrap();
895
896        let query = CypherQuery {
897            match_clauses: vec![MatchClause {
898                patterns: vec![GraphPattern::Path(path)],
899            }],
900            where_clause: None,
901            return_clause: ReturnClause {
902                distinct: false,
903                items: vec![],
904            },
905            limit: None,
906            order_by: None,
907            skip: None,
908        };
909
910        let mut analyzer = SemanticAnalyzer::new(custom_config);
911        let result = analyzer.analyze(&query).unwrap();
912        let r = result.variables.get("r").expect("variable r present");
913        // Types merged
914        assert!(r.labels.contains(&"KNOWS".to_string()));
915        assert!(r.labels.contains(&"FRIEND".to_string()));
916        // Properties unioned
917        assert!(r.properties.contains("since"));
918        assert!(r.properties.contains("level"));
919    }
920
921    #[test]
922    fn test_function_argument_undefined_variable_in_return() {
923        // RETURN toUpper(m.name)
924        let expr = ValueExpression::Function {
925            name: "toUpper".to_string(),
926            args: vec![ValueExpression::Property(PropertyRef::new("m", "name"))],
927        };
928        let result = analyze_return_expr(expr).unwrap();
929        assert!(result
930            .errors
931            .iter()
932            .any(|e| e.contains("Undefined variable: 'm'")));
933    }
934
935    #[test]
936    fn test_function_argument_valid_variable_ok() {
937        // MATCH (n:Person) RETURN toUpper(n.name)
938        let expr = ValueExpression::Function {
939            name: "toUpper".to_string(),
940            args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
941        };
942        let result = analyze_return_with_match("n", "Person", expr).unwrap();
943        assert!(result.errors.is_empty());
944    }
945
946    #[test]
947    fn test_arithmetic_with_undefined_variable_in_return() {
948        // RETURN x + 1
949        let expr = ValueExpression::Arithmetic {
950            left: Box::new(ValueExpression::Variable("x".to_string())),
951            operator: ArithmeticOperator::Add,
952            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
953        };
954        let result = analyze_return_expr(expr).unwrap();
955        assert!(result
956            .errors
957            .iter()
958            .any(|e| e.contains("Undefined variable: 'x'")));
959    }
960
961    #[test]
962    fn test_arithmetic_with_defined_property_ok() {
963        let expr = ValueExpression::Arithmetic {
964            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
965            operator: ArithmeticOperator::Add,
966            right: Box::new(ValueExpression::Property(PropertyRef::new("n", "age"))),
967        };
968        let result = analyze_return_with_match("n", "Person", expr).unwrap();
969        // Should not report undefined variable 'n'
970        assert!(result
971            .errors
972            .iter()
973            .all(|e| !e.contains("Undefined variable: 'n'")));
974    }
975
976    #[test]
977    fn test_count_with_multiple_args_fails_validation() {
978        // COUNT(n.age, n.name) should fail semantic validation
979        let expr = ValueExpression::Function {
980            name: "count".to_string(),
981            args: vec![
982                ValueExpression::Property(PropertyRef::new("n", "age")),
983                ValueExpression::Property(PropertyRef::new("n", "name")),
984            ],
985        };
986        let result = analyze_return_with_match("n", "Person", expr).unwrap();
987        assert!(
988            result
989                .errors
990                .iter()
991                .any(|e| e.contains("COUNT requires exactly 1 argument")),
992            "Expected error about COUNT arity, got: {:?}",
993            result.errors
994        );
995    }
996
997    #[test]
998    fn test_count_with_zero_args_fails_validation() {
999        // COUNT() with no arguments should fail
1000        let expr = ValueExpression::Function {
1001            name: "count".to_string(),
1002            args: vec![],
1003        };
1004        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1005        assert!(
1006            result
1007                .errors
1008                .iter()
1009                .any(|e| e.contains("COUNT requires exactly 1 argument")),
1010            "Expected error about COUNT arity, got: {:?}",
1011            result.errors
1012        );
1013    }
1014
1015    #[test]
1016    fn test_count_with_one_arg_passes_validation() {
1017        // COUNT(n.age) should pass validation
1018        let expr = ValueExpression::Function {
1019            name: "count".to_string(),
1020            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1021        };
1022        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1023        assert!(
1024            result
1025                .errors
1026                .iter()
1027                .all(|e| !e.contains("COUNT requires exactly 1 argument")),
1028            "COUNT with 1 arg should not produce arity error, got: {:?}",
1029            result.errors
1030        );
1031    }
1032
1033    #[test]
1034    fn test_sum_with_variable_fails_validation() {
1035        let expr = ValueExpression::Function {
1036            name: "sum".to_string(),
1037            args: vec![ValueExpression::Variable("n".to_string())],
1038        };
1039        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1040        assert!(
1041            !result.errors.is_empty(),
1042            "Expected SUM(variable) to produce validation errors"
1043        );
1044        let has_sum_error = result
1045            .errors
1046            .iter()
1047            .any(|e| e.contains("SUM(n) is invalid") && e.contains("requires a property"));
1048        assert!(
1049            has_sum_error,
1050            "Expected error about SUM requiring property, got: {:?}",
1051            result.errors
1052        );
1053    }
1054
1055    #[test]
1056    fn test_avg_with_variable_fails_validation() {
1057        let expr = ValueExpression::Function {
1058            name: "avg".to_string(),
1059            args: vec![ValueExpression::Variable("n".to_string())],
1060        };
1061        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1062        assert!(
1063            !result.errors.is_empty(),
1064            "Expected AVG(variable) to produce validation errors"
1065        );
1066        let has_avg_error = result
1067            .errors
1068            .iter()
1069            .any(|e| e.contains("AVG(n) is invalid") && e.contains("requires a property"));
1070        assert!(
1071            has_avg_error,
1072            "Expected error about AVG requiring property, got: {:?}",
1073            result.errors
1074        );
1075    }
1076
1077    #[test]
1078    fn test_sum_with_property_passes_validation() {
1079        let expr = ValueExpression::Function {
1080            name: "sum".to_string(),
1081            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1082        };
1083        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1084        assert!(
1085            result.errors.is_empty(),
1086            "SUM with property should pass validation, got errors: {:?}",
1087            result.errors
1088        );
1089    }
1090
1091    #[test]
1092    fn test_min_with_variable_fails_validation() {
1093        let expr = ValueExpression::Function {
1094            name: "min".to_string(),
1095            args: vec![ValueExpression::Variable("n".to_string())],
1096        };
1097        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1098        assert!(
1099            !result.errors.is_empty(),
1100            "Expected MIN(variable) to produce validation errors"
1101        );
1102        let has_min_error = result
1103            .errors
1104            .iter()
1105            .any(|e| e.contains("MIN(n) is invalid") && e.contains("requires a property"));
1106        assert!(
1107            has_min_error,
1108            "Expected error about MIN requiring property, got: {:?}",
1109            result.errors
1110        );
1111    }
1112
1113    #[test]
1114    fn test_max_with_variable_fails_validation() {
1115        let expr = ValueExpression::Function {
1116            name: "max".to_string(),
1117            args: vec![ValueExpression::Variable("n".to_string())],
1118        };
1119        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1120        assert!(
1121            !result.errors.is_empty(),
1122            "Expected MAX(variable) to produce validation errors"
1123        );
1124        let has_max_error = result
1125            .errors
1126            .iter()
1127            .any(|e| e.contains("MAX(n) is invalid") && e.contains("requires a property"));
1128        assert!(
1129            has_max_error,
1130            "Expected error about MAX requiring property, got: {:?}",
1131            result.errors
1132        );
1133    }
1134
1135    #[test]
1136    fn test_min_with_property_passes_validation() {
1137        let expr = ValueExpression::Function {
1138            name: "min".to_string(),
1139            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1140        };
1141        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1142        assert!(
1143            result.errors.is_empty(),
1144            "MIN with property should pass validation, got errors: {:?}",
1145            result.errors
1146        );
1147    }
1148
1149    #[test]
1150    fn test_max_with_property_passes_validation() {
1151        let expr = ValueExpression::Function {
1152            name: "max".to_string(),
1153            args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
1154        };
1155        let result = analyze_return_with_match("n", "Person", expr).unwrap();
1156        assert!(
1157            result.errors.is_empty(),
1158            "MAX with property should pass validation, got errors: {:?}",
1159            result.errors
1160        );
1161    }
1162
1163    #[test]
1164    fn test_arithmetic_with_non_numeric_literal_error() {
1165        // RETURN "x" + 1
1166        let expr = ValueExpression::Arithmetic {
1167            left: Box::new(ValueExpression::Literal(PropertyValue::String(
1168                "x".to_string(),
1169            ))),
1170            operator: ArithmeticOperator::Add,
1171            right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1172        };
1173        let result = analyze_return_expr(expr).unwrap();
1174        // The semantic analyzer returns Ok with errors collected in the result
1175        assert!(result
1176            .errors
1177            .iter()
1178            .any(|e| e.contains("Arithmetic requires numeric literal operands")));
1179    }
1180
1181    #[test]
1182    fn test_arithmetic_with_numeric_literals_ok() {
1183        // RETURN 1 + 2.0
1184        let expr = ValueExpression::Arithmetic {
1185            left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
1186            operator: ArithmeticOperator::Add,
1187            right: Box::new(ValueExpression::Literal(PropertyValue::Float(2.0))),
1188        };
1189        let result = analyze_return_expr(expr);
1190        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1191        assert!(result.unwrap().errors.is_empty());
1192    }
1193
1194    #[test]
1195    fn test_vector_distance_with_property() {
1196        use crate::ast::DistanceMetric;
1197
1198        // MATCH (p:Person) RETURN vector_distance(p.embedding, p.embedding, l2)
1199        let expr = ValueExpression::VectorDistance {
1200            left: Box::new(ValueExpression::Property(PropertyRef {
1201                variable: "p".to_string(),
1202                property: "embedding".to_string(),
1203            })),
1204            right: Box::new(ValueExpression::Property(PropertyRef {
1205                variable: "p".to_string(),
1206                property: "embedding".to_string(),
1207            })),
1208            metric: DistanceMetric::L2,
1209        };
1210
1211        let result = analyze_return_with_match("p", "Person", expr);
1212        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1213        assert!(result.unwrap().errors.is_empty());
1214    }
1215
1216    #[test]
1217    fn test_vector_distance_without_property_fails() {
1218        use crate::ast::DistanceMetric;
1219
1220        // MATCH (p:Person) RETURN vector_distance(0.5, 0.3, l2) - both literals, should fail
1221        let expr = ValueExpression::VectorDistance {
1222            left: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
1223            right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.3))),
1224            metric: DistanceMetric::L2,
1225        };
1226
1227        let result = analyze_return_with_match("p", "Person", expr);
1228        // Semantic analyzer returns Ok but with errors in the result
1229        assert!(
1230            result.is_ok(),
1231            "Analyzer should return Ok with errors, got {:?}",
1232            result
1233        );
1234        let semantic_result = result.unwrap();
1235        assert!(
1236            !semantic_result.errors.is_empty(),
1237            "Expected validation errors"
1238        );
1239        assert!(semantic_result
1240            .errors
1241            .iter()
1242            .any(|e| e.contains("requires at least one argument to be a property")));
1243    }
1244
1245    #[test]
1246    fn test_vector_similarity_with_property() {
1247        use crate::ast::DistanceMetric;
1248
1249        // MATCH (p:Person) RETURN vector_similarity(p.embedding, p.embedding, cosine)
1250        let expr = ValueExpression::VectorSimilarity {
1251            left: Box::new(ValueExpression::Property(PropertyRef {
1252                variable: "p".to_string(),
1253                property: "embedding".to_string(),
1254            })),
1255            right: Box::new(ValueExpression::Property(PropertyRef {
1256                variable: "p".to_string(),
1257                property: "embedding".to_string(),
1258            })),
1259            metric: DistanceMetric::Cosine,
1260        };
1261
1262        let result = analyze_return_with_match("p", "Person", expr);
1263        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1264        assert!(result.unwrap().errors.is_empty());
1265    }
1266
1267    #[test]
1268    fn test_vector_similarity_one_literal_ok() {
1269        use crate::ast::DistanceMetric;
1270
1271        // MATCH (p:Person) RETURN vector_similarity(p.embedding, 0.5, cosine)
1272        // One property reference is sufficient
1273        let expr = ValueExpression::VectorSimilarity {
1274            left: Box::new(ValueExpression::Property(PropertyRef {
1275                variable: "p".to_string(),
1276                property: "embedding".to_string(),
1277            })),
1278            right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
1279            metric: DistanceMetric::Cosine,
1280        };
1281
1282        let result = analyze_return_with_match("p", "Person", expr);
1283        assert!(result.is_ok(), "Expected Ok but got {:?}", result);
1284        assert!(result.unwrap().errors.is_empty());
1285    }
1286
1287    #[test]
1288    fn test_vector_distance_all_metrics() {
1289        use crate::ast::DistanceMetric;
1290
1291        // Test all distance metrics are accepted
1292        for metric in [
1293            DistanceMetric::L2,
1294            DistanceMetric::Cosine,
1295            DistanceMetric::Dot,
1296        ] {
1297            let expr = ValueExpression::VectorDistance {
1298                left: Box::new(ValueExpression::Property(PropertyRef {
1299                    variable: "p".to_string(),
1300                    property: "embedding".to_string(),
1301                })),
1302                right: Box::new(ValueExpression::Property(PropertyRef {
1303                    variable: "p".to_string(),
1304                    property: "embedding".to_string(),
1305                })),
1306                metric: metric.clone(),
1307            };
1308
1309            let result = analyze_return_with_match("p", "Person", expr);
1310            assert!(
1311                result.is_ok(),
1312                "Expected Ok for metric {:?} but got {:?}",
1313                metric,
1314                result
1315            );
1316            assert!(result.unwrap().errors.is_empty());
1317        }
1318    }
1319}