use crate::ast::*;
use crate::case_insensitive::CaseInsensitiveLookup;
use crate::config::GraphConfig;
use crate::error::{GraphError, Result};
use std::collections::{HashMap, HashSet};
pub struct SemanticAnalyzer {
config: GraphConfig,
variables: HashMap<String, VariableInfo>,
current_scope: ScopeType,
}
#[derive(Debug, Clone)]
pub struct VariableInfo {
pub name: String,
pub variable_type: VariableType,
pub labels: Vec<String>,
pub properties: HashSet<String>,
pub defined_in: ScopeType,
}
#[derive(Debug, Clone, PartialEq)]
pub enum VariableType {
Node,
Relationship,
Path,
Property,
}
#[derive(Debug, Clone, PartialEq)]
pub enum ScopeType {
Match,
Where,
With,
PostWithWhere,
Return,
OrderBy,
}
#[derive(Debug, Clone)]
pub struct SemanticResult {
pub ast: CypherQuery,
pub variables: HashMap<String, VariableInfo>,
pub errors: Vec<String>,
pub warnings: Vec<String>,
}
impl SemanticAnalyzer {
pub fn new(config: GraphConfig) -> Self {
Self {
config,
variables: HashMap::new(),
current_scope: ScopeType::Match,
}
}
pub fn analyze(
&mut self,
query: &CypherQuery,
parameters: &HashMap<String, serde_json::Value>,
) -> Result<SemanticResult> {
let mut analyzed_query = query.clone();
self.substitute_parameters(&mut analyzed_query, parameters)?;
let mut errors = Vec::new();
let mut warnings = Vec::new();
self.current_scope = ScopeType::Match;
for clause in &analyzed_query.reading_clauses {
match clause {
ReadingClause::Match(match_clause) => {
if let Err(e) = self.analyze_match_clause(match_clause) {
errors.push(format!("MATCH clause error: {}", e));
}
}
ReadingClause::Unwind(unwind_clause) => {
if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
errors.push(format!("UNWIND clause error: {}", e));
}
}
}
}
if let Some(where_clause) = &analyzed_query.where_clause {
self.current_scope = ScopeType::Where;
if let Err(e) = self.analyze_where_clause(where_clause) {
errors.push(format!("WHERE clause error: {}", e));
}
}
if let Some(with_clause) = &analyzed_query.with_clause {
self.current_scope = ScopeType::With;
if let Err(e) = self.analyze_with_clause(with_clause) {
errors.push(format!("WITH clause error: {}", e));
}
}
self.current_scope = ScopeType::Match;
for clause in &analyzed_query.post_with_reading_clauses {
match clause {
ReadingClause::Match(match_clause) => {
if let Err(e) = self.analyze_match_clause(match_clause) {
errors.push(format!("Post-WITH MATCH clause error: {}", e));
}
}
ReadingClause::Unwind(unwind_clause) => {
if let Err(e) = self.analyze_unwind_clause(unwind_clause) {
errors.push(format!("Post-WITH UNWIND clause error: {}", e));
}
}
}
}
if let Some(post_where) = &analyzed_query.post_with_where_clause {
self.current_scope = ScopeType::PostWithWhere;
if let Err(e) = self.analyze_where_clause(post_where) {
errors.push(format!("Post-WITH WHERE clause error: {}", e));
}
}
self.current_scope = ScopeType::Return;
if let Err(e) = self.analyze_return_clause(&analyzed_query.return_clause) {
errors.push(format!("RETURN clause error: {}", e));
}
if let Some(order_by) = &analyzed_query.order_by {
self.current_scope = ScopeType::OrderBy;
if let Err(e) = self.analyze_order_by_clause(order_by) {
errors.push(format!("ORDER BY clause error: {}", e));
}
}
self.validate_schema(&mut warnings);
self.validate_types(&mut errors);
Ok(SemanticResult {
ast: analyzed_query,
variables: self.variables.clone(),
errors,
warnings,
})
}
fn analyze_match_clause(&mut self, match_clause: &MatchClause) -> Result<()> {
for pattern in &match_clause.patterns {
self.analyze_graph_pattern(pattern)?;
}
Ok(())
}
fn analyze_unwind_clause(&mut self, unwind_clause: &UnwindClause) -> Result<()> {
self.analyze_value_expression(&unwind_clause.expression)?;
let var_name = &unwind_clause.alias;
let var_name_lower = var_name.to_lowercase();
if let Some(existing) = self.variables.get_mut(&var_name_lower) {
if existing.variable_type != VariableType::Property {
return Err(GraphError::PlanError {
message: format!("Variable '{}' redefined with different type", var_name),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
} else {
let var_info = VariableInfo {
name: var_name.clone(),
variable_type: VariableType::Property,
labels: vec![],
properties: HashSet::new(),
defined_in: self.current_scope.clone(),
};
self.variables.insert(var_name_lower, var_info);
}
Ok(())
}
fn analyze_graph_pattern(&mut self, pattern: &GraphPattern) -> Result<()> {
match pattern {
GraphPattern::Node(node) => {
self.register_node_variable(node)?;
}
GraphPattern::Path(path) => {
self.register_node_variable(&path.start_node)?;
for segment in &path.segments {
self.validate_length_range(&segment.relationship)?;
if let Some(rel_var) = &segment.relationship.variable {
self.register_relationship_variable(rel_var, &segment.relationship)?;
}
self.register_node_variable(&segment.end_node)?;
}
}
}
Ok(())
}
fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> {
if let Some(var_name) = &node.variable {
let var_name_lower = var_name.to_lowercase();
if let Some(existing) = self.variables.get_mut(&var_name_lower) {
if existing.variable_type != VariableType::Node {
return Err(GraphError::PlanError {
message: format!("Variable '{}' redefined with different type", var_name),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
for label in &node.labels {
if !existing.labels.contains(label) {
existing.labels.push(label.clone());
}
}
for prop in node.properties.keys() {
existing.properties.insert(prop.clone());
}
} else {
let var_info = VariableInfo {
name: var_name.clone(),
variable_type: VariableType::Node,
labels: node.labels.clone(),
properties: node.properties.keys().cloned().collect(),
defined_in: self.current_scope.clone(),
};
self.variables.insert(var_name_lower, var_info);
}
}
Ok(())
}
fn register_relationship_variable(
&mut self,
var_name: &str,
rel: &RelationshipPattern,
) -> Result<()> {
let var_name_lower = var_name.to_lowercase();
if let Some(existing) = self.variables.get_mut(&var_name_lower) {
if existing.variable_type != VariableType::Relationship {
return Err(GraphError::PlanError {
message: format!("Variable '{}' redefined with different type", var_name),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
for rel_type in &rel.types {
if !existing.labels.contains(rel_type) {
existing.labels.push(rel_type.clone());
}
}
for prop in rel.properties.keys() {
existing.properties.insert(prop.clone());
}
} else {
let var_info = VariableInfo {
name: var_name.to_string(),
variable_type: VariableType::Relationship,
labels: rel.types.clone(), properties: rel.properties.keys().cloned().collect(),
defined_in: self.current_scope.clone(),
};
self.variables.insert(var_name_lower, var_info);
}
Ok(())
}
fn analyze_where_clause(&mut self, where_clause: &WhereClause) -> Result<()> {
self.analyze_boolean_expression(&where_clause.expression)
}
fn analyze_boolean_expression(&mut self, expr: &BooleanExpression) -> Result<()> {
match expr {
BooleanExpression::Comparison { left, right, .. } => {
self.analyze_value_expression(left)?;
self.analyze_value_expression(right)?;
}
BooleanExpression::And(left, right) | BooleanExpression::Or(left, right) => {
self.analyze_boolean_expression(left)?;
self.analyze_boolean_expression(right)?;
}
BooleanExpression::Not(inner) => {
self.analyze_boolean_expression(inner)?;
}
BooleanExpression::Exists(prop_ref) => {
self.validate_property_reference(prop_ref)?;
}
BooleanExpression::In { expression, list } => {
self.analyze_value_expression(expression)?;
for item in list {
self.analyze_value_expression(item)?;
}
}
BooleanExpression::Like { expression, .. } => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::ILike { expression, .. } => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::Contains { expression, .. } => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::StartsWith { expression, .. } => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::EndsWith { expression, .. } => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::IsNull(expression) => {
self.analyze_value_expression(expression)?;
}
BooleanExpression::IsNotNull(expression) => {
self.analyze_value_expression(expression)?;
}
}
Ok(())
}
fn analyze_value_expression(&mut self, expr: &ValueExpression) -> Result<()> {
match expr {
ValueExpression::Property(prop_ref) => {
self.validate_property_reference(prop_ref)?;
}
ValueExpression::Literal(_) => {
}
ValueExpression::Variable(var) => {
if !self.variables.contains_key_ci(var) {
return Err(GraphError::PlanError {
message: format!("Undefined variable: '{}'", var),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
ValueExpression::ScalarFunction { name, args } => {
let function_name = name.to_lowercase();
match function_name.as_str() {
"tolower" | "lower" | "toupper" | "upper" => {
if args.len() != 1 {
return Err(GraphError::PlanError {
message: format!(
"{} requires exactly 1 argument, got {}",
name.to_uppercase(),
args.len()
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
_ => {
return Err(GraphError::UnsupportedFeature {
feature: format!(
"Cypher function '{}' is not implemented. Supported scalar functions: toLower, lower, toUpper, upper. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
name
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
for arg in args {
self.analyze_value_expression(arg)?;
}
}
ValueExpression::AggregateFunction {
name,
args,
distinct,
} => {
let function_name = name.to_lowercase();
match function_name.as_str() {
"count" | "sum" | "avg" | "min" | "max" | "collect" => {
if *distinct && function_name != "count" {
return Err(GraphError::UnsupportedFeature {
feature: format!(
"DISTINCT is only supported with COUNT, not {}",
function_name.to_uppercase()
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
if *distinct && function_name == "count" {
if let Some(ValueExpression::Variable(v)) = args.first() {
if v == "*" {
return Err(GraphError::PlanError {
message: "COUNT(DISTINCT *) is not supported. \
Use COUNT(*) to count all rows, or \
COUNT(DISTINCT property) to count distinct values."
.to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
}
if args.len() != 1 {
return Err(GraphError::PlanError {
message: format!(
"{} requires exactly 1 argument, got {}",
function_name.to_uppercase(),
args.len()
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
if matches!(function_name.as_str(), "sum" | "avg" | "min" | "max") {
if let Some(ValueExpression::Variable(v)) = args.first() {
return Err(GraphError::PlanError {
message: format!(
"{}({}) is invalid - {} requires a property like {}({}.property). You cannot {} a node/entity.",
function_name.to_uppercase(), v, function_name.to_uppercase(), function_name.to_uppercase(), v, function_name
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
}
_ => {
return Err(GraphError::UnsupportedFeature {
feature: format!(
"Cypher aggregate function '{}' is not implemented. Supported aggregate functions: COUNT, SUM, AVG, MIN, MAX, COLLECT.",
name
),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
for arg in args {
if function_name == "count"
&& matches!(arg, ValueExpression::Variable(v) if v == "*")
{
continue;
}
self.analyze_value_expression(arg)?;
}
}
ValueExpression::Arithmetic { left, right, .. } => {
self.analyze_value_expression(left)?;
self.analyze_value_expression(right)?;
let is_numeric_literal = |pv: &PropertyValue| {
matches!(pv, PropertyValue::Integer(_) | PropertyValue::Float(_))
};
if let (ValueExpression::Literal(l1), ValueExpression::Literal(l2)) =
(&**left, &**right)
{
if !(is_numeric_literal(l1) && is_numeric_literal(l2)) {
return Err(GraphError::PlanError {
message: "Arithmetic requires numeric literal operands".to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
}
ValueExpression::VectorDistance { left, right, .. } => {
self.analyze_value_expression(left)?;
self.analyze_value_expression(right)?;
let has_property = matches!(**left, ValueExpression::Property(_))
|| matches!(**right, ValueExpression::Property(_));
if !has_property {
return Err(GraphError::PlanError {
message: "vector_distance() requires at least one argument to be a property reference".to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
ValueExpression::VectorSimilarity { left, right, .. } => {
self.analyze_value_expression(left)?;
self.analyze_value_expression(right)?;
let has_property = matches!(**left, ValueExpression::Property(_))
|| matches!(**right, ValueExpression::Property(_));
if !has_property {
return Err(GraphError::PlanError {
message: "vector_similarity() requires at least one argument to be a property reference".to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
ValueExpression::VectorLiteral(values) => {
if values.is_empty() {
return Err(GraphError::PlanError {
message: "Vector literal cannot be empty".to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
ValueExpression::Parameter(_) => {
}
}
Ok(())
}
fn register_projection_alias(&mut self, alias: &str) {
if self.variables.contains_key_ci(alias) {
return;
}
let var_info = VariableInfo {
name: alias.to_string(),
variable_type: VariableType::Property,
labels: vec![],
properties: HashSet::new(),
defined_in: self.current_scope.clone(),
};
self.variables.insert(alias.to_lowercase(), var_info);
}
fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> {
if !self.variables.contains_key_ci(&prop_ref.variable) {
return Err(GraphError::PlanError {
message: format!("Undefined variable: '{}'", prop_ref.variable),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
Ok(())
}
fn analyze_return_clause(&mut self, return_clause: &ReturnClause) -> Result<()> {
for item in &return_clause.items {
self.analyze_value_expression(&item.expression)?;
if let Some(alias) = &item.alias {
self.register_projection_alias(alias);
}
}
Ok(())
}
fn analyze_with_clause(&mut self, with_clause: &WithClause) -> Result<()> {
for item in &with_clause.items {
self.analyze_value_expression(&item.expression)?;
if let Some(alias) = &item.alias {
self.register_projection_alias(alias);
}
}
if let Some(order_by) = &with_clause.order_by {
for item in &order_by.items {
self.analyze_value_expression(&item.expression)?;
}
}
Ok(())
}
fn analyze_order_by_clause(&mut self, order_by: &OrderByClause) -> Result<()> {
for item in &order_by.items {
self.analyze_value_expression(&item.expression)?;
}
Ok(())
}
fn validate_schema(&self, warnings: &mut Vec<String>) {
for var_info in self.variables.values() {
match var_info.variable_type {
VariableType::Node => {
for label in &var_info.labels {
if self.config.get_node_mapping(label).is_none() {
warnings.push(format!("Node label '{}' not found in schema", label));
}
}
}
VariableType::Relationship => {
for rel_type in &var_info.labels {
if self.config.get_relationship_mapping(rel_type).is_none() {
warnings.push(format!(
"Relationship type '{}' not found in schema",
rel_type
));
}
}
}
_ => {}
}
}
}
fn validate_types(&self, errors: &mut Vec<String>) {
for var_info in self.variables.values() {
match var_info.variable_type {
VariableType::Node => {
let mut label_property_sets: Vec<&[String]> = Vec::new();
for label in &var_info.labels {
if let Some(mapping) = self.config.get_node_mapping(label) {
if !mapping.property_fields.is_empty() {
label_property_sets.push(&mapping.property_fields);
}
}
}
if !label_property_sets.is_empty() {
'prop: for prop in &var_info.properties {
let prop_lower = prop.to_lowercase();
for fields in &label_property_sets {
if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
continue 'prop;
}
}
errors.push(format!(
"Property '{}' not found on labels {:?}",
prop, var_info.labels
));
}
}
}
VariableType::Relationship => {
let mut rel_property_sets: Vec<&[String]> = Vec::new();
for rel_type in &var_info.labels {
if let Some(mapping) = self.config.get_relationship_mapping(rel_type) {
if !mapping.property_fields.is_empty() {
rel_property_sets.push(&mapping.property_fields);
}
}
}
if !rel_property_sets.is_empty() {
'prop_rel: for prop in &var_info.properties {
let prop_lower = prop.to_lowercase();
for fields in &rel_property_sets {
if fields.iter().any(|f| f.to_lowercase() == prop_lower) {
continue 'prop_rel;
}
}
errors.push(format!(
"Property '{}' not found on relationship types {:?}",
prop, var_info.labels
));
}
}
}
_ => {}
}
}
}
}
impl SemanticAnalyzer {
fn validate_length_range(&self, rel: &RelationshipPattern) -> Result<()> {
if let Some(len) = &rel.length {
if let (Some(min), Some(max)) = (len.min, len.max) {
if min > max {
return Err(GraphError::PlanError {
message: "Invalid path length range: min > max".to_string(),
location: snafu::Location::new(file!(), line!(), column!()),
});
}
}
}
Ok(())
}
fn substitute_parameters(
&self,
query: &mut CypherQuery,
parameters: &HashMap<String, serde_json::Value>,
) -> Result<()> {
crate::parameter_substitution::substitute_parameters(query, parameters)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::{
ArithmeticOperator, BooleanExpression, CypherQuery, GraphPattern, LengthRange, MatchClause,
NodePattern, PathPattern, PathSegment, PropertyRef, PropertyValue, RelationshipDirection,
RelationshipPattern, ReturnClause, ReturnItem, ValueExpression, WhereClause,
};
use crate::config::{GraphConfig, NodeMapping};
fn test_config() -> GraphConfig {
GraphConfig::builder()
.with_node_label("Person", "id")
.with_node_label("Employee", "id")
.with_node_label("Company", "id")
.with_relationship("KNOWS", "src_id", "dst_id")
.build()
.unwrap()
}
fn analyze_return_expr(expr: ValueExpression) -> Result<SemanticResult> {
let query = CypherQuery {
reading_clauses: vec![],
where_clause: None,
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![ReturnItem {
expression: expr,
alias: None,
}],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
analyzer.analyze(&query, &HashMap::new())
}
fn analyze_return_with_match(
var: &str,
label: &str,
expr: ValueExpression,
) -> Result<SemanticResult> {
let node = NodePattern::new(Some(var.to_string())).with_label(label);
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node)],
})],
where_clause: None,
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![ReturnItem {
expression: expr,
alias: None,
}],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
analyzer.analyze(&query, &HashMap::new())
}
#[test]
fn test_merge_node_variable_metadata() {
let node1 = NodePattern::new(Some("n".to_string()))
.with_label("Person")
.with_property("age", PropertyValue::Integer(30));
let node2 = NodePattern::new(Some("n".to_string()))
.with_label("Employee")
.with_property("dept", PropertyValue::String("X".to_string()));
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node1), GraphPattern::Node(node2)],
})],
where_clause: None,
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result.errors.is_empty());
let n = result.variables.get("n").expect("variable n present");
assert!(n.labels.contains(&"Person".to_string()));
assert!(n.labels.contains(&"Employee".to_string()));
assert!(n.properties.contains("age"));
assert!(n.properties.contains("dept"));
}
#[test]
fn test_invalid_length_range_collects_error() {
let start = NodePattern::new(Some("a".to_string())).with_label("Person");
let end = NodePattern::new(Some("b".to_string())).with_label("Person");
let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
.with_variable("r")
.with_type("KNOWS");
rel.length = Some(LengthRange {
min: Some(3),
max: Some(2),
});
let path = PathPattern {
start_node: start,
segments: vec![PathSegment {
relationship: rel,
end_node: end,
}],
};
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Path(path)],
})],
where_clause: None,
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Invalid path length range")));
}
#[test]
fn test_undefined_variable_in_where() {
let node = NodePattern::new(Some("n".to_string())).with_label("Person");
let where_clause = WhereClause {
expression: BooleanExpression::Exists(PropertyRef::new("m", "name")),
};
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node)],
})],
where_clause: Some(where_clause),
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Undefined variable: 'm'")));
}
#[test]
fn test_variable_redefinition_between_node_and_relationship() {
let start = NodePattern::new(Some("n".to_string())).with_label("Person");
let end = NodePattern::new(Some("m".to_string())).with_label("Person");
let rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
.with_variable("n")
.with_type("KNOWS");
let path = PathPattern {
start_node: start,
segments: vec![PathSegment {
relationship: rel,
end_node: end,
}],
};
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Path(path)],
})],
where_clause: None,
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("redefined with different type")));
}
#[test]
fn test_unknown_node_label_warns() {
let node = NodePattern::new(Some("x".to_string())).with_label("Unknown");
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node)],
})],
post_with_reading_clauses: vec![],
post_with_where_clause: None,
where_clause: None,
with_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.warnings
.iter()
.any(|w| w.contains("Node label 'Unknown' not found in schema")));
}
#[test]
fn test_property_not_in_schema_reports_error() {
let custom_config = GraphConfig::builder()
.with_node_mapping(
NodeMapping::new("Person", "id").with_properties(vec!["name".to_string()]),
)
.with_relationship("KNOWS", "src_id", "dst_id")
.build()
.unwrap();
let node = NodePattern::new(Some("n".to_string()))
.with_label("Person")
.with_property("age", PropertyValue::Integer(30));
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node)],
})],
post_with_reading_clauses: vec![],
post_with_where_clause: None,
where_clause: None,
with_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(custom_config);
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Property 'age' not found on labels [\"Person\"]")));
}
#[test]
fn test_valid_length_range_ok() {
let start = NodePattern::new(Some("a".to_string())).with_label("Person");
let end = NodePattern::new(Some("b".to_string())).with_label("Person");
let mut rel = RelationshipPattern::new(RelationshipDirection::Outgoing)
.with_variable("r")
.with_type("KNOWS");
rel.length = Some(LengthRange {
min: Some(2),
max: Some(3),
});
let path = PathPattern {
start_node: start,
segments: vec![PathSegment {
relationship: rel,
end_node: end,
}],
};
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Path(path)],
})],
post_with_reading_clauses: vec![],
post_with_where_clause: None,
where_clause: None,
with_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
assert!(result
.errors
.iter()
.all(|e| !e.contains("Invalid path length range")));
}
#[test]
fn test_relationship_variable_metadata_merge_across_segments() {
let start = NodePattern::new(Some("a".to_string())).with_label("Person");
let mid = NodePattern::new(Some("b".to_string())).with_label("Person");
let end = NodePattern::new(Some("c".to_string())).with_label("Person");
let mut rel1 = RelationshipPattern::new(RelationshipDirection::Outgoing)
.with_variable("r")
.with_type("KNOWS")
.with_property("since", PropertyValue::Integer(2020));
rel1.length = None;
let mut rel2 = RelationshipPattern::new(RelationshipDirection::Outgoing)
.with_variable("r")
.with_type("FRIEND")
.with_property("level", PropertyValue::Integer(1));
rel2.length = None;
let path = PathPattern {
start_node: start,
segments: vec![
PathSegment {
relationship: rel1,
end_node: mid,
},
PathSegment {
relationship: rel2,
end_node: end,
},
],
};
let custom_config = GraphConfig::builder()
.with_node_label("Person", "id")
.with_relationship("KNOWS", "src_id", "dst_id")
.with_relationship("FRIEND", "src_id", "dst_id")
.build()
.unwrap();
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Path(path)],
})],
post_with_reading_clauses: vec![],
post_with_where_clause: None,
where_clause: None,
with_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![],
},
limit: None,
order_by: None,
skip: None,
};
let mut analyzer = SemanticAnalyzer::new(custom_config);
let result = analyzer.analyze(&query, &HashMap::new()).unwrap();
let r = result.variables.get("r").expect("variable r present");
assert!(r.labels.contains(&"KNOWS".to_string()));
assert!(r.labels.contains(&"FRIEND".to_string()));
assert!(r.properties.contains("since"));
assert!(r.properties.contains("level"));
}
#[test]
fn test_parameter_substitution() {
let node = NodePattern::new(Some("n".to_string())).with_label("Person");
let where_clause = WhereClause {
expression: BooleanExpression::Comparison {
left: ValueExpression::Property(PropertyRef::new("n", "age")),
operator: crate::ast::ComparisonOperator::GreaterThan,
right: ValueExpression::Parameter("min_age".to_string()),
},
};
let query = CypherQuery {
reading_clauses: vec![ReadingClause::Match(MatchClause {
patterns: vec![GraphPattern::Node(node)],
})],
where_clause: Some(where_clause),
with_clause: None,
post_with_reading_clauses: vec![],
post_with_where_clause: None,
return_clause: ReturnClause {
distinct: false,
items: vec![ReturnItem {
expression: ValueExpression::Variable("n".to_string()),
alias: None,
}],
},
limit: None,
order_by: None,
skip: None,
};
let mut parameters = HashMap::new();
parameters.insert("min_age".to_string(), serde_json::json!(18));
let mut analyzer = SemanticAnalyzer::new(test_config());
let result = analyzer
.analyze(&query, ¶meters)
.expect("Analysis failed");
let where_clause = result.ast.where_clause.as_ref().unwrap();
match &where_clause.expression {
BooleanExpression::Comparison { right, .. } => match right {
ValueExpression::Literal(PropertyValue::Integer(val)) => {
assert_eq!(*val, 18);
}
_ => panic!("Expected Integer literal, got {:?}", right),
},
_ => panic!("Expected Comparison expression"),
}
}
#[test]
fn test_function_argument_undefined_variable_in_return() {
let expr = ValueExpression::ScalarFunction {
name: "toUpper".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("m", "name"))],
};
let result = analyze_return_expr(expr).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Undefined variable: 'm'")));
}
#[test]
fn test_function_argument_valid_variable_ok() {
let expr = ValueExpression::ScalarFunction {
name: "toUpper".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(result.errors.is_empty());
}
#[test]
fn test_arithmetic_with_undefined_variable_in_return() {
let expr = ValueExpression::Arithmetic {
left: Box::new(ValueExpression::Variable("x".to_string())),
operator: ArithmeticOperator::Add,
right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
};
let result = analyze_return_expr(expr).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Undefined variable: 'x'")));
}
#[test]
fn test_arithmetic_with_defined_property_ok() {
let expr = ValueExpression::Arithmetic {
left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
operator: ArithmeticOperator::Add,
right: Box::new(ValueExpression::Property(PropertyRef::new("n", "age"))),
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(result
.errors
.iter()
.all(|e| !e.contains("Undefined variable: 'n'")));
}
#[test]
fn test_count_with_multiple_args_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![
ValueExpression::Property(PropertyRef::new("n", "age")),
ValueExpression::Property(PropertyRef::new("n", "name")),
],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.any(|e| e.contains("COUNT requires exactly 1 argument")),
"Expected error about COUNT arity, got: {:?}",
result.errors
);
}
#[test]
fn test_count_with_zero_args_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.any(|e| e.contains("COUNT requires exactly 1 argument")),
"Expected error about COUNT arity, got: {:?}",
result.errors
);
}
#[test]
fn test_count_with_one_arg_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.all(|e| !e.contains("COUNT requires exactly 1 argument")),
"COUNT with 1 arg should not produce arity error, got: {:?}",
result.errors
);
}
#[test]
fn test_count_star_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![ValueExpression::Variable("*".to_string())],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result.errors.is_empty(),
"Expected COUNT(*) to pass semantic validation, got: {:?}",
result.errors
);
}
#[test]
fn test_unimplemented_scalar_function_fails_validation() {
let expr = ValueExpression::ScalarFunction {
name: "replace".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "name"))],
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.any(|e| e.to_lowercase().contains("not implemented")),
"Expected semantic validation to reject unimplemented function, got: {:?}",
result.errors
);
}
#[test]
fn test_sum_with_variable_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "sum".to_string(),
args: vec![ValueExpression::Variable("n".to_string())],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
!result.errors.is_empty(),
"Expected SUM(variable) to produce validation errors"
);
let has_sum_error = result
.errors
.iter()
.any(|e| e.contains("SUM(n) is invalid") && e.contains("requires a property"));
assert!(
has_sum_error,
"Expected error about SUM requiring property, got: {:?}",
result.errors
);
}
#[test]
fn test_avg_with_variable_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "avg".to_string(),
args: vec![ValueExpression::Variable("n".to_string())],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
!result.errors.is_empty(),
"Expected AVG(variable) to produce validation errors"
);
let has_avg_error = result
.errors
.iter()
.any(|e| e.contains("AVG(n) is invalid") && e.contains("requires a property"));
assert!(
has_avg_error,
"Expected error about AVG requiring property, got: {:?}",
result.errors
);
}
#[test]
fn test_sum_with_property_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "sum".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result.errors.is_empty(),
"SUM with property should pass validation, got errors: {:?}",
result.errors
);
}
#[test]
fn test_min_with_variable_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "min".to_string(),
args: vec![ValueExpression::Variable("n".to_string())],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
!result.errors.is_empty(),
"Expected MIN(variable) to produce validation errors"
);
let has_min_error = result
.errors
.iter()
.any(|e| e.contains("MIN(n) is invalid") && e.contains("requires a property"));
assert!(
has_min_error,
"Expected error about MIN requiring property, got: {:?}",
result.errors
);
}
#[test]
fn test_max_with_variable_fails_validation() {
let expr = ValueExpression::AggregateFunction {
name: "max".to_string(),
args: vec![ValueExpression::Variable("n".to_string())],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
!result.errors.is_empty(),
"Expected MAX(variable) to produce validation errors"
);
let has_max_error = result
.errors
.iter()
.any(|e| e.contains("MAX(n) is invalid") && e.contains("requires a property"));
assert!(
has_max_error,
"Expected error about MAX requiring property, got: {:?}",
result.errors
);
}
#[test]
fn test_min_with_property_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "min".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result.errors.is_empty(),
"MIN with property should pass validation, got errors: {:?}",
result.errors
);
}
#[test]
fn test_max_with_property_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "max".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: false,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result.errors.is_empty(),
"MAX with property should pass validation, got errors: {:?}",
result.errors
);
}
#[test]
fn test_distinct_only_supported_on_count() {
let expr = ValueExpression::AggregateFunction {
name: "sum".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: true,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.any(|e| e.contains("DISTINCT is only supported with COUNT")),
"Expected error about DISTINCT only for COUNT, got: {:?}",
result.errors
);
}
#[test]
fn test_count_distinct_star_rejected() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![ValueExpression::Variable("*".to_string())],
distinct: true,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result
.errors
.iter()
.any(|e| e.contains("COUNT(DISTINCT *)")),
"Expected error about COUNT(DISTINCT *), got: {:?}",
result.errors
);
}
#[test]
fn test_count_distinct_passes_validation() {
let expr = ValueExpression::AggregateFunction {
name: "count".to_string(),
args: vec![ValueExpression::Property(PropertyRef::new("n", "age"))],
distinct: true,
};
let result = analyze_return_with_match("n", "Person", expr).unwrap();
assert!(
result.errors.is_empty(),
"COUNT(DISTINCT) should pass validation, got errors: {:?}",
result.errors
);
}
#[test]
fn test_arithmetic_with_non_numeric_literal_error() {
let expr = ValueExpression::Arithmetic {
left: Box::new(ValueExpression::Literal(PropertyValue::String(
"x".to_string(),
))),
operator: ArithmeticOperator::Add,
right: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
};
let result = analyze_return_expr(expr).unwrap();
assert!(result
.errors
.iter()
.any(|e| e.contains("Arithmetic requires numeric literal operands")));
}
#[test]
fn test_arithmetic_with_numeric_literals_ok() {
let expr = ValueExpression::Arithmetic {
left: Box::new(ValueExpression::Literal(PropertyValue::Integer(1))),
operator: ArithmeticOperator::Add,
right: Box::new(ValueExpression::Literal(PropertyValue::Float(2.0))),
};
let result = analyze_return_expr(expr);
assert!(result.is_ok(), "Expected Ok but got {:?}", result);
assert!(result.unwrap().errors.is_empty());
}
#[test]
fn test_vector_distance_with_property() {
use crate::ast::DistanceMetric;
let expr = ValueExpression::VectorDistance {
left: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
right: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
metric: DistanceMetric::L2,
};
let result = analyze_return_with_match("p", "Person", expr);
assert!(result.is_ok(), "Expected Ok but got {:?}", result);
assert!(result.unwrap().errors.is_empty());
}
#[test]
fn test_vector_distance_without_property_fails() {
use crate::ast::DistanceMetric;
let expr = ValueExpression::VectorDistance {
left: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.3))),
metric: DistanceMetric::L2,
};
let result = analyze_return_with_match("p", "Person", expr);
assert!(
result.is_ok(),
"Analyzer should return Ok with errors, got {:?}",
result
);
let semantic_result = result.unwrap();
assert!(
!semantic_result.errors.is_empty(),
"Expected validation errors"
);
assert!(semantic_result
.errors
.iter()
.any(|e| e.contains("requires at least one argument to be a property")));
}
#[test]
fn test_vector_similarity_with_property() {
use crate::ast::DistanceMetric;
let expr = ValueExpression::VectorSimilarity {
left: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
right: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
metric: DistanceMetric::Cosine,
};
let result = analyze_return_with_match("p", "Person", expr);
assert!(result.is_ok(), "Expected Ok but got {:?}", result);
assert!(result.unwrap().errors.is_empty());
}
#[test]
fn test_vector_similarity_one_literal_ok() {
use crate::ast::DistanceMetric;
let expr = ValueExpression::VectorSimilarity {
left: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
right: Box::new(ValueExpression::Literal(PropertyValue::Float(0.5))),
metric: DistanceMetric::Cosine,
};
let result = analyze_return_with_match("p", "Person", expr);
assert!(result.is_ok(), "Expected Ok but got {:?}", result);
assert!(result.unwrap().errors.is_empty());
}
#[test]
fn test_vector_distance_all_metrics() {
use crate::ast::DistanceMetric;
for metric in [
DistanceMetric::L2,
DistanceMetric::Cosine,
DistanceMetric::Dot,
] {
let expr = ValueExpression::VectorDistance {
left: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
right: Box::new(ValueExpression::Property(PropertyRef {
variable: "p".to_string(),
property: "embedding".to_string(),
})),
metric: metric.clone(),
};
let result = analyze_return_with_match("p", "Person", expr);
assert!(
result.is_ok(),
"Expected Ok for metric {:?} but got {:?}",
metric,
result
);
assert!(result.unwrap().errors.is_empty());
}
}
}