Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                Ok(())
314            }
315
316            // RDF/SPARQL operators
317            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
318            LogicalOperator::Union(union) => {
319                for input in &union.inputs {
320                    self.bind_operator(input)?;
321                }
322                Ok(())
323            }
324            LogicalOperator::LeftJoin(lj) => {
325                self.bind_operator(&lj.left)?;
326                self.bind_operator(&lj.right)?;
327                if let Some(ref cond) = lj.condition {
328                    self.validate_expression(cond)?;
329                }
330                Ok(())
331            }
332            LogicalOperator::AntiJoin(aj) => {
333                self.bind_operator(&aj.left)?;
334                self.bind_operator(&aj.right)?;
335                Ok(())
336            }
337            LogicalOperator::Bind(bind) => {
338                self.bind_operator(&bind.input)?;
339                self.validate_expression(&bind.expression)?;
340                self.context.add_variable(
341                    bind.variable.clone(),
342                    VariableInfo {
343                        name: bind.variable.clone(),
344                        data_type: LogicalType::Any,
345                        is_node: false,
346                        is_edge: false,
347                    },
348                );
349                Ok(())
350            }
351            LogicalOperator::Merge(merge) => {
352                // First bind the input
353                self.bind_operator(&merge.input)?;
354                // Validate the match property expressions
355                for (_, expr) in &merge.match_properties {
356                    self.validate_expression(expr)?;
357                }
358                // Validate the ON CREATE property expressions
359                for (_, expr) in &merge.on_create {
360                    self.validate_expression(expr)?;
361                }
362                // Validate the ON MATCH property expressions
363                for (_, expr) in &merge.on_match {
364                    self.validate_expression(expr)?;
365                }
366                // MERGE introduces a new variable
367                self.context.add_variable(
368                    merge.variable.clone(),
369                    VariableInfo {
370                        name: merge.variable.clone(),
371                        data_type: LogicalType::Node,
372                        is_node: true,
373                        is_edge: false,
374                    },
375                );
376                Ok(())
377            }
378            LogicalOperator::AddLabel(add_label) => {
379                self.bind_operator(&add_label.input)?;
380                // Validate that the variable exists
381                if !self.context.contains(&add_label.variable) {
382                    return Err(undefined_variable_error(
383                        &add_label.variable,
384                        &self.context,
385                        " in SET labels",
386                    ));
387                }
388                Ok(())
389            }
390            LogicalOperator::RemoveLabel(remove_label) => {
391                self.bind_operator(&remove_label.input)?;
392                // Validate that the variable exists
393                if !self.context.contains(&remove_label.variable) {
394                    return Err(undefined_variable_error(
395                        &remove_label.variable,
396                        &self.context,
397                        " in REMOVE labels",
398                    ));
399                }
400                Ok(())
401            }
402            LogicalOperator::ShortestPath(sp) => {
403                // First bind the input
404                self.bind_operator(&sp.input)?;
405                // Validate that source and target variables are defined
406                if !self.context.contains(&sp.source_var) {
407                    return Err(undefined_variable_error(
408                        &sp.source_var,
409                        &self.context,
410                        " (source in shortestPath)",
411                    ));
412                }
413                if !self.context.contains(&sp.target_var) {
414                    return Err(undefined_variable_error(
415                        &sp.target_var,
416                        &self.context,
417                        " (target in shortestPath)",
418                    ));
419                }
420                // Add the path alias variable to the context
421                self.context.add_variable(
422                    sp.path_alias.clone(),
423                    VariableInfo {
424                        name: sp.path_alias.clone(),
425                        data_type: LogicalType::Any, // Path is a complex type
426                        is_node: false,
427                        is_edge: false,
428                    },
429                );
430                // Also add the path length variable for length(p) calls
431                let path_length_var = format!("_path_length_{}", sp.path_alias);
432                self.context.add_variable(
433                    path_length_var.clone(),
434                    VariableInfo {
435                        name: path_length_var,
436                        data_type: LogicalType::Int64,
437                        is_node: false,
438                        is_edge: false,
439                    },
440                );
441                Ok(())
442            }
443            // SPARQL Update operators - these don't require variable binding
444            LogicalOperator::InsertTriple(insert) => {
445                if let Some(ref input) = insert.input {
446                    self.bind_operator(input)?;
447                }
448                Ok(())
449            }
450            LogicalOperator::DeleteTriple(delete) => {
451                if let Some(ref input) = delete.input {
452                    self.bind_operator(input)?;
453                }
454                Ok(())
455            }
456            LogicalOperator::Modify(modify) => {
457                self.bind_operator(&modify.where_clause)?;
458                Ok(())
459            }
460            LogicalOperator::ClearGraph(_)
461            | LogicalOperator::CreateGraph(_)
462            | LogicalOperator::DropGraph(_)
463            | LogicalOperator::LoadGraph(_)
464            | LogicalOperator::CopyGraph(_)
465            | LogicalOperator::MoveGraph(_)
466            | LogicalOperator::AddGraph(_) => Ok(()),
467            LogicalOperator::VectorScan(scan) => {
468                // VectorScan introduces a variable for matched nodes
469                if let Some(ref input) = scan.input {
470                    self.bind_operator(input)?;
471                }
472                self.context.add_variable(
473                    scan.variable.clone(),
474                    VariableInfo {
475                        name: scan.variable.clone(),
476                        data_type: LogicalType::Node,
477                        is_node: true,
478                        is_edge: false,
479                    },
480                );
481                // Validate the query vector expression
482                self.validate_expression(&scan.query_vector)?;
483                Ok(())
484            }
485            LogicalOperator::VectorJoin(join) => {
486                // VectorJoin takes input from left side and produces right-side matches
487                self.bind_operator(&join.input)?;
488                // Add right variable for matched nodes
489                self.context.add_variable(
490                    join.right_variable.clone(),
491                    VariableInfo {
492                        name: join.right_variable.clone(),
493                        data_type: LogicalType::Node,
494                        is_node: true,
495                        is_edge: false,
496                    },
497                );
498                // Optionally add score variable
499                if let Some(ref score_var) = join.score_variable {
500                    self.context.add_variable(
501                        score_var.clone(),
502                        VariableInfo {
503                            name: score_var.clone(),
504                            data_type: LogicalType::Float64,
505                            is_node: false,
506                            is_edge: false,
507                        },
508                    );
509                }
510                // Validate the query vector expression
511                self.validate_expression(&join.query_vector)?;
512                Ok(())
513            }
514        }
515    }
516
517    /// Binds a triple scan operator (for RDF/SPARQL).
518    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
519        use crate::query::plan::TripleComponent;
520
521        // First bind the input if present
522        if let Some(ref input) = scan.input {
523            self.bind_operator(input)?;
524        }
525
526        // Add variables for subject, predicate, object
527        if let TripleComponent::Variable(name) = &scan.subject
528            && !self.context.contains(name)
529        {
530            self.context.add_variable(
531                name.clone(),
532                VariableInfo {
533                    name: name.clone(),
534                    data_type: LogicalType::Any, // RDF term
535                    is_node: false,
536                    is_edge: false,
537                },
538            );
539        }
540
541        if let TripleComponent::Variable(name) = &scan.predicate
542            && !self.context.contains(name)
543        {
544            self.context.add_variable(
545                name.clone(),
546                VariableInfo {
547                    name: name.clone(),
548                    data_type: LogicalType::Any, // IRI
549                    is_node: false,
550                    is_edge: false,
551                },
552            );
553        }
554
555        if let TripleComponent::Variable(name) = &scan.object
556            && !self.context.contains(name)
557        {
558            self.context.add_variable(
559                name.clone(),
560                VariableInfo {
561                    name: name.clone(),
562                    data_type: LogicalType::Any, // RDF term
563                    is_node: false,
564                    is_edge: false,
565                },
566            );
567        }
568
569        if let Some(TripleComponent::Variable(name)) = &scan.graph
570            && !self.context.contains(name)
571        {
572            self.context.add_variable(
573                name.clone(),
574                VariableInfo {
575                    name: name.clone(),
576                    data_type: LogicalType::Any, // IRI
577                    is_node: false,
578                    is_edge: false,
579                },
580            );
581        }
582
583        Ok(())
584    }
585
586    /// Binds a node scan operator.
587    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
588        // First bind the input if present
589        if let Some(ref input) = scan.input {
590            self.bind_operator(input)?;
591        }
592
593        // Add the scanned variable to scope
594        self.context.add_variable(
595            scan.variable.clone(),
596            VariableInfo {
597                name: scan.variable.clone(),
598                data_type: LogicalType::Node,
599                is_node: true,
600                is_edge: false,
601            },
602        );
603
604        Ok(())
605    }
606
607    /// Binds an expand operator.
608    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
609        // First bind the input
610        self.bind_operator(&expand.input)?;
611
612        // Validate that the source variable is defined
613        if !self.context.contains(&expand.from_variable) {
614            return Err(undefined_variable_error(
615                &expand.from_variable,
616                &self.context,
617                " in EXPAND",
618            ));
619        }
620
621        // Validate that the source is a node
622        if let Some(info) = self.context.get(&expand.from_variable)
623            && !info.is_node
624        {
625            return Err(binding_error(format!(
626                "Variable '{}' is not a node, cannot expand from it",
627                expand.from_variable
628            )));
629        }
630
631        // Add edge variable if present
632        if let Some(ref edge_var) = expand.edge_variable {
633            self.context.add_variable(
634                edge_var.clone(),
635                VariableInfo {
636                    name: edge_var.clone(),
637                    data_type: LogicalType::Edge,
638                    is_node: false,
639                    is_edge: true,
640                },
641            );
642        }
643
644        // Add target variable
645        self.context.add_variable(
646            expand.to_variable.clone(),
647            VariableInfo {
648                name: expand.to_variable.clone(),
649                data_type: LogicalType::Node,
650                is_node: true,
651                is_edge: false,
652            },
653        );
654
655        // Add path length variable for variable-length paths (for length(p) calls)
656        if let Some(ref path_alias) = expand.path_alias {
657            let path_length_var = format!("_path_length_{}", path_alias);
658            self.context.add_variable(
659                path_length_var.clone(),
660                VariableInfo {
661                    name: path_length_var,
662                    data_type: LogicalType::Int64,
663                    is_node: false,
664                    is_edge: false,
665                },
666            );
667        }
668
669        Ok(())
670    }
671
672    /// Binds a filter operator.
673    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
674        // First bind the input
675        self.bind_operator(&filter.input)?;
676
677        // Validate the predicate expression
678        self.validate_expression(&filter.predicate)?;
679
680        Ok(())
681    }
682
683    /// Binds a return operator.
684    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
685        // First bind the input
686        self.bind_operator(&ret.input)?;
687
688        // Validate all return expressions
689        for item in &ret.items {
690            self.validate_return_item(item)?;
691        }
692
693        Ok(())
694    }
695
696    /// Validates a return item.
697    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
698        self.validate_expression(&item.expression)
699    }
700
701    /// Validates that an expression only references defined variables.
702    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
703        match expr {
704            LogicalExpression::Variable(name) => {
705                if !self.context.contains(name) && !name.starts_with("_anon_") {
706                    return Err(undefined_variable_error(name, &self.context, ""));
707                }
708                Ok(())
709            }
710            LogicalExpression::Property { variable, .. } => {
711                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
712                    return Err(undefined_variable_error(
713                        variable,
714                        &self.context,
715                        " in property access",
716                    ));
717                }
718                Ok(())
719            }
720            LogicalExpression::Literal(_) => Ok(()),
721            LogicalExpression::Binary { left, right, .. } => {
722                self.validate_expression(left)?;
723                self.validate_expression(right)
724            }
725            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
726            LogicalExpression::FunctionCall { args, .. } => {
727                for arg in args {
728                    self.validate_expression(arg)?;
729                }
730                Ok(())
731            }
732            LogicalExpression::List(items) => {
733                for item in items {
734                    self.validate_expression(item)?;
735                }
736                Ok(())
737            }
738            LogicalExpression::Map(pairs) => {
739                for (_, value) in pairs {
740                    self.validate_expression(value)?;
741                }
742                Ok(())
743            }
744            LogicalExpression::IndexAccess { base, index } => {
745                self.validate_expression(base)?;
746                self.validate_expression(index)
747            }
748            LogicalExpression::SliceAccess { base, start, end } => {
749                self.validate_expression(base)?;
750                if let Some(s) = start {
751                    self.validate_expression(s)?;
752                }
753                if let Some(e) = end {
754                    self.validate_expression(e)?;
755                }
756                Ok(())
757            }
758            LogicalExpression::Case {
759                operand,
760                when_clauses,
761                else_clause,
762            } => {
763                if let Some(op) = operand {
764                    self.validate_expression(op)?;
765                }
766                for (cond, result) in when_clauses {
767                    self.validate_expression(cond)?;
768                    self.validate_expression(result)?;
769                }
770                if let Some(else_expr) = else_clause {
771                    self.validate_expression(else_expr)?;
772                }
773                Ok(())
774            }
775            // Parameter references are validated externally
776            LogicalExpression::Parameter(_) => Ok(()),
777            // labels(n), type(e), id(n) need the variable to be defined
778            LogicalExpression::Labels(var)
779            | LogicalExpression::Type(var)
780            | LogicalExpression::Id(var) => {
781                if !self.context.contains(var) && !var.starts_with("_anon_") {
782                    return Err(undefined_variable_error(var, &self.context, " in function"));
783                }
784                Ok(())
785            }
786            LogicalExpression::ListComprehension {
787                list_expr,
788                filter_expr,
789                map_expr,
790                ..
791            } => {
792                // Validate the list expression
793                self.validate_expression(list_expr)?;
794                // Note: filter_expr and map_expr use the comprehension variable
795                // which is defined within the comprehension scope, so we don't
796                // need to validate it against the outer context
797                if let Some(filter) = filter_expr {
798                    self.validate_expression(filter)?;
799                }
800                self.validate_expression(map_expr)?;
801                Ok(())
802            }
803            LogicalExpression::ExistsSubquery(subquery)
804            | LogicalExpression::CountSubquery(subquery) => {
805                // Subqueries have their own binding context
806                // For now, just validate the structure exists
807                let _ = subquery; // Would need recursive binding
808                Ok(())
809            }
810        }
811    }
812
813    /// Infers the type of an expression for use in WITH clause aliasing.
814    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
815        match expr {
816            LogicalExpression::Variable(name) => {
817                // Look up the variable type from context
818                self.context
819                    .get(name)
820                    .map_or(LogicalType::Any, |info| info.data_type.clone())
821            }
822            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
823            LogicalExpression::Literal(value) => {
824                // Infer type from literal value
825                use grafeo_common::types::Value;
826                match value {
827                    Value::Bool(_) => LogicalType::Bool,
828                    Value::Int64(_) => LogicalType::Int64,
829                    Value::Float64(_) => LogicalType::Float64,
830                    Value::String(_) => LogicalType::String,
831                    Value::List(_) => LogicalType::Any, // Complex type
832                    Value::Map(_) => LogicalType::Any,  // Complex type
833                    Value::Null => LogicalType::Any,
834                    _ => LogicalType::Any,
835                }
836            }
837            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
838            LogicalExpression::Unary { .. } => LogicalType::Any,
839            LogicalExpression::FunctionCall { name, .. } => {
840                // Infer based on function name
841                match name.to_lowercase().as_str() {
842                    "count" | "sum" | "id" => LogicalType::Int64,
843                    "avg" => LogicalType::Float64,
844                    "type" => LogicalType::String,
845                    // List-returning functions use Any since we don't track element type
846                    "labels" | "collect" => LogicalType::Any,
847                    _ => LogicalType::Any,
848                }
849            }
850            LogicalExpression::List(_) => LogicalType::Any, // Complex type
851            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
852            _ => LogicalType::Any,
853        }
854    }
855
856    /// Binds a join operator.
857    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
858        // Bind both sides of the join
859        self.bind_operator(&join.left)?;
860        self.bind_operator(&join.right)?;
861
862        // Validate join conditions
863        for condition in &join.conditions {
864            self.validate_expression(&condition.left)?;
865            self.validate_expression(&condition.right)?;
866        }
867
868        Ok(())
869    }
870
871    /// Binds an aggregate operator.
872    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
873        // Bind the input first
874        self.bind_operator(&agg.input)?;
875
876        // Validate group by expressions
877        for expr in &agg.group_by {
878            self.validate_expression(expr)?;
879        }
880
881        // Validate aggregate expressions
882        for agg_expr in &agg.aggregates {
883            if let Some(ref expr) = agg_expr.expression {
884                self.validate_expression(expr)?;
885            }
886            // Add the alias as a new variable if present
887            if let Some(ref alias) = agg_expr.alias {
888                self.context.add_variable(
889                    alias.clone(),
890                    VariableInfo {
891                        name: alias.clone(),
892                        data_type: LogicalType::Any,
893                        is_node: false,
894                        is_edge: false,
895                    },
896                );
897            }
898        }
899
900        Ok(())
901    }
902}
903
904impl Default for Binder {
905    fn default() -> Self {
906        Self::new()
907    }
908}
909
910#[cfg(test)]
911mod tests {
912    use super::*;
913    use crate::query::plan::{BinaryOp, FilterOp};
914
#[test]
fn test_bind_simple_scan() {
    // Plan shape: NodeScan(n:Person) -> Return(n)
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("n")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // The scanned variable must be bound and flagged as a node.
    assert!(outcome.is_ok());
    let bound = outcome.unwrap();
    assert!(bound.contains("n"));
    assert!(bound.get("n").unwrap().is_node);
}
938
#[test]
fn test_bind_undefined_variable() {
    // Only `n` is produced by the scan, yet the RETURN refers to `undefined`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("undefined")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable"));
}
961
#[test]
fn test_bind_property_access() {
    // Plan shape: NodeScan(n:Person) -> Return(n.name)
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let name_of_n = LogicalExpression::Property {
        variable: String::from("n"),
        property: String::from("name"),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: name_of_n,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // Property access on a scanned variable binds without error.
    assert!(outcome.is_ok());
}
985
#[test]
fn test_bind_filter_with_undefined_variable() {
    // Plan shape: NodeScan(n) -> Filter(m.age > 30) -> Return(n)
    // `m` is never introduced, so the filter predicate must be rejected.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let predicate = LogicalExpression::Binary {
        left: Box::new(LogicalExpression::Property {
            variable: String::from("m"),
            property: String::from("age"),
        }),
        op: BinaryOp::Gt,
        right: Box::new(LogicalExpression::Literal(
            grafeo_common::types::Value::Int64(30),
        )),
    };
    let filter = LogicalOperator::Filter(FilterOp {
        predicate,
        input: Box::new(scan),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("n")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(filter),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'm'"));
}
1020
#[test]
fn test_bind_expand() {
    use crate::query::plan::{ExpandDirection, ExpandOp};

    // Plan shape: NodeScan(a:Person) -> Expand(a -[e:KNOWS]-> b) -> Return(a, b)
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: Some(String::from("Person")),
        input: None,
    });
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: String::from("a"),
        to_variable: String::from("b"),
        edge_variable: Some(String::from("e")),
        direction: ExpandDirection::Outgoing,
        edge_type: Some(String::from("KNOWS")),
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(scan),
        path_alias: None,
    });
    let return_a = ReturnItem {
        expression: LogicalExpression::Variable(String::from("a")),
        alias: None,
    };
    let return_b = ReturnItem {
        expression: LogicalExpression::Variable(String::from("b")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![return_a, return_b],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
    let bound = outcome.unwrap();

    // All three variables are in scope...
    assert!(bound.contains("a"));
    assert!(bound.contains("b"));
    assert!(bound.contains("e"));
    // ...and classified correctly: endpoints are nodes, `e` is an edge.
    assert!(bound.get("a").unwrap().is_node);
    assert!(bound.get("b").unwrap().is_node);
    assert!(bound.get("e").unwrap().is_edge);
}
1066
#[test]
fn test_bind_expand_from_undefined_variable() {
    use crate::query::plan::{ExpandDirection, ExpandOp};

    // The scan introduces `a`, but the expand starts from `undefined`;
    // binding must fail with an error naming that variable.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: String::from("undefined"),
        to_variable: String::from("b"),
        edge_variable: None,
        direction: ExpandDirection::Outgoing,
        edge_type: None,
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(scan),
        path_alias: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("b")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let err = outcome.unwrap_err();
    let mentions_variable = err.to_string().contains("Undefined variable 'undefined'");
    assert!(
        mentions_variable,
        "Expected error about undefined variable, got: {}",
        err
    );
}
1106
#[test]
fn test_bind_return_with_aggregate_and_non_aggregate() {
    // Plan shape: NodeScan(n:Person) -> Return(count(n) AS cnt, 1 AS one)
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let count_item = ReturnItem {
        expression: LogicalExpression::FunctionCall {
            name: String::from("count"),
            args: vec![LogicalExpression::Variable(String::from("n"))],
            distinct: false,
        },
        alias: Some(String::from("cnt")),
    };
    let literal_item = ReturnItem {
        expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
        alias: Some(String::from("one")),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![count_item, literal_item],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // An aggregate next to a plain literal is accepted by the binder.
    assert!(outcome.is_ok());
}
1139
#[test]
fn test_bind_nested_property_access() {
    // Plan shape: NodeScan(n:Person) -> Return(n.name, n.age)
    // Two different properties are read from the same variable.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let name_item = ReturnItem {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("name"),
        },
        alias: None,
    };
    let age_item = ReturnItem {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("age"),
        },
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![name_item, age_item],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
}
1173
#[test]
fn test_bind_binary_expression_with_undefined() {
    // Plan shape: NodeScan(n) -> Return(n.age + m.age AS total)
    // The right operand refers to `m`, which no operator defines.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let defined_side = LogicalExpression::Property {
        variable: String::from("n"),
        property: String::from("age"),
    };
    let undefined_side = LogicalExpression::Property {
        variable: String::from("m"),
        property: String::from("age"),
    };
    let sum = LogicalExpression::Binary {
        left: Box::new(defined_side),
        op: BinaryOp::Add,
        right: Box::new(undefined_side),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: sum,
            alias: Some(String::from("total")),
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'm'"));
}
1211
#[test]
fn test_bind_duplicate_variable_definition() {
    use crate::query::plan::{JoinOp, JoinType};

    // Despite its name, this test joins two scans over *distinct* variables
    // (`n:A` and `m:B`) and checks that both end up in the binding context.
    // It does not exercise a genuine duplicate definition.
    let left_scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("A")),
        input: None,
    });
    let right_scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("m"),
        label: Some(String::from("B")),
        input: None,
    });
    let join = LogicalOperator::Join(JoinOp {
        left: Box::new(left_scan),
        right: Box::new(right_scan),
        join_type: JoinType::Inner,
        conditions: Vec::new(),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("n")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(join),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
    let bound = outcome.unwrap();
    assert!(bound.contains("n"));
    assert!(bound.contains("m"));
}
1249
#[test]
fn test_bind_function_with_wrong_arity() {
    // Plan shape: NodeScan(n) -> Return(count())
    // `count` is called with no arguments. This test deliberately makes no
    // claim about whether binding succeeds or fails; it only checks that the
    // binder returns instead of panicking.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let zero_arg_count = LogicalExpression::FunctionCall {
        name: String::from("count"),
        args: Vec::new(),
        distinct: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: zero_arg_count,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // Either result is acceptable; the value is intentionally discarded.
    let _ = outcome;
}
1279
1280    // --- Mutation operator validation ---
1281
#[test]
fn test_create_edge_rejects_undefined_source() {
    use crate::query::plan::CreateEdgeOp;

    // Only `b` is scanned; the edge's source `ghost` does not exist.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let create = CreateEdgeOp {
        variable: Some(String::from("e")),
        from_variable: String::from("ghost"),
        to_variable: String::from("b"),
        edge_type: String::from("KNOWS"),
        properties: Vec::new(),
        input: Box::new(scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(create));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let names_ghost = err.to_string().contains("Undefined variable 'ghost'");
    assert!(
        names_ghost,
        "Should reject undefined source variable, got: {err}"
    );
}
1306
#[test]
fn test_create_edge_rejects_undefined_target() {
    use crate::query::plan::CreateEdgeOp;

    // Only `a` is scanned; the edge's target `missing` does not exist.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let create = CreateEdgeOp {
        variable: None,
        from_variable: String::from("a"),
        to_variable: String::from("missing"),
        edge_type: String::from("KNOWS"),
        properties: Vec::new(),
        input: Box::new(scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(create));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let names_missing = err.to_string().contains("Undefined variable 'missing'");
    assert!(
        names_missing,
        "Should reject undefined target variable, got: {err}"
    );
}
1331
#[test]
fn test_create_edge_validates_property_expressions() {
    use crate::query::plan::{CreateEdgeOp, JoinOp, JoinType};

    // Both endpoints (`a`, `b`) are defined via a join of two scans, but the
    // edge property `since` is computed from `x.year`, and `x` is undefined.
    let scan_a = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let scan_b = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let endpoints = LogicalOperator::Join(JoinOp {
        left: Box::new(scan_a),
        right: Box::new(scan_b),
        join_type: JoinType::Inner,
        conditions: Vec::new(),
    });
    let since = (
        String::from("since"),
        LogicalExpression::Property {
            variable: String::from("x"),
            property: String::from("year"),
        },
    );
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
        variable: Some(String::from("e")),
        from_variable: String::from("a"),
        to_variable: String::from("b"),
        edge_type: String::from("KNOWS"),
        properties: vec![since],
        input: Box::new(endpoints),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    assert!(err.to_string().contains("Undefined variable 'x'"));
}
1369
#[test]
fn test_set_property_rejects_undefined_variable() {
    use crate::query::plan::SetPropertyOp;

    // The scan defines `n`, but SET targets `ghost`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let assignment = (
        String::from("name"),
        LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
    );
    let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
        variable: String::from("ghost"),
        properties: vec![assignment],
        replace: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let mentions_set = err.to_string().contains("in SET");
    assert!(
        mentions_set,
        "Error should indicate SET context, got: {err}"
    );
}
1395
#[test]
fn test_delete_node_rejects_undefined_variable() {
    use crate::query::plan::DeleteNodeOp;

    // The scan defines `n`, but the delete targets `phantom`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let delete = DeleteNodeOp {
        variable: String::from("phantom"),
        detach: false,
        input: Box::new(scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::DeleteNode(delete));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    assert!(err.to_string().contains("Undefined variable 'phantom'"));
}
1414
#[test]
fn test_delete_edge_rejects_undefined_variable() {
    use crate::query::plan::DeleteEdgeOp;

    // The scan defines `n`, but the delete targets `gone`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let delete = DeleteEdgeOp {
        variable: String::from("gone"),
        input: Box::new(scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(delete));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    assert!(err.to_string().contains("Undefined variable 'gone'"));
}
1432
1433    // --- WITH/Project clause ---
1434
#[test]
fn test_project_alias_becomes_available_downstream() {
    use crate::query::plan::{ProjectOp, Projection};

    // WITH n.name AS person_name RETURN person_name
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let aliased_name = Projection {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("name"),
        },
        alias: Some(String::from("person_name")),
    };
    let project = LogicalOperator::Project(ProjectOp {
        projections: vec![aliased_name],
        input: Box::new(scan),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("person_name")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(project),
    }));

    let mut binder = Binder::new();
    let bound = binder.bind(&plan).unwrap();
    let alias_in_scope = bound.contains("person_name");
    assert!(alias_in_scope, "WITH alias should be available to RETURN");
}
1469
#[test]
fn test_project_rejects_undefined_expression() {
    use crate::query::plan::{ProjectOp, Projection};

    // The projection reads `nope`, but its input is the empty operator and
    // therefore defines nothing.
    let projection = Projection {
        expression: LogicalExpression::Variable(String::from("nope")),
        alias: Some(String::from("x")),
    };
    let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
        projections: vec![projection],
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(outcome.is_err(), "WITH on undefined variable should fail");
}
1486
1487    // --- UNWIND ---
1488
#[test]
fn test_unwind_adds_element_variable() {
    use crate::query::plan::UnwindOp;

    // Plan shape: Empty -> Unwind([1, 2] AS item) -> Return(item)
    let one = LogicalExpression::Literal(grafeo_common::types::Value::Int64(1));
    let two = LogicalExpression::Literal(grafeo_common::types::Value::Int64(2));
    let unwind = LogicalOperator::Unwind(UnwindOp {
        expression: LogicalExpression::List(vec![one, two]),
        variable: String::from("item"),
        input: Box::new(LogicalOperator::Empty),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("item")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(unwind),
    }));

    let mut binder = Binder::new();
    let bound = binder.bind(&plan).unwrap();
    assert!(bound.contains("item"), "UNWIND variable should be in scope");

    // The unwound element is a plain value: neither node nor edge.
    let item = bound.get("item").unwrap();
    assert!(
        !item.is_node && !item.is_edge,
        "UNWIND variable is not a graph element"
    );
}
1518
1519    // --- MERGE ---
1520
#[test]
fn test_merge_adds_variable_and_validates_properties() {
    use crate::query::plan::MergeOp;

    // Plan shape: Empty -> Merge(m:Person {name: "Alice"}) -> Return(m)
    // ON CREATE and ON MATCH each set one boolean literal property.
    let match_on_name = (
        String::from("name"),
        LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
    );
    let mark_created = (
        String::from("created"),
        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
    );
    let mark_updated = (
        String::from("updated"),
        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
    );
    let merge = LogicalOperator::Merge(MergeOp {
        variable: String::from("m"),
        labels: vec![String::from("Person")],
        match_properties: vec![match_on_name],
        on_create: vec![mark_created],
        on_match: vec![mark_updated],
        input: Box::new(LogicalOperator::Empty),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("m")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(merge),
    }));

    let mut binder = Binder::new();
    let bound = binder.bind(&plan).unwrap();
    assert!(bound.contains("m"));
    let merged_is_node = bound.get("m").unwrap().is_node;
    assert!(merged_is_node, "MERGE variable should be a node");
}
1558
#[test]
fn test_merge_rejects_undefined_in_on_create() {
    use crate::query::plan::MergeOp;

    // ON CREATE copies `other.name`, but nothing defines `other`.
    let copy_name = (
        String::from("name"),
        LogicalExpression::Property {
            variable: String::from("other"),
            property: String::from("name"),
        },
    );
    let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
        variable: String::from("m"),
        labels: Vec::new(),
        match_properties: Vec::new(),
        on_create: vec![copy_name],
        on_match: Vec::new(),
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_err(),
        "ON CREATE referencing undefined variable should fail"
    );
}
1585
1586    // --- ShortestPath ---
1587
#[test]
fn test_shortest_path_rejects_undefined_source() {
    use crate::query::plan::{ExpandDirection, ShortestPathOp};

    // Only the target `b` is scanned; the source `missing` is undefined.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let shortest = ShortestPathOp {
        input: Box::new(scan),
        source_var: String::from("missing"),
        target_var: String::from("b"),
        edge_type: None,
        direction: ExpandDirection::Both,
        path_alias: String::from("p"),
        all_paths: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::ShortestPath(shortest));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let mentions_source = err.to_string().contains("source in shortestPath");
    assert!(
        mentions_source,
        "Error should mention shortestPath source context, got: {err}"
    );
}
1613
1614    #[test]
1615    fn test_shortest_path_adds_path_and_length_variables() {
1616        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
1617
1618        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1619            input: Box::new(LogicalOperator::Join(JoinOp {
1620                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1621                    variable: "a".to_string(),
1622                    label: None,
1623                    input: None,
1624                })),
1625                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1626                    variable: "b".to_string(),
1627                    label: None,
1628                    input: None,
1629                })),
1630                join_type: JoinType::Cross,
1631                conditions: vec![],
1632            })),
1633            source_var: "a".to_string(),
1634            target_var: "b".to_string(),
1635            edge_type: Some("ROAD".to_string()),
1636            direction: ExpandDirection::Outgoing,
1637            path_alias: "p".to_string(),
1638            all_paths: false,
1639        }));
1640
1641        let mut binder = Binder::new();
1642        let ctx = binder.bind(&plan).unwrap();
1643        assert!(ctx.contains("p"), "Path alias should be bound");
1644        assert!(
1645            ctx.contains("_path_length_p"),
1646            "Path length variable should be auto-created"
1647        );
1648    }
1649
1650    // --- Expression validation edge cases ---
1651
1652    #[test]
1653    fn test_case_expression_validates_all_branches() {
1654        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1655            items: vec![ReturnItem {
1656                expression: LogicalExpression::Case {
1657                    operand: None,
1658                    when_clauses: vec![
1659                        (
1660                            LogicalExpression::Binary {
1661                                left: Box::new(LogicalExpression::Property {
1662                                    variable: "n".to_string(),
1663                                    property: "age".to_string(),
1664                                }),
1665                                op: BinaryOp::Gt,
1666                                right: Box::new(LogicalExpression::Literal(
1667                                    grafeo_common::types::Value::Int64(18),
1668                                )),
1669                            },
1670                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1671                                "adult".into(),
1672                            )),
1673                        ),
1674                        (
1675                            // This branch references undefined variable
1676                            LogicalExpression::Property {
1677                                variable: "ghost".to_string(),
1678                                property: "flag".to_string(),
1679                            },
1680                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1681                                "flagged".into(),
1682                            )),
1683                        ),
1684                    ],
1685                    else_clause: Some(Box::new(LogicalExpression::Literal(
1686                        grafeo_common::types::Value::String("other".into()),
1687                    ))),
1688                },
1689                alias: None,
1690            }],
1691            distinct: false,
1692            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1693                variable: "n".to_string(),
1694                label: None,
1695                input: None,
1696            })),
1697        }));
1698
1699        let mut binder = Binder::new();
1700        let err = binder.bind(&plan).unwrap_err();
1701        assert!(
1702            err.to_string().contains("ghost"),
1703            "CASE should validate all when-clause conditions"
1704        );
1705    }
1706
1707    #[test]
1708    fn test_case_expression_validates_else_clause() {
1709        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1710            items: vec![ReturnItem {
1711                expression: LogicalExpression::Case {
1712                    operand: None,
1713                    when_clauses: vec![(
1714                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1715                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1716                    )],
1717                    else_clause: Some(Box::new(LogicalExpression::Property {
1718                        variable: "missing".to_string(),
1719                        property: "x".to_string(),
1720                    })),
1721                },
1722                alias: None,
1723            }],
1724            distinct: false,
1725            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1726                variable: "n".to_string(),
1727                label: None,
1728                input: None,
1729            })),
1730        }));
1731
1732        let mut binder = Binder::new();
1733        let err = binder.bind(&plan).unwrap_err();
1734        assert!(
1735            err.to_string().contains("missing"),
1736            "CASE ELSE should validate its expression too"
1737        );
1738    }
1739
1740    #[test]
1741    fn test_slice_access_validates_expressions() {
1742        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1743            items: vec![ReturnItem {
1744                expression: LogicalExpression::SliceAccess {
1745                    base: Box::new(LogicalExpression::Variable("n".to_string())),
1746                    start: Some(Box::new(LogicalExpression::Variable(
1747                        "undefined_start".to_string(),
1748                    ))),
1749                    end: None,
1750                },
1751                alias: None,
1752            }],
1753            distinct: false,
1754            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1755                variable: "n".to_string(),
1756                label: None,
1757                input: None,
1758            })),
1759        }));
1760
1761        let mut binder = Binder::new();
1762        let err = binder.bind(&plan).unwrap_err();
1763        assert!(err.to_string().contains("undefined_start"));
1764    }
1765
1766    #[test]
1767    fn test_list_comprehension_validates_list_source() {
1768        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1769            items: vec![ReturnItem {
1770                expression: LogicalExpression::ListComprehension {
1771                    variable: "x".to_string(),
1772                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
1773                    filter_expr: None,
1774                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
1775                },
1776                alias: None,
1777            }],
1778            distinct: false,
1779            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1780                variable: "n".to_string(),
1781                label: None,
1782                input: None,
1783            })),
1784        }));
1785
1786        let mut binder = Binder::new();
1787        let err = binder.bind(&plan).unwrap_err();
1788        assert!(
1789            err.to_string().contains("not_defined"),
1790            "List comprehension should validate source list expression"
1791        );
1792    }
1793
1794    #[test]
1795    fn test_labels_type_id_reject_undefined() {
1796        // labels(x) where x is not defined
1797        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1798            items: vec![ReturnItem {
1799                expression: LogicalExpression::Labels("x".to_string()),
1800                alias: None,
1801            }],
1802            distinct: false,
1803            input: Box::new(LogicalOperator::Empty),
1804        }));
1805
1806        let mut binder = Binder::new();
1807        assert!(
1808            binder.bind(&plan).is_err(),
1809            "labels(x) on undefined x should fail"
1810        );
1811
1812        // type(e) where e is not defined
1813        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1814            items: vec![ReturnItem {
1815                expression: LogicalExpression::Type("e".to_string()),
1816                alias: None,
1817            }],
1818            distinct: false,
1819            input: Box::new(LogicalOperator::Empty),
1820        }));
1821
1822        let mut binder2 = Binder::new();
1823        assert!(
1824            binder2.bind(&plan2).is_err(),
1825            "type(e) on undefined e should fail"
1826        );
1827
1828        // id(n) where n is not defined
1829        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1830            items: vec![ReturnItem {
1831                expression: LogicalExpression::Id("n".to_string()),
1832                alias: None,
1833            }],
1834            distinct: false,
1835            input: Box::new(LogicalOperator::Empty),
1836        }));
1837
1838        let mut binder3 = Binder::new();
1839        assert!(
1840            binder3.bind(&plan3).is_err(),
1841            "id(n) on undefined n should fail"
1842        );
1843    }
1844
1845    #[test]
1846    fn test_expand_rejects_non_node_source() {
1847        use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};
1848
1849        // UNWIND [1,2] AS x  -- x is not a node
1850        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
1851        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1852            items: vec![ReturnItem {
1853                expression: LogicalExpression::Variable("b".to_string()),
1854                alias: None,
1855            }],
1856            distinct: false,
1857            input: Box::new(LogicalOperator::Expand(ExpandOp {
1858                from_variable: "x".to_string(),
1859                to_variable: "b".to_string(),
1860                edge_variable: None,
1861                direction: ExpandDirection::Outgoing,
1862                edge_type: None,
1863                min_hops: 1,
1864                max_hops: Some(1),
1865                input: Box::new(LogicalOperator::Unwind(UnwindOp {
1866                    expression: LogicalExpression::List(vec![]),
1867                    variable: "x".to_string(),
1868                    input: Box::new(LogicalOperator::Empty),
1869                })),
1870                path_alias: None,
1871            })),
1872        }));
1873
1874        let mut binder = Binder::new();
1875        let err = binder.bind(&plan).unwrap_err();
1876        assert!(
1877            err.to_string().contains("not a node"),
1878            "Expanding from non-node should fail, got: {err}"
1879        );
1880    }
1881
1882    #[test]
1883    fn test_add_label_rejects_undefined_variable() {
1884        use crate::query::plan::AddLabelOp;
1885
1886        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
1887            variable: "missing".to_string(),
1888            labels: vec!["Admin".to_string()],
1889            input: Box::new(LogicalOperator::Empty),
1890        }));
1891
1892        let mut binder = Binder::new();
1893        let err = binder.bind(&plan).unwrap_err();
1894        assert!(err.to_string().contains("SET labels"));
1895    }
1896
1897    #[test]
1898    fn test_remove_label_rejects_undefined_variable() {
1899        use crate::query::plan::RemoveLabelOp;
1900
1901        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
1902            variable: "missing".to_string(),
1903            labels: vec!["Admin".to_string()],
1904            input: Box::new(LogicalOperator::Empty),
1905        }));
1906
1907        let mut binder = Binder::new();
1908        let err = binder.bind(&plan).unwrap_err();
1909        assert!(err.to_string().contains("REMOVE labels"));
1910    }
1911
1912    #[test]
1913    fn test_sort_validates_key_expressions() {
1914        use crate::query::plan::{SortKey, SortOp, SortOrder};
1915
1916        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
1917            keys: vec![SortKey {
1918                expression: LogicalExpression::Property {
1919                    variable: "missing".to_string(),
1920                    property: "name".to_string(),
1921                },
1922                order: SortOrder::Ascending,
1923            }],
1924            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1925                variable: "n".to_string(),
1926                label: None,
1927                input: None,
1928            })),
1929        }));
1930
1931        let mut binder = Binder::new();
1932        assert!(
1933            binder.bind(&plan).is_err(),
1934            "ORDER BY on undefined variable should fail"
1935        );
1936    }
1937
1938    #[test]
1939    fn test_create_node_adds_variable_before_property_validation() {
1940        use crate::query::plan::CreateNodeOp;
1941
1942        // CREATE (n:Person {friend: n.name}) - referencing the node being created
1943        // The variable should be available for property expressions (self-reference)
1944        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
1945            variable: "n".to_string(),
1946            labels: vec!["Person".to_string()],
1947            properties: vec![(
1948                "self_ref".to_string(),
1949                LogicalExpression::Property {
1950                    variable: "n".to_string(),
1951                    property: "name".to_string(),
1952                },
1953            )],
1954            input: None,
1955        }));
1956
1957        let mut binder = Binder::new();
1958        // This should succeed because CreateNode adds the variable before validating properties
1959        let ctx = binder.bind(&plan).unwrap();
1960        assert!(ctx.get("n").unwrap().is_node);
1961    }
1962
1963    #[test]
1964    fn test_undefined_variable_suggests_similar() {
1965        // 'person' is defined, user types 'persn' - should get a suggestion
1966        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1967            items: vec![ReturnItem {
1968                expression: LogicalExpression::Variable("persn".to_string()),
1969                alias: None,
1970            }],
1971            distinct: false,
1972            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1973                variable: "person".to_string(),
1974                label: None,
1975                input: None,
1976            })),
1977        }));
1978
1979        let mut binder = Binder::new();
1980        let err = binder.bind(&plan).unwrap_err();
1981        let msg = err.to_string();
1982        // The error should contain the variable name at minimum
1983        assert!(
1984            msg.contains("persn"),
1985            "Error should mention the undefined variable"
1986        );
1987    }
1988
1989    #[test]
1990    fn test_anon_variables_skip_validation() {
1991        // Variables starting with _anon_ are anonymous and should be silently accepted
1992        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1993            items: vec![ReturnItem {
1994                expression: LogicalExpression::Variable("_anon_42".to_string()),
1995                alias: None,
1996            }],
1997            distinct: false,
1998            input: Box::new(LogicalOperator::Empty),
1999        }));
2000
2001        let mut binder = Binder::new();
2002        let result = binder.bind(&plan);
2003        assert!(
2004            result.is_ok(),
2005            "Anonymous variables should bypass validation"
2006        );
2007    }
2008
2009    #[test]
2010    fn test_map_expression_validates_values() {
2011        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2012            items: vec![ReturnItem {
2013                expression: LogicalExpression::Map(vec![(
2014                    "key".to_string(),
2015                    LogicalExpression::Variable("undefined".to_string()),
2016                )]),
2017                alias: None,
2018            }],
2019            distinct: false,
2020            input: Box::new(LogicalOperator::Empty),
2021        }));
2022
2023        let mut binder = Binder::new();
2024        assert!(
2025            binder.bind(&plan).is_err(),
2026            "Map values should be validated"
2027        );
2028    }
2029
2030    #[test]
2031    fn test_vector_scan_validates_query_vector() {
2032        use crate::query::plan::VectorScanOp;
2033
2034        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2035            variable: "result".to_string(),
2036            index_name: None,
2037            property: "embedding".to_string(),
2038            label: Some("Doc".to_string()),
2039            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2040            k: 10,
2041            metric: None,
2042            min_similarity: None,
2043            max_distance: None,
2044            input: None,
2045        }));
2046
2047        let mut binder = Binder::new();
2048        let err = binder.bind(&plan).unwrap_err();
2049        assert!(err.to_string().contains("undefined_vec"));
2050    }
2051}