Skip to main content

grafeo_engine/query/
binder.rs

//! Semantic validation - catching errors before execution.
//!
//! The binder walks the logical plan and validates that everything makes sense:
//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
//! - Does that property access make sense? (Accessing `.age` on an integer fails)
//! - Are types compatible? (Can't compare a string to an integer)
//!
//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                // Add ORDINALITY variable if present (1-based index)
314                if let Some(ref ord_var) = unwind.ordinality_var {
315                    self.context.add_variable(
316                        ord_var.clone(),
317                        VariableInfo {
318                            name: ord_var.clone(),
319                            data_type: LogicalType::Int64,
320                            is_node: false,
321                            is_edge: false,
322                        },
323                    );
324                }
325                // Add OFFSET variable if present (0-based index)
326                if let Some(ref off_var) = unwind.offset_var {
327                    self.context.add_variable(
328                        off_var.clone(),
329                        VariableInfo {
330                            name: off_var.clone(),
331                            data_type: LogicalType::Int64,
332                            is_node: false,
333                            is_edge: false,
334                        },
335                    );
336                }
337                Ok(())
338            }
339
340            // RDF/SPARQL operators
341            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
342            LogicalOperator::Union(union) => {
343                for input in &union.inputs {
344                    self.bind_operator(input)?;
345                }
346                Ok(())
347            }
348            LogicalOperator::LeftJoin(lj) => {
349                self.bind_operator(&lj.left)?;
350                self.bind_operator(&lj.right)?;
351                if let Some(ref cond) = lj.condition {
352                    self.validate_expression(cond)?;
353                }
354                Ok(())
355            }
356            LogicalOperator::AntiJoin(aj) => {
357                self.bind_operator(&aj.left)?;
358                self.bind_operator(&aj.right)?;
359                Ok(())
360            }
361            LogicalOperator::Bind(bind) => {
362                self.bind_operator(&bind.input)?;
363                self.validate_expression(&bind.expression)?;
364                self.context.add_variable(
365                    bind.variable.clone(),
366                    VariableInfo {
367                        name: bind.variable.clone(),
368                        data_type: LogicalType::Any,
369                        is_node: false,
370                        is_edge: false,
371                    },
372                );
373                Ok(())
374            }
375            LogicalOperator::Merge(merge) => {
376                // First bind the input
377                self.bind_operator(&merge.input)?;
378                // Validate the match property expressions
379                for (_, expr) in &merge.match_properties {
380                    self.validate_expression(expr)?;
381                }
382                // Validate the ON CREATE property expressions
383                for (_, expr) in &merge.on_create {
384                    self.validate_expression(expr)?;
385                }
386                // Validate the ON MATCH property expressions
387                for (_, expr) in &merge.on_match {
388                    self.validate_expression(expr)?;
389                }
390                // MERGE introduces a new variable
391                self.context.add_variable(
392                    merge.variable.clone(),
393                    VariableInfo {
394                        name: merge.variable.clone(),
395                        data_type: LogicalType::Node,
396                        is_node: true,
397                        is_edge: false,
398                    },
399                );
400                Ok(())
401            }
402            LogicalOperator::MergeRelationship(merge_rel) => {
403                self.bind_operator(&merge_rel.input)?;
404                // Validate source and target variables exist
405                if !self.context.contains(&merge_rel.source_variable) {
406                    return Err(undefined_variable_error(
407                        &merge_rel.source_variable,
408                        &self.context,
409                        " in MERGE relationship source",
410                    ));
411                }
412                if !self.context.contains(&merge_rel.target_variable) {
413                    return Err(undefined_variable_error(
414                        &merge_rel.target_variable,
415                        &self.context,
416                        " in MERGE relationship target",
417                    ));
418                }
419                for (_, expr) in &merge_rel.match_properties {
420                    self.validate_expression(expr)?;
421                }
422                for (_, expr) in &merge_rel.on_create {
423                    self.validate_expression(expr)?;
424                }
425                for (_, expr) in &merge_rel.on_match {
426                    self.validate_expression(expr)?;
427                }
428                // MERGE relationship introduces the edge variable
429                self.context.add_variable(
430                    merge_rel.variable.clone(),
431                    VariableInfo {
432                        name: merge_rel.variable.clone(),
433                        data_type: LogicalType::Edge,
434                        is_node: false,
435                        is_edge: true,
436                    },
437                );
438                Ok(())
439            }
440            LogicalOperator::AddLabel(add_label) => {
441                self.bind_operator(&add_label.input)?;
442                // Validate that the variable exists
443                if !self.context.contains(&add_label.variable) {
444                    return Err(undefined_variable_error(
445                        &add_label.variable,
446                        &self.context,
447                        " in SET labels",
448                    ));
449                }
450                Ok(())
451            }
452            LogicalOperator::RemoveLabel(remove_label) => {
453                self.bind_operator(&remove_label.input)?;
454                // Validate that the variable exists
455                if !self.context.contains(&remove_label.variable) {
456                    return Err(undefined_variable_error(
457                        &remove_label.variable,
458                        &self.context,
459                        " in REMOVE labels",
460                    ));
461                }
462                Ok(())
463            }
464            LogicalOperator::ShortestPath(sp) => {
465                // First bind the input
466                self.bind_operator(&sp.input)?;
467                // Validate that source and target variables are defined
468                if !self.context.contains(&sp.source_var) {
469                    return Err(undefined_variable_error(
470                        &sp.source_var,
471                        &self.context,
472                        " (source in shortestPath)",
473                    ));
474                }
475                if !self.context.contains(&sp.target_var) {
476                    return Err(undefined_variable_error(
477                        &sp.target_var,
478                        &self.context,
479                        " (target in shortestPath)",
480                    ));
481                }
482                // Add the path alias variable to the context
483                self.context.add_variable(
484                    sp.path_alias.clone(),
485                    VariableInfo {
486                        name: sp.path_alias.clone(),
487                        data_type: LogicalType::Any, // Path is a complex type
488                        is_node: false,
489                        is_edge: false,
490                    },
491                );
492                // Also add the path length variable for length(p) calls
493                let path_length_var = format!("_path_length_{}", sp.path_alias);
494                self.context.add_variable(
495                    path_length_var.clone(),
496                    VariableInfo {
497                        name: path_length_var,
498                        data_type: LogicalType::Int64,
499                        is_node: false,
500                        is_edge: false,
501                    },
502                );
503                Ok(())
504            }
505            // SPARQL Update operators - these don't require variable binding
506            LogicalOperator::InsertTriple(insert) => {
507                if let Some(ref input) = insert.input {
508                    self.bind_operator(input)?;
509                }
510                Ok(())
511            }
512            LogicalOperator::DeleteTriple(delete) => {
513                if let Some(ref input) = delete.input {
514                    self.bind_operator(input)?;
515                }
516                Ok(())
517            }
518            LogicalOperator::Modify(modify) => {
519                self.bind_operator(&modify.where_clause)?;
520                Ok(())
521            }
522            LogicalOperator::ClearGraph(_)
523            | LogicalOperator::CreateGraph(_)
524            | LogicalOperator::DropGraph(_)
525            | LogicalOperator::LoadGraph(_)
526            | LogicalOperator::CopyGraph(_)
527            | LogicalOperator::MoveGraph(_)
528            | LogicalOperator::AddGraph(_) => Ok(()),
529            LogicalOperator::VectorScan(scan) => {
530                // VectorScan introduces a variable for matched nodes
531                if let Some(ref input) = scan.input {
532                    self.bind_operator(input)?;
533                }
534                self.context.add_variable(
535                    scan.variable.clone(),
536                    VariableInfo {
537                        name: scan.variable.clone(),
538                        data_type: LogicalType::Node,
539                        is_node: true,
540                        is_edge: false,
541                    },
542                );
543                // Validate the query vector expression
544                self.validate_expression(&scan.query_vector)?;
545                Ok(())
546            }
547            LogicalOperator::VectorJoin(join) => {
548                // VectorJoin takes input from left side and produces right-side matches
549                self.bind_operator(&join.input)?;
550                // Add right variable for matched nodes
551                self.context.add_variable(
552                    join.right_variable.clone(),
553                    VariableInfo {
554                        name: join.right_variable.clone(),
555                        data_type: LogicalType::Node,
556                        is_node: true,
557                        is_edge: false,
558                    },
559                );
560                // Optionally add score variable
561                if let Some(ref score_var) = join.score_variable {
562                    self.context.add_variable(
563                        score_var.clone(),
564                        VariableInfo {
565                            name: score_var.clone(),
566                            data_type: LogicalType::Float64,
567                            is_node: false,
568                            is_edge: false,
569                        },
570                    );
571                }
572                // Validate the query vector expression
573                self.validate_expression(&join.query_vector)?;
574                Ok(())
575            }
576            LogicalOperator::MapCollect(mc) => {
577                self.bind_operator(&mc.input)?;
578                self.context.add_variable(
579                    mc.alias.clone(),
580                    VariableInfo {
581                        name: mc.alias.clone(),
582                        data_type: LogicalType::Any,
583                        is_node: false,
584                        is_edge: false,
585                    },
586                );
587                Ok(())
588            }
589            // DDL operators don't need binding — they're handled before the binder
590            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
591            // Procedure calls: register yielded columns as variables for downstream operators
592            LogicalOperator::CallProcedure(call) => {
593                if let Some(yields) = &call.yield_items {
594                    for item in yields {
595                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
596                        self.context.add_variable(
597                            var_name.to_string(),
598                            VariableInfo {
599                                name: var_name.to_string(),
600                                data_type: LogicalType::Any,
601                                is_node: false,
602                                is_edge: false,
603                            },
604                        );
605                    }
606                }
607                Ok(())
608            }
609        }
610    }
611
612    /// Binds a triple scan operator (for RDF/SPARQL).
613    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
614        use crate::query::plan::TripleComponent;
615
616        // First bind the input if present
617        if let Some(ref input) = scan.input {
618            self.bind_operator(input)?;
619        }
620
621        // Add variables for subject, predicate, object
622        if let TripleComponent::Variable(name) = &scan.subject
623            && !self.context.contains(name)
624        {
625            self.context.add_variable(
626                name.clone(),
627                VariableInfo {
628                    name: name.clone(),
629                    data_type: LogicalType::Any, // RDF term
630                    is_node: false,
631                    is_edge: false,
632                },
633            );
634        }
635
636        if let TripleComponent::Variable(name) = &scan.predicate
637            && !self.context.contains(name)
638        {
639            self.context.add_variable(
640                name.clone(),
641                VariableInfo {
642                    name: name.clone(),
643                    data_type: LogicalType::Any, // IRI
644                    is_node: false,
645                    is_edge: false,
646                },
647            );
648        }
649
650        if let TripleComponent::Variable(name) = &scan.object
651            && !self.context.contains(name)
652        {
653            self.context.add_variable(
654                name.clone(),
655                VariableInfo {
656                    name: name.clone(),
657                    data_type: LogicalType::Any, // RDF term
658                    is_node: false,
659                    is_edge: false,
660                },
661            );
662        }
663
664        if let Some(TripleComponent::Variable(name)) = &scan.graph
665            && !self.context.contains(name)
666        {
667            self.context.add_variable(
668                name.clone(),
669                VariableInfo {
670                    name: name.clone(),
671                    data_type: LogicalType::Any, // IRI
672                    is_node: false,
673                    is_edge: false,
674                },
675            );
676        }
677
678        Ok(())
679    }
680
681    /// Binds a node scan operator.
682    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
683        // First bind the input if present
684        if let Some(ref input) = scan.input {
685            self.bind_operator(input)?;
686        }
687
688        // Add the scanned variable to scope
689        self.context.add_variable(
690            scan.variable.clone(),
691            VariableInfo {
692                name: scan.variable.clone(),
693                data_type: LogicalType::Node,
694                is_node: true,
695                is_edge: false,
696            },
697        );
698
699        Ok(())
700    }
701
702    /// Binds an expand operator.
703    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
704        // First bind the input
705        self.bind_operator(&expand.input)?;
706
707        // Validate that the source variable is defined
708        if !self.context.contains(&expand.from_variable) {
709            return Err(undefined_variable_error(
710                &expand.from_variable,
711                &self.context,
712                " in EXPAND",
713            ));
714        }
715
716        // Validate that the source is a node
717        if let Some(info) = self.context.get(&expand.from_variable)
718            && !info.is_node
719        {
720            return Err(binding_error(format!(
721                "Variable '{}' is not a node, cannot expand from it",
722                expand.from_variable
723            )));
724        }
725
726        // Add edge variable if present
727        if let Some(ref edge_var) = expand.edge_variable {
728            self.context.add_variable(
729                edge_var.clone(),
730                VariableInfo {
731                    name: edge_var.clone(),
732                    data_type: LogicalType::Edge,
733                    is_node: false,
734                    is_edge: true,
735                },
736            );
737        }
738
739        // Add target variable
740        self.context.add_variable(
741            expand.to_variable.clone(),
742            VariableInfo {
743                name: expand.to_variable.clone(),
744                data_type: LogicalType::Node,
745                is_node: true,
746                is_edge: false,
747            },
748        );
749
750        // Add path variables for variable-length paths
751        if let Some(ref path_alias) = expand.path_alias {
752            // Register the path variable itself (e.g. p in MATCH p=...)
753            self.context.add_variable(
754                path_alias.clone(),
755                VariableInfo {
756                    name: path_alias.clone(),
757                    data_type: LogicalType::Any,
758                    is_node: false,
759                    is_edge: false,
760                },
761            );
762            // length(p) → _path_length_p
763            let path_length_var = format!("_path_length_{}", path_alias);
764            self.context.add_variable(
765                path_length_var.clone(),
766                VariableInfo {
767                    name: path_length_var,
768                    data_type: LogicalType::Int64,
769                    is_node: false,
770                    is_edge: false,
771                },
772            );
773            // nodes(p) → _path_nodes_p
774            let path_nodes_var = format!("_path_nodes_{}", path_alias);
775            self.context.add_variable(
776                path_nodes_var.clone(),
777                VariableInfo {
778                    name: path_nodes_var,
779                    data_type: LogicalType::Any,
780                    is_node: false,
781                    is_edge: false,
782                },
783            );
784            // edges(p) → _path_edges_p
785            let path_edges_var = format!("_path_edges_{}", path_alias);
786            self.context.add_variable(
787                path_edges_var.clone(),
788                VariableInfo {
789                    name: path_edges_var,
790                    data_type: LogicalType::Any,
791                    is_node: false,
792                    is_edge: false,
793                },
794            );
795        }
796
797        Ok(())
798    }
799
800    /// Binds a filter operator.
801    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
802        // First bind the input
803        self.bind_operator(&filter.input)?;
804
805        // Validate the predicate expression
806        self.validate_expression(&filter.predicate)?;
807
808        Ok(())
809    }
810
811    /// Binds a return operator.
812    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
813        // First bind the input
814        self.bind_operator(&ret.input)?;
815
816        // Validate all return expressions and register aliases
817        // (aliases must be visible to parent Sort for ORDER BY resolution)
818        for item in &ret.items {
819            self.validate_return_item(item)?;
820            if let Some(ref alias) = item.alias {
821                let data_type = self.infer_expression_type(&item.expression);
822                self.context.add_variable(
823                    alias.clone(),
824                    VariableInfo {
825                        name: alias.clone(),
826                        data_type,
827                        is_node: false,
828                        is_edge: false,
829                    },
830                );
831            }
832        }
833
834        Ok(())
835    }
836
837    /// Validates a return item.
838    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
839        self.validate_expression(&item.expression)
840    }
841
842    /// Validates that an expression only references defined variables.
843    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
844        match expr {
845            LogicalExpression::Variable(name) => {
846                if !self.context.contains(name) && !name.starts_with("_anon_") {
847                    return Err(undefined_variable_error(name, &self.context, ""));
848                }
849                Ok(())
850            }
851            LogicalExpression::Property { variable, .. } => {
852                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
853                    return Err(undefined_variable_error(
854                        variable,
855                        &self.context,
856                        " in property access",
857                    ));
858                }
859                Ok(())
860            }
861            LogicalExpression::Literal(_) => Ok(()),
862            LogicalExpression::Binary { left, right, .. } => {
863                self.validate_expression(left)?;
864                self.validate_expression(right)
865            }
866            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
867            LogicalExpression::FunctionCall { args, .. } => {
868                for arg in args {
869                    self.validate_expression(arg)?;
870                }
871                Ok(())
872            }
873            LogicalExpression::List(items) => {
874                for item in items {
875                    self.validate_expression(item)?;
876                }
877                Ok(())
878            }
879            LogicalExpression::Map(pairs) => {
880                for (_, value) in pairs {
881                    self.validate_expression(value)?;
882                }
883                Ok(())
884            }
885            LogicalExpression::IndexAccess { base, index } => {
886                self.validate_expression(base)?;
887                self.validate_expression(index)
888            }
889            LogicalExpression::SliceAccess { base, start, end } => {
890                self.validate_expression(base)?;
891                if let Some(s) = start {
892                    self.validate_expression(s)?;
893                }
894                if let Some(e) = end {
895                    self.validate_expression(e)?;
896                }
897                Ok(())
898            }
899            LogicalExpression::Case {
900                operand,
901                when_clauses,
902                else_clause,
903            } => {
904                if let Some(op) = operand {
905                    self.validate_expression(op)?;
906                }
907                for (cond, result) in when_clauses {
908                    self.validate_expression(cond)?;
909                    self.validate_expression(result)?;
910                }
911                if let Some(else_expr) = else_clause {
912                    self.validate_expression(else_expr)?;
913                }
914                Ok(())
915            }
916            // Parameter references are validated externally
917            LogicalExpression::Parameter(_) => Ok(()),
918            // labels(n), type(e), id(n) need the variable to be defined
919            LogicalExpression::Labels(var)
920            | LogicalExpression::Type(var)
921            | LogicalExpression::Id(var) => {
922                if !self.context.contains(var) && !var.starts_with("_anon_") {
923                    return Err(undefined_variable_error(var, &self.context, " in function"));
924                }
925                Ok(())
926            }
927            LogicalExpression::ListComprehension {
928                list_expr,
929                filter_expr,
930                map_expr,
931                ..
932            } => {
933                // Validate the list expression
934                self.validate_expression(list_expr)?;
935                // Note: filter_expr and map_expr use the comprehension variable
936                // which is defined within the comprehension scope, so we don't
937                // need to validate it against the outer context
938                if let Some(filter) = filter_expr {
939                    self.validate_expression(filter)?;
940                }
941                self.validate_expression(map_expr)?;
942                Ok(())
943            }
944            LogicalExpression::ListPredicate { list_expr, .. } => {
945                // Validate the list expression against the outer context.
946                // The predicate uses the iteration variable which is locally
947                // scoped, so we skip validating it against the outer context.
948                self.validate_expression(list_expr)?;
949                Ok(())
950            }
951            LogicalExpression::ExistsSubquery(subquery)
952            | LogicalExpression::CountSubquery(subquery) => {
953                // Subqueries have their own binding context
954                // For now, just validate the structure exists
955                let _ = subquery; // Would need recursive binding
956                Ok(())
957            }
958        }
959    }
960
961    /// Infers the type of an expression for use in WITH clause aliasing.
962    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
963        match expr {
964            LogicalExpression::Variable(name) => {
965                // Look up the variable type from context
966                self.context
967                    .get(name)
968                    .map_or(LogicalType::Any, |info| info.data_type.clone())
969            }
970            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
971            LogicalExpression::Literal(value) => {
972                // Infer type from literal value
973                use grafeo_common::types::Value;
974                match value {
975                    Value::Bool(_) => LogicalType::Bool,
976                    Value::Int64(_) => LogicalType::Int64,
977                    Value::Float64(_) => LogicalType::Float64,
978                    Value::String(_) => LogicalType::String,
979                    Value::List(_) => LogicalType::Any, // Complex type
980                    Value::Map(_) => LogicalType::Any,  // Complex type
981                    Value::Null => LogicalType::Any,
982                    _ => LogicalType::Any,
983                }
984            }
985            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
986            LogicalExpression::Unary { .. } => LogicalType::Any,
987            LogicalExpression::FunctionCall { name, .. } => {
988                // Infer based on function name
989                match name.to_lowercase().as_str() {
990                    "count" | "sum" | "id" => LogicalType::Int64,
991                    "avg" => LogicalType::Float64,
992                    "type" => LogicalType::String,
993                    // List-returning functions use Any since we don't track element type
994                    "labels" | "collect" => LogicalType::Any,
995                    _ => LogicalType::Any,
996                }
997            }
998            LogicalExpression::List(_) => LogicalType::Any, // Complex type
999            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
1000            _ => LogicalType::Any,
1001        }
1002    }
1003
1004    /// Binds a join operator.
1005    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
1006        // Bind both sides of the join
1007        self.bind_operator(&join.left)?;
1008        self.bind_operator(&join.right)?;
1009
1010        // Validate join conditions
1011        for condition in &join.conditions {
1012            self.validate_expression(&condition.left)?;
1013            self.validate_expression(&condition.right)?;
1014        }
1015
1016        Ok(())
1017    }
1018
1019    /// Binds an aggregate operator.
1020    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
1021        // Bind the input first
1022        self.bind_operator(&agg.input)?;
1023
1024        // Validate group by expressions
1025        for expr in &agg.group_by {
1026            self.validate_expression(expr)?;
1027        }
1028
1029        // Validate aggregate expressions
1030        for agg_expr in &agg.aggregates {
1031            if let Some(ref expr) = agg_expr.expression {
1032                self.validate_expression(expr)?;
1033            }
1034            // Add the alias as a new variable if present
1035            if let Some(ref alias) = agg_expr.alias {
1036                self.context.add_variable(
1037                    alias.clone(),
1038                    VariableInfo {
1039                        name: alias.clone(),
1040                        data_type: LogicalType::Any,
1041                        is_node: false,
1042                        is_edge: false,
1043                    },
1044                );
1045            }
1046        }
1047
1048        Ok(())
1049    }
1050}
1051
1052impl Default for Binder {
1053    fn default() -> Self {
1054        Self::new()
1055    }
1056}
1057
1058#[cfg(test)]
1059mod tests {
1060    use super::*;
1061    use crate::query::plan::{BinaryOp, FilterOp};
1062
1063    #[test]
1064    fn test_bind_simple_scan() {
1065        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1066            items: vec![ReturnItem {
1067                expression: LogicalExpression::Variable("n".to_string()),
1068                alias: None,
1069            }],
1070            distinct: false,
1071            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1072                variable: "n".to_string(),
1073                label: Some("Person".to_string()),
1074                input: None,
1075            })),
1076        }));
1077
1078        let mut binder = Binder::new();
1079        let result = binder.bind(&plan);
1080
1081        assert!(result.is_ok());
1082        let ctx = result.unwrap();
1083        assert!(ctx.contains("n"));
1084        assert!(ctx.get("n").unwrap().is_node);
1085    }
1086
1087    #[test]
1088    fn test_bind_undefined_variable() {
1089        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1090            items: vec![ReturnItem {
1091                expression: LogicalExpression::Variable("undefined".to_string()),
1092                alias: None,
1093            }],
1094            distinct: false,
1095            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1096                variable: "n".to_string(),
1097                label: None,
1098                input: None,
1099            })),
1100        }));
1101
1102        let mut binder = Binder::new();
1103        let result = binder.bind(&plan);
1104
1105        assert!(result.is_err());
1106        let err = result.unwrap_err();
1107        assert!(err.to_string().contains("Undefined variable"));
1108    }
1109
1110    #[test]
1111    fn test_bind_property_access() {
1112        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1113            items: vec![ReturnItem {
1114                expression: LogicalExpression::Property {
1115                    variable: "n".to_string(),
1116                    property: "name".to_string(),
1117                },
1118                alias: None,
1119            }],
1120            distinct: false,
1121            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1122                variable: "n".to_string(),
1123                label: Some("Person".to_string()),
1124                input: None,
1125            })),
1126        }));
1127
1128        let mut binder = Binder::new();
1129        let result = binder.bind(&plan);
1130
1131        assert!(result.is_ok());
1132    }
1133
1134    #[test]
1135    fn test_bind_filter_with_undefined_variable() {
1136        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1137            items: vec![ReturnItem {
1138                expression: LogicalExpression::Variable("n".to_string()),
1139                alias: None,
1140            }],
1141            distinct: false,
1142            input: Box::new(LogicalOperator::Filter(FilterOp {
1143                predicate: LogicalExpression::Binary {
1144                    left: Box::new(LogicalExpression::Property {
1145                        variable: "m".to_string(), // undefined!
1146                        property: "age".to_string(),
1147                    }),
1148                    op: BinaryOp::Gt,
1149                    right: Box::new(LogicalExpression::Literal(
1150                        grafeo_common::types::Value::Int64(30),
1151                    )),
1152                },
1153                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1154                    variable: "n".to_string(),
1155                    label: None,
1156                    input: None,
1157                })),
1158            })),
1159        }));
1160
1161        let mut binder = Binder::new();
1162        let result = binder.bind(&plan);
1163
1164        assert!(result.is_err());
1165        let err = result.unwrap_err();
1166        assert!(err.to_string().contains("Undefined variable 'm'"));
1167    }
1168
1169    #[test]
1170    fn test_bind_expand() {
1171        use crate::query::plan::{ExpandDirection, ExpandOp};
1172
1173        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1174            items: vec![
1175                ReturnItem {
1176                    expression: LogicalExpression::Variable("a".to_string()),
1177                    alias: None,
1178                },
1179                ReturnItem {
1180                    expression: LogicalExpression::Variable("b".to_string()),
1181                    alias: None,
1182                },
1183            ],
1184            distinct: false,
1185            input: Box::new(LogicalOperator::Expand(ExpandOp {
1186                from_variable: "a".to_string(),
1187                to_variable: "b".to_string(),
1188                edge_variable: Some("e".to_string()),
1189                direction: ExpandDirection::Outgoing,
1190                edge_type: Some("KNOWS".to_string()),
1191                min_hops: 1,
1192                max_hops: Some(1),
1193                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1194                    variable: "a".to_string(),
1195                    label: Some("Person".to_string()),
1196                    input: None,
1197                })),
1198                path_alias: None,
1199            })),
1200        }));
1201
1202        let mut binder = Binder::new();
1203        let result = binder.bind(&plan);
1204
1205        assert!(result.is_ok());
1206        let ctx = result.unwrap();
1207        assert!(ctx.contains("a"));
1208        assert!(ctx.contains("b"));
1209        assert!(ctx.contains("e"));
1210        assert!(ctx.get("a").unwrap().is_node);
1211        assert!(ctx.get("b").unwrap().is_node);
1212        assert!(ctx.get("e").unwrap().is_edge);
1213    }
1214
1215    #[test]
1216    fn test_bind_expand_from_undefined_variable() {
1217        // Tests that expanding from an undefined variable produces a clear error
1218        use crate::query::plan::{ExpandDirection, ExpandOp};
1219
1220        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1221            items: vec![ReturnItem {
1222                expression: LogicalExpression::Variable("b".to_string()),
1223                alias: None,
1224            }],
1225            distinct: false,
1226            input: Box::new(LogicalOperator::Expand(ExpandOp {
1227                from_variable: "undefined".to_string(), // not defined!
1228                to_variable: "b".to_string(),
1229                edge_variable: None,
1230                direction: ExpandDirection::Outgoing,
1231                edge_type: None,
1232                min_hops: 1,
1233                max_hops: Some(1),
1234                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1235                    variable: "a".to_string(),
1236                    label: None,
1237                    input: None,
1238                })),
1239                path_alias: None,
1240            })),
1241        }));
1242
1243        let mut binder = Binder::new();
1244        let result = binder.bind(&plan);
1245
1246        assert!(result.is_err());
1247        let err = result.unwrap_err();
1248        assert!(
1249            err.to_string().contains("Undefined variable 'undefined'"),
1250            "Expected error about undefined variable, got: {}",
1251            err
1252        );
1253    }
1254
1255    #[test]
1256    fn test_bind_return_with_aggregate_and_non_aggregate() {
1257        // Tests binding of aggregate functions alongside regular expressions
1258        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1259            items: vec![
1260                ReturnItem {
1261                    expression: LogicalExpression::FunctionCall {
1262                        name: "count".to_string(),
1263                        args: vec![LogicalExpression::Variable("n".to_string())],
1264                        distinct: false,
1265                    },
1266                    alias: Some("cnt".to_string()),
1267                },
1268                ReturnItem {
1269                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1270                    alias: Some("one".to_string()),
1271                },
1272            ],
1273            distinct: false,
1274            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1275                variable: "n".to_string(),
1276                label: Some("Person".to_string()),
1277                input: None,
1278            })),
1279        }));
1280
1281        let mut binder = Binder::new();
1282        let result = binder.bind(&plan);
1283
1284        // This should succeed - count(n) with literal is valid
1285        assert!(result.is_ok());
1286    }
1287
1288    #[test]
1289    fn test_bind_nested_property_access() {
1290        // Tests that nested property access on the same variable works
1291        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1292            items: vec![
1293                ReturnItem {
1294                    expression: LogicalExpression::Property {
1295                        variable: "n".to_string(),
1296                        property: "name".to_string(),
1297                    },
1298                    alias: None,
1299                },
1300                ReturnItem {
1301                    expression: LogicalExpression::Property {
1302                        variable: "n".to_string(),
1303                        property: "age".to_string(),
1304                    },
1305                    alias: None,
1306                },
1307            ],
1308            distinct: false,
1309            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1310                variable: "n".to_string(),
1311                label: Some("Person".to_string()),
1312                input: None,
1313            })),
1314        }));
1315
1316        let mut binder = Binder::new();
1317        let result = binder.bind(&plan);
1318
1319        assert!(result.is_ok());
1320    }
1321
1322    #[test]
1323    fn test_bind_binary_expression_with_undefined() {
1324        // Tests that binary expressions with undefined variables produce errors
1325        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1326            items: vec![ReturnItem {
1327                expression: LogicalExpression::Binary {
1328                    left: Box::new(LogicalExpression::Property {
1329                        variable: "n".to_string(),
1330                        property: "age".to_string(),
1331                    }),
1332                    op: BinaryOp::Add,
1333                    right: Box::new(LogicalExpression::Property {
1334                        variable: "m".to_string(), // undefined!
1335                        property: "age".to_string(),
1336                    }),
1337                },
1338                alias: Some("total".to_string()),
1339            }],
1340            distinct: false,
1341            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1342                variable: "n".to_string(),
1343                label: None,
1344                input: None,
1345            })),
1346        }));
1347
1348        let mut binder = Binder::new();
1349        let result = binder.bind(&plan);
1350
1351        assert!(result.is_err());
1352        assert!(
1353            result
1354                .unwrap_err()
1355                .to_string()
1356                .contains("Undefined variable 'm'")
1357        );
1358    }
1359
1360    #[test]
1361    fn test_bind_duplicate_variable_definition() {
1362        // Tests behavior when the same variable is defined twice (via two NodeScans)
1363        // This is typically not allowed or the second shadows the first
1364        use crate::query::plan::{JoinOp, JoinType};
1365
1366        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1367            items: vec![ReturnItem {
1368                expression: LogicalExpression::Variable("n".to_string()),
1369                alias: None,
1370            }],
1371            distinct: false,
1372            input: Box::new(LogicalOperator::Join(JoinOp {
1373                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1374                    variable: "n".to_string(),
1375                    label: Some("A".to_string()),
1376                    input: None,
1377                })),
1378                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1379                    variable: "m".to_string(), // different variable is fine
1380                    label: Some("B".to_string()),
1381                    input: None,
1382                })),
1383                join_type: JoinType::Inner,
1384                conditions: vec![],
1385            })),
1386        }));
1387
1388        let mut binder = Binder::new();
1389        let result = binder.bind(&plan);
1390
1391        // Join with different variables should work
1392        assert!(result.is_ok());
1393        let ctx = result.unwrap();
1394        assert!(ctx.contains("n"));
1395        assert!(ctx.contains("m"));
1396    }
1397
1398    #[test]
1399    fn test_bind_function_with_wrong_arity() {
1400        // Tests that functions with wrong number of arguments are handled
1401        // (behavior depends on whether binder validates arity)
1402        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1403            items: vec![ReturnItem {
1404                expression: LogicalExpression::FunctionCall {
1405                    name: "count".to_string(),
1406                    args: vec![], // count() needs an argument
1407                    distinct: false,
1408                },
1409                alias: None,
1410            }],
1411            distinct: false,
1412            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1413                variable: "n".to_string(),
1414                label: None,
1415                input: None,
1416            })),
1417        }));
1418
1419        let mut binder = Binder::new();
1420        let result = binder.bind(&plan);
1421
1422        // The binder may or may not catch this - if it passes, execution will fail
1423        // This test documents current behavior
1424        // If binding fails, that's fine; if it passes, execution will handle it
1425        let _ = result; // We're just testing it doesn't panic
1426    }
1427
1428    // --- Mutation operator validation ---
1429
1430    #[test]
1431    fn test_create_edge_rejects_undefined_source() {
1432        use crate::query::plan::CreateEdgeOp;
1433
1434        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1435            variable: Some("e".to_string()),
1436            from_variable: "ghost".to_string(), // not defined!
1437            to_variable: "b".to_string(),
1438            edge_type: "KNOWS".to_string(),
1439            properties: vec![],
1440            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1441                variable: "b".to_string(),
1442                label: None,
1443                input: None,
1444            })),
1445        }));
1446
1447        let mut binder = Binder::new();
1448        let err = binder.bind(&plan).unwrap_err();
1449        assert!(
1450            err.to_string().contains("Undefined variable 'ghost'"),
1451            "Should reject undefined source variable, got: {err}"
1452        );
1453    }
1454
1455    #[test]
1456    fn test_create_edge_rejects_undefined_target() {
1457        use crate::query::plan::CreateEdgeOp;
1458
1459        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1460            variable: None,
1461            from_variable: "a".to_string(),
1462            to_variable: "missing".to_string(), // not defined!
1463            edge_type: "KNOWS".to_string(),
1464            properties: vec![],
1465            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1466                variable: "a".to_string(),
1467                label: None,
1468                input: None,
1469            })),
1470        }));
1471
1472        let mut binder = Binder::new();
1473        let err = binder.bind(&plan).unwrap_err();
1474        assert!(
1475            err.to_string().contains("Undefined variable 'missing'"),
1476            "Should reject undefined target variable, got: {err}"
1477        );
1478    }
1479
1480    #[test]
1481    fn test_create_edge_validates_property_expressions() {
1482        use crate::query::plan::CreateEdgeOp;
1483
1484        // Source and target defined, but property references undefined variable
1485        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1486            variable: Some("e".to_string()),
1487            from_variable: "a".to_string(),
1488            to_variable: "b".to_string(),
1489            edge_type: "KNOWS".to_string(),
1490            properties: vec![(
1491                "since".to_string(),
1492                LogicalExpression::Property {
1493                    variable: "x".to_string(), // undefined!
1494                    property: "year".to_string(),
1495                },
1496            )],
1497            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1498                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1499                    variable: "a".to_string(),
1500                    label: None,
1501                    input: None,
1502                })),
1503                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1504                    variable: "b".to_string(),
1505                    label: None,
1506                    input: None,
1507                })),
1508                join_type: crate::query::plan::JoinType::Inner,
1509                conditions: vec![],
1510            })),
1511        }));
1512
1513        let mut binder = Binder::new();
1514        let err = binder.bind(&plan).unwrap_err();
1515        assert!(err.to_string().contains("Undefined variable 'x'"));
1516    }
1517
1518    #[test]
1519    fn test_set_property_rejects_undefined_variable() {
1520        use crate::query::plan::SetPropertyOp;
1521
1522        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1523            variable: "ghost".to_string(),
1524            properties: vec![(
1525                "name".to_string(),
1526                LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1527            )],
1528            replace: false,
1529            is_edge: false,
1530            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1531                variable: "n".to_string(),
1532                label: None,
1533                input: None,
1534            })),
1535        }));
1536
1537        let mut binder = Binder::new();
1538        let err = binder.bind(&plan).unwrap_err();
1539        assert!(
1540            err.to_string().contains("in SET"),
1541            "Error should indicate SET context, got: {err}"
1542        );
1543    }
1544
1545    #[test]
1546    fn test_delete_node_rejects_undefined_variable() {
1547        use crate::query::plan::DeleteNodeOp;
1548
1549        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1550            variable: "phantom".to_string(),
1551            detach: false,
1552            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1553                variable: "n".to_string(),
1554                label: None,
1555                input: None,
1556            })),
1557        }));
1558
1559        let mut binder = Binder::new();
1560        let err = binder.bind(&plan).unwrap_err();
1561        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1562    }
1563
/// Deleting an edge through an unbound variable must produce an undefined-variable error.
#[test]
fn test_delete_edge_rejects_undefined_variable() {
    use crate::query::plan::DeleteEdgeOp;

    // The scan binds `n`, while the delete refers to `gone`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let delete = DeleteEdgeOp {
        variable: String::from("gone"),
        input: Box::new(scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(delete));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'gone'"));
}
1581
1582    // --- WITH/Project clause ---
1583
/// An alias introduced by a projection must be visible to the RETURN above it.
#[test]
fn test_project_alias_becomes_available_downstream() {
    use crate::query::plan::{ProjectOp, Projection};

    // Plan shape: WITH n.name AS person_name RETURN person_name
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let name_as_alias = Projection {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("name"),
        },
        alias: Some(String::from("person_name")),
    };
    let project = LogicalOperator::Project(ProjectOp {
        projections: vec![name_as_alias],
        input: Box::new(scan),
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("person_name")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(project),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    let alias_bound = ctx.contains("person_name");
    assert!(alias_bound, "WITH alias should be available to RETURN");
}
1618
/// A projection whose expression names an unbound variable must fail to bind.
#[test]
fn test_project_rejects_undefined_expression() {
    use crate::query::plan::{ProjectOp, Projection};

    // The input is empty, so `nope` has nothing to resolve against.
    let projection = Projection {
        expression: LogicalExpression::Variable(String::from("nope")),
        alias: Some(String::from("x")),
    };
    let project = ProjectOp {
        projections: vec![projection],
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::Project(project));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);
    assert!(result.is_err(), "WITH on undefined variable should fail");
}
1635
1636    // --- UNWIND ---
1637
/// UNWIND must bind its element variable, and that variable is neither a node nor an edge.
#[test]
fn test_unwind_adds_element_variable() {
    use crate::query::plan::UnwindOp;

    // Unwind the literal list [1, 2] into `item`, then return `item`.
    let elements = vec![
        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
        LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
    ];
    let unwind = LogicalOperator::Unwind(UnwindOp {
        expression: LogicalExpression::List(elements),
        variable: String::from("item"),
        ordinality_var: None,
        offset_var: None,
        input: Box::new(LogicalOperator::Empty),
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("item")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(unwind),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("item"), "UNWIND variable should be in scope");

    let info = ctx.get("item").unwrap();
    let is_graph_element = info.is_node || info.is_edge;
    assert!(!is_graph_element, "UNWIND variable is not a graph element");
}
1669
1670    // --- MERGE ---
1671
/// MERGE with literal-only property lists must bind and register its variable as a node.
#[test]
fn test_merge_adds_variable_and_validates_properties() {
    use crate::query::plan::MergeOp;

    // Every property expression is a literal, so nothing can be undefined.
    let match_properties = vec![(
        String::from("name"),
        LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
    )];
    let on_create = vec![(
        String::from("created"),
        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
    )];
    let on_match = vec![(
        String::from("updated"),
        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
    )];
    let merge = LogicalOperator::Merge(MergeOp {
        variable: String::from("m"),
        labels: vec![String::from("Person")],
        match_properties,
        on_create,
        on_match,
        input: Box::new(LogicalOperator::Empty),
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("m")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(merge),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("m"));

    let merged = ctx.get("m").unwrap();
    assert!(merged.is_node, "MERGE variable should be a node");
}
1709
/// An ON CREATE assignment that reads from an unbound variable must fail to bind.
#[test]
fn test_merge_rejects_undefined_in_on_create() {
    use crate::query::plan::MergeOp;

    // `other` is never introduced anywhere in this plan.
    let reads_unbound = LogicalExpression::Property {
        variable: String::from("other"),
        property: String::from("name"),
    };
    let merge = MergeOp {
        variable: String::from("m"),
        labels: Vec::new(),
        match_properties: Vec::new(),
        on_create: vec![(String::from("name"), reads_unbound)],
        on_match: Vec::new(),
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::Merge(merge));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_err(),
        "ON CREATE referencing undefined variable should fail"
    );
}
1736
1737    // --- ShortestPath ---
1738
/// shortestPath with an unbound source variable must report the source context in its error.
#[test]
fn test_shortest_path_rejects_undefined_source() {
    use crate::query::plan::{ExpandDirection, ShortestPathOp};

    // Only the target `b` is bound; the source `missing` is not.
    let scan_target = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let shortest = ShortestPathOp {
        input: Box::new(scan_target),
        source_var: String::from("missing"),
        target_var: String::from("b"),
        edge_type: None,
        direction: ExpandDirection::Both,
        path_alias: String::from("p"),
        all_paths: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::ShortestPath(shortest));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("source in shortestPath"),
        "Error should mention shortestPath source context, got: {err}"
    );
}
1764
/// A valid shortestPath must bind both the path alias and the derived `_path_length_*` variable.
#[test]
fn test_shortest_path_adds_path_and_length_variables() {
    use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};

    // Cross join of two scans so that both endpoints `a` and `b` are bound.
    let scan_a = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let scan_b = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let endpoints = LogicalOperator::Join(JoinOp {
        left: Box::new(scan_a),
        right: Box::new(scan_b),
        join_type: JoinType::Cross,
        conditions: Vec::new(),
    });
    let shortest = ShortestPathOp {
        input: Box::new(endpoints),
        source_var: String::from("a"),
        target_var: String::from("b"),
        edge_type: Some(String::from("ROAD")),
        direction: ExpandDirection::Outgoing,
        path_alias: String::from("p"),
        all_paths: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::ShortestPath(shortest));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("p"), "Path alias should be bound");

    let length_bound = ctx.contains("_path_length_p");
    assert!(length_bound, "Path length variable should be auto-created");
}
1800
1801    // --- Expression validation edge cases ---
1802
/// Every WHEN condition of a CASE expression is validated, not just the first one.
#[test]
fn test_case_expression_validates_all_branches() {
    // First branch is valid: n.age > 18 => "adult".
    let age_over_18 = LogicalExpression::Binary {
        left: Box::new(LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("age"),
        }),
        op: BinaryOp::Gt,
        right: Box::new(LogicalExpression::Literal(
            grafeo_common::types::Value::Int64(18),
        )),
    };
    let adult_branch = (
        age_over_18,
        LogicalExpression::Literal(grafeo_common::types::Value::String("adult".into())),
    );

    // Second branch reads from `ghost`, which is never bound.
    let ghost_flag = LogicalExpression::Property {
        variable: String::from("ghost"),
        property: String::from("flag"),
    };
    let flagged_branch = (
        ghost_flag,
        LogicalExpression::Literal(grafeo_common::types::Value::String("flagged".into())),
    );

    let fallback = LogicalExpression::Literal(grafeo_common::types::Value::String(
        "other".into(),
    ));
    let case_expr = LogicalExpression::Case {
        operand: None,
        when_clauses: vec![adult_branch, flagged_branch],
        else_clause: Some(Box::new(fallback)),
    };

    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: case_expr,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(
        message.contains("ghost"),
        "CASE should validate all when-clause conditions"
    );
}
1857
/// The ELSE arm of a CASE expression is validated like any other sub-expression.
#[test]
fn test_case_expression_validates_else_clause() {
    // The single WHEN arm is all literals; only ELSE touches an unbound variable.
    let literal_branch = (
        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
    );
    let unbound_else = LogicalExpression::Property {
        variable: String::from("missing"),
        property: String::from("x"),
    };
    let case_expr = LogicalExpression::Case {
        operand: None,
        when_clauses: vec![literal_branch],
        else_clause: Some(Box::new(unbound_else)),
    };

    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: case_expr,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(
        message.contains("missing"),
        "CASE ELSE should validate its expression too"
    );
}
1890
/// The bound expressions of a slice access are validated: an unbound start must be reported.
#[test]
fn test_slice_access_validates_expressions() {
    // Base `n` is bound by the scan; the start bound is not.
    let slice = LogicalExpression::SliceAccess {
        base: Box::new(LogicalExpression::Variable(String::from("n"))),
        start: Some(Box::new(LogicalExpression::Variable(String::from(
            "undefined_start",
        )))),
        end: None,
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: slice,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("undefined_start"));
}
1916
/// The source list of a list comprehension must resolve against the enclosing scope.
#[test]
fn test_list_comprehension_validates_list_source() {
    // The comprehension iterates over `not_defined`, which the scan does not bind.
    let comprehension = LogicalExpression::ListComprehension {
        variable: String::from("x"),
        list_expr: Box::new(LogicalExpression::Variable(String::from("not_defined"))),
        filter_expr: None,
        map_expr: Box::new(LogicalExpression::Variable(String::from("x"))),
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: comprehension,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(
        message.contains("not_defined"),
        "List comprehension should validate source list expression"
    );
}
1944
/// `labels(..)`, `type(..)` and `id(..)` must each reject an argument that is not bound.
#[test]
fn test_labels_type_id_reject_undefined() {
    // Wraps a single expression in `RETURN <expr>` over an empty input, so no variable is bound.
    let return_over_empty = |expression: LogicalExpression| -> LogicalPlan {
        LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression,
                alias: None,
            }],
            distinct: false,
            input: Box::new(LogicalOperator::Empty),
        }))
    };

    // labels(x) where x is not defined
    let labels_plan = return_over_empty(LogicalExpression::Labels(String::from("x")));
    let mut labels_binder = Binder::new();
    assert!(
        labels_binder.bind(&labels_plan).is_err(),
        "labels(x) on undefined x should fail"
    );

    // type(e) where e is not defined
    let type_plan = return_over_empty(LogicalExpression::Type(String::from("e")));
    let mut type_binder = Binder::new();
    assert!(
        type_binder.bind(&type_plan).is_err(),
        "type(e) on undefined e should fail"
    );

    // id(n) where n is not defined
    let id_plan = return_over_empty(LogicalExpression::Id(String::from("n")));
    let mut id_binder = Binder::new();
    assert!(
        id_binder.bind(&id_plan).is_err(),
        "id(n) on undefined n should fail"
    );
}
1995
/// Expanding from a variable that was bound by UNWIND (not a node) must be rejected.
#[test]
fn test_expand_rejects_non_node_source() {
    use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};

    // UNWIND [] AS x binds `x` as a plain value.
    let unwind = LogicalOperator::Unwind(UnwindOp {
        expression: LogicalExpression::List(Vec::new()),
        variable: String::from("x"),
        ordinality_var: None,
        offset_var: None,
        input: Box::new(LogicalOperator::Empty),
    });
    // MATCH (x)-->(b) then tries to traverse from that non-node `x`.
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: String::from("x"),
        to_variable: String::from("b"),
        edge_variable: None,
        direction: ExpandDirection::Outgoing,
        edge_type: None,
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(unwind),
        path_alias: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("b")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("not a node"),
        "Expanding from non-node should fail, got: {err}"
    );
}
2034
/// Adding labels to an unbound variable must fail with an error that mentions "SET labels".
#[test]
fn test_add_label_rejects_undefined_variable() {
    use crate::query::plan::AddLabelOp;

    // Empty input: `missing` is never bound.
    let add_label = AddLabelOp {
        variable: String::from("missing"),
        labels: vec![String::from("Admin")],
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::AddLabel(add_label));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("SET labels"));
}
2049
/// Removing labels from an unbound variable must fail with an error that mentions
/// "REMOVE labels".
#[test]
fn test_remove_label_rejects_undefined_variable() {
    use crate::query::plan::RemoveLabelOp;

    // Empty input: `missing` is never bound.
    let remove_label = RemoveLabelOp {
        variable: String::from("missing"),
        labels: vec![String::from("Admin")],
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(remove_label));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("REMOVE labels"));
}
2064
/// Sort keys are validated: ordering by a property of an unbound variable must fail.
#[test]
fn test_sort_validates_key_expressions() {
    use crate::query::plan::{SortKey, SortOp, SortOrder};

    // The scan binds `n`, but the sort key reads `missing.name`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let key = SortKey {
        expression: LogicalExpression::Property {
            variable: String::from("missing"),
            property: String::from("name"),
        },
        order: SortOrder::Ascending,
    };
    let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
        keys: vec![key],
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_err(),
        "ORDER BY on undefined variable should fail"
    );
}
2090
/// A CREATE whose property expression refers to the node being created must still bind.
#[test]
fn test_create_node_adds_variable_before_property_validation() {
    use crate::query::plan::CreateNodeOp;

    // CREATE (n:Person {self_ref: n.name}) - the property value reads from `n` itself.
    let self_reference = LogicalExpression::Property {
        variable: String::from("n"),
        property: String::from("name"),
    };
    let create = CreateNodeOp {
        variable: String::from("n"),
        labels: vec![String::from("Person")],
        properties: vec![(String::from("self_ref"), self_reference)],
        input: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::CreateNode(create));

    let mut binder = Binder::new();
    // Binding succeeding here shows that `n` was already in scope when the
    // property expression was validated.
    let ctx = binder.bind(&plan).unwrap();
    let created = ctx.get("n").unwrap();
    assert!(created.is_node);
}
2115
/// Referencing a misspelled variable (`persn` vs. the bound `person`) must fail, and the
/// error text must at least name the offending variable.
#[test]
fn test_undefined_variable_suggests_similar() {
    // `person` is bound by the scan; RETURN uses the typo `persn`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("person"),
        label: None,
        input: None,
    });
    let typo = ReturnItem {
        expression: LogicalExpression::Variable(String::from("persn")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![typo],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let msg = binder.bind(&plan).unwrap_err().to_string();
    // Only the presence of the variable name is checked, not the suggestion text.
    let names_variable = msg.contains("persn");
    assert!(names_variable, "Error should mention the undefined variable");
}
2141
/// A variable named with the `_anon_` prefix binds successfully even though nothing defines it.
#[test]
fn test_anon_variables_skip_validation() {
    // RETURN _anon_42 over an empty input: no operator introduces the name.
    let anonymous = ReturnItem {
        expression: LogicalExpression::Variable(String::from("_anon_42")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![anonymous],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_ok(),
        "Anonymous variables should bypass validation"
    );
}
2161
/// Values inside a map literal are validated: an unbound variable as a value must fail.
#[test]
fn test_map_expression_validates_values() {
    // RETURN {key: undefined} over an empty input.
    let entries = vec![(
        String::from("key"),
        LogicalExpression::Variable(String::from("undefined")),
    )];
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Map(entries),
            alias: None,
        }],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(outcome.is_err(), "Map values should be validated");
}
2182
/// The query-vector expression of a vector scan is validated: an unbound variable there
/// must be named in the resulting error.
#[test]
fn test_vector_scan_validates_query_vector() {
    use crate::query::plan::VectorScanOp;

    // No input operator, so `undefined_vec` cannot resolve to anything.
    let scan = VectorScanOp {
        variable: String::from("result"),
        index_name: None,
        property: String::from("embedding"),
        label: Some(String::from("Doc")),
        query_vector: LogicalExpression::Variable(String::from("undefined_vec")),
        k: 10,
        metric: None,
        min_similarity: None,
        max_distance: None,
        input: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::VectorScan(scan));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("undefined_vec"));
}
2204}