Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113
114    /// Removes a variable from the context (used for temporary scoping).
115    pub fn remove_variable(&mut self, name: &str) {
116        self.variables.remove(name);
117        self.order.retain(|n| n != name);
118    }
119}
120
121/// Semantic binder for query plans.
122///
123/// The binder walks the logical plan and:
124/// 1. Collects all variable definitions
125/// 2. Validates that all variable references are valid
126/// 3. Infers types where possible
127/// 4. Reports semantic errors
128pub struct Binder {
129    /// The current binding context.
130    context: BindingContext,
131}
132
133impl Binder {
134    /// Creates a new binder.
135    #[must_use]
136    pub fn new() -> Self {
137        Self {
138            context: BindingContext::new(),
139        }
140    }
141
142    /// Binds a logical plan, returning the binding context.
143    ///
144    /// # Errors
145    ///
146    /// Returns an error if semantic validation fails.
147    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
148        self.bind_operator(&plan.root)?;
149        Ok(self.context.clone())
150    }
151
152    /// Binds a single logical operator.
153    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
154        match op {
155            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
156            LogicalOperator::Expand(expand) => self.bind_expand(expand),
157            LogicalOperator::Filter(filter) => self.bind_filter(filter),
158            LogicalOperator::Return(ret) => self.bind_return(ret),
159            LogicalOperator::Project(project) => {
160                self.bind_operator(&project.input)?;
161                for projection in &project.projections {
162                    self.validate_expression(&projection.expression)?;
163                    // Add the projection alias to the context (for WITH clause support)
164                    if let Some(ref alias) = projection.alias {
165                        // Determine the type from the expression
166                        let data_type = self.infer_expression_type(&projection.expression);
167                        self.context.add_variable(
168                            alias.clone(),
169                            VariableInfo {
170                                name: alias.clone(),
171                                data_type,
172                                is_node: false,
173                                is_edge: false,
174                            },
175                        );
176                    }
177                }
178                Ok(())
179            }
180            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
181            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
182            LogicalOperator::Sort(sort) => {
183                self.bind_operator(&sort.input)?;
184                for key in &sort.keys {
185                    self.validate_expression(&key.expression)?;
186                }
187                Ok(())
188            }
189            LogicalOperator::CreateNode(create) => {
190                // CreateNode introduces a new variable
191                if let Some(ref input) = create.input {
192                    self.bind_operator(input)?;
193                }
194                self.context.add_variable(
195                    create.variable.clone(),
196                    VariableInfo {
197                        name: create.variable.clone(),
198                        data_type: LogicalType::Node,
199                        is_node: true,
200                        is_edge: false,
201                    },
202                );
203                // Validate property expressions
204                for (_, expr) in &create.properties {
205                    self.validate_expression(expr)?;
206                }
207                Ok(())
208            }
209            LogicalOperator::EdgeScan(scan) => {
210                if let Some(ref input) = scan.input {
211                    self.bind_operator(input)?;
212                }
213                self.context.add_variable(
214                    scan.variable.clone(),
215                    VariableInfo {
216                        name: scan.variable.clone(),
217                        data_type: LogicalType::Edge,
218                        is_node: false,
219                        is_edge: true,
220                    },
221                );
222                Ok(())
223            }
224            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
225            LogicalOperator::Join(join) => self.bind_join(join),
226            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
227            LogicalOperator::CreateEdge(create) => {
228                self.bind_operator(&create.input)?;
229                // Validate that source and target variables are defined
230                if !self.context.contains(&create.from_variable) {
231                    return Err(undefined_variable_error(
232                        &create.from_variable,
233                        &self.context,
234                        " (source in CREATE EDGE)",
235                    ));
236                }
237                if !self.context.contains(&create.to_variable) {
238                    return Err(undefined_variable_error(
239                        &create.to_variable,
240                        &self.context,
241                        " (target in CREATE EDGE)",
242                    ));
243                }
244                // Add edge variable if present
245                if let Some(ref var) = create.variable {
246                    self.context.add_variable(
247                        var.clone(),
248                        VariableInfo {
249                            name: var.clone(),
250                            data_type: LogicalType::Edge,
251                            is_node: false,
252                            is_edge: true,
253                        },
254                    );
255                }
256                // Validate property expressions
257                for (_, expr) in &create.properties {
258                    self.validate_expression(expr)?;
259                }
260                Ok(())
261            }
262            LogicalOperator::DeleteNode(delete) => {
263                self.bind_operator(&delete.input)?;
264                // Validate that the variable to delete is defined
265                if !self.context.contains(&delete.variable) {
266                    return Err(undefined_variable_error(
267                        &delete.variable,
268                        &self.context,
269                        " in DELETE",
270                    ));
271                }
272                Ok(())
273            }
274            LogicalOperator::DeleteEdge(delete) => {
275                self.bind_operator(&delete.input)?;
276                // Validate that the variable to delete is defined
277                if !self.context.contains(&delete.variable) {
278                    return Err(undefined_variable_error(
279                        &delete.variable,
280                        &self.context,
281                        " in DELETE",
282                    ));
283                }
284                Ok(())
285            }
286            LogicalOperator::SetProperty(set) => {
287                self.bind_operator(&set.input)?;
288                // Validate that the variable to update is defined
289                if !self.context.contains(&set.variable) {
290                    return Err(undefined_variable_error(
291                        &set.variable,
292                        &self.context,
293                        " in SET",
294                    ));
295                }
296                // Validate property value expressions
297                for (_, expr) in &set.properties {
298                    self.validate_expression(expr)?;
299                }
300                Ok(())
301            }
302            LogicalOperator::Empty => Ok(()),
303
304            LogicalOperator::Unwind(unwind) => {
305                // First bind the input
306                self.bind_operator(&unwind.input)?;
307                // Validate the expression being unwound
308                self.validate_expression(&unwind.expression)?;
309                // Add the new variable to the context
310                self.context.add_variable(
311                    unwind.variable.clone(),
312                    VariableInfo {
313                        name: unwind.variable.clone(),
314                        data_type: LogicalType::Any, // Unwound elements can be any type
315                        is_node: false,
316                        is_edge: false,
317                    },
318                );
319                // Add ORDINALITY variable if present (1-based index)
320                if let Some(ref ord_var) = unwind.ordinality_var {
321                    self.context.add_variable(
322                        ord_var.clone(),
323                        VariableInfo {
324                            name: ord_var.clone(),
325                            data_type: LogicalType::Int64,
326                            is_node: false,
327                            is_edge: false,
328                        },
329                    );
330                }
331                // Add OFFSET variable if present (0-based index)
332                if let Some(ref off_var) = unwind.offset_var {
333                    self.context.add_variable(
334                        off_var.clone(),
335                        VariableInfo {
336                            name: off_var.clone(),
337                            data_type: LogicalType::Int64,
338                            is_node: false,
339                            is_edge: false,
340                        },
341                    );
342                }
343                Ok(())
344            }
345
346            // RDF/SPARQL operators
347            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
348            LogicalOperator::Union(union) => {
349                for input in &union.inputs {
350                    self.bind_operator(input)?;
351                }
352                Ok(())
353            }
354            LogicalOperator::LeftJoin(lj) => {
355                self.bind_operator(&lj.left)?;
356                self.bind_operator(&lj.right)?;
357                if let Some(ref cond) = lj.condition {
358                    self.validate_expression(cond)?;
359                }
360                Ok(())
361            }
362            LogicalOperator::AntiJoin(aj) => {
363                self.bind_operator(&aj.left)?;
364                self.bind_operator(&aj.right)?;
365                Ok(())
366            }
367            LogicalOperator::Bind(bind) => {
368                self.bind_operator(&bind.input)?;
369                self.validate_expression(&bind.expression)?;
370                self.context.add_variable(
371                    bind.variable.clone(),
372                    VariableInfo {
373                        name: bind.variable.clone(),
374                        data_type: LogicalType::Any,
375                        is_node: false,
376                        is_edge: false,
377                    },
378                );
379                Ok(())
380            }
381            LogicalOperator::Merge(merge) => {
382                // First bind the input
383                self.bind_operator(&merge.input)?;
384                // Validate the match property expressions
385                for (_, expr) in &merge.match_properties {
386                    self.validate_expression(expr)?;
387                }
388                // Validate the ON CREATE property expressions
389                for (_, expr) in &merge.on_create {
390                    self.validate_expression(expr)?;
391                }
392                // Validate the ON MATCH property expressions
393                for (_, expr) in &merge.on_match {
394                    self.validate_expression(expr)?;
395                }
396                // MERGE introduces a new variable
397                self.context.add_variable(
398                    merge.variable.clone(),
399                    VariableInfo {
400                        name: merge.variable.clone(),
401                        data_type: LogicalType::Node,
402                        is_node: true,
403                        is_edge: false,
404                    },
405                );
406                Ok(())
407            }
408            LogicalOperator::MergeRelationship(merge_rel) => {
409                self.bind_operator(&merge_rel.input)?;
410                // Validate source and target variables exist
411                if !self.context.contains(&merge_rel.source_variable) {
412                    return Err(undefined_variable_error(
413                        &merge_rel.source_variable,
414                        &self.context,
415                        " in MERGE relationship source",
416                    ));
417                }
418                if !self.context.contains(&merge_rel.target_variable) {
419                    return Err(undefined_variable_error(
420                        &merge_rel.target_variable,
421                        &self.context,
422                        " in MERGE relationship target",
423                    ));
424                }
425                for (_, expr) in &merge_rel.match_properties {
426                    self.validate_expression(expr)?;
427                }
428                for (_, expr) in &merge_rel.on_create {
429                    self.validate_expression(expr)?;
430                }
431                for (_, expr) in &merge_rel.on_match {
432                    self.validate_expression(expr)?;
433                }
434                // MERGE relationship introduces the edge variable
435                self.context.add_variable(
436                    merge_rel.variable.clone(),
437                    VariableInfo {
438                        name: merge_rel.variable.clone(),
439                        data_type: LogicalType::Edge,
440                        is_node: false,
441                        is_edge: true,
442                    },
443                );
444                Ok(())
445            }
446            LogicalOperator::AddLabel(add_label) => {
447                self.bind_operator(&add_label.input)?;
448                // Validate that the variable exists
449                if !self.context.contains(&add_label.variable) {
450                    return Err(undefined_variable_error(
451                        &add_label.variable,
452                        &self.context,
453                        " in SET labels",
454                    ));
455                }
456                Ok(())
457            }
458            LogicalOperator::RemoveLabel(remove_label) => {
459                self.bind_operator(&remove_label.input)?;
460                // Validate that the variable exists
461                if !self.context.contains(&remove_label.variable) {
462                    return Err(undefined_variable_error(
463                        &remove_label.variable,
464                        &self.context,
465                        " in REMOVE labels",
466                    ));
467                }
468                Ok(())
469            }
470            LogicalOperator::ShortestPath(sp) => {
471                // First bind the input
472                self.bind_operator(&sp.input)?;
473                // Validate that source and target variables are defined
474                if !self.context.contains(&sp.source_var) {
475                    return Err(undefined_variable_error(
476                        &sp.source_var,
477                        &self.context,
478                        " (source in shortestPath)",
479                    ));
480                }
481                if !self.context.contains(&sp.target_var) {
482                    return Err(undefined_variable_error(
483                        &sp.target_var,
484                        &self.context,
485                        " (target in shortestPath)",
486                    ));
487                }
488                // Add the path alias variable to the context
489                self.context.add_variable(
490                    sp.path_alias.clone(),
491                    VariableInfo {
492                        name: sp.path_alias.clone(),
493                        data_type: LogicalType::Any, // Path is a complex type
494                        is_node: false,
495                        is_edge: false,
496                    },
497                );
498                // Also add the path length variable for length(p) calls
499                let path_length_var = format!("_path_length_{}", sp.path_alias);
500                self.context.add_variable(
501                    path_length_var.clone(),
502                    VariableInfo {
503                        name: path_length_var,
504                        data_type: LogicalType::Int64,
505                        is_node: false,
506                        is_edge: false,
507                    },
508                );
509                Ok(())
510            }
511            // SPARQL Update operators - these don't require variable binding
512            LogicalOperator::InsertTriple(insert) => {
513                if let Some(ref input) = insert.input {
514                    self.bind_operator(input)?;
515                }
516                Ok(())
517            }
518            LogicalOperator::DeleteTriple(delete) => {
519                if let Some(ref input) = delete.input {
520                    self.bind_operator(input)?;
521                }
522                Ok(())
523            }
524            LogicalOperator::Modify(modify) => {
525                self.bind_operator(&modify.where_clause)?;
526                Ok(())
527            }
528            LogicalOperator::ClearGraph(_)
529            | LogicalOperator::CreateGraph(_)
530            | LogicalOperator::DropGraph(_)
531            | LogicalOperator::LoadGraph(_)
532            | LogicalOperator::CopyGraph(_)
533            | LogicalOperator::MoveGraph(_)
534            | LogicalOperator::AddGraph(_)
535            | LogicalOperator::HorizontalAggregate(_) => Ok(()),
536            LogicalOperator::VectorScan(scan) => {
537                // VectorScan introduces a variable for matched nodes
538                if let Some(ref input) = scan.input {
539                    self.bind_operator(input)?;
540                }
541                self.context.add_variable(
542                    scan.variable.clone(),
543                    VariableInfo {
544                        name: scan.variable.clone(),
545                        data_type: LogicalType::Node,
546                        is_node: true,
547                        is_edge: false,
548                    },
549                );
550                // Validate the query vector expression
551                self.validate_expression(&scan.query_vector)?;
552                Ok(())
553            }
554            LogicalOperator::VectorJoin(join) => {
555                // VectorJoin takes input from left side and produces right-side matches
556                self.bind_operator(&join.input)?;
557                // Add right variable for matched nodes
558                self.context.add_variable(
559                    join.right_variable.clone(),
560                    VariableInfo {
561                        name: join.right_variable.clone(),
562                        data_type: LogicalType::Node,
563                        is_node: true,
564                        is_edge: false,
565                    },
566                );
567                // Optionally add score variable
568                if let Some(ref score_var) = join.score_variable {
569                    self.context.add_variable(
570                        score_var.clone(),
571                        VariableInfo {
572                            name: score_var.clone(),
573                            data_type: LogicalType::Float64,
574                            is_node: false,
575                            is_edge: false,
576                        },
577                    );
578                }
579                // Validate the query vector expression
580                self.validate_expression(&join.query_vector)?;
581                Ok(())
582            }
583            LogicalOperator::MapCollect(mc) => {
584                self.bind_operator(&mc.input)?;
585                self.context.add_variable(
586                    mc.alias.clone(),
587                    VariableInfo {
588                        name: mc.alias.clone(),
589                        data_type: LogicalType::Any,
590                        is_node: false,
591                        is_edge: false,
592                    },
593                );
594                Ok(())
595            }
596            LogicalOperator::Except(except) => {
597                self.bind_operator(&except.left)?;
598                self.bind_operator(&except.right)?;
599                Ok(())
600            }
601            LogicalOperator::Intersect(intersect) => {
602                self.bind_operator(&intersect.left)?;
603                self.bind_operator(&intersect.right)?;
604                Ok(())
605            }
606            LogicalOperator::Otherwise(otherwise) => {
607                self.bind_operator(&otherwise.left)?;
608                self.bind_operator(&otherwise.right)?;
609                Ok(())
610            }
611            LogicalOperator::Apply(apply) => {
612                self.bind_operator(&apply.input)?;
613                self.bind_operator(&apply.subplan)?;
614                // Register output columns from the subplan so the outer Return
615                // can reference them (e.g., VALUE subquery lifting).
616                Self::register_subplan_columns(&apply.subplan, &mut self.context);
617                Ok(())
618            }
619            LogicalOperator::MultiWayJoin(mwj) => {
620                for input in &mwj.inputs {
621                    self.bind_operator(input)?;
622                }
623                for cond in &mwj.conditions {
624                    self.validate_expression(&cond.left)?;
625                    self.validate_expression(&cond.right)?;
626                }
627                Ok(())
628            }
629            LogicalOperator::ParameterScan(param_scan) => {
630                // Register parameter columns as variables (injected by outer Apply)
631                for col in &param_scan.columns {
632                    self.context.add_variable(
633                        col.clone(),
634                        VariableInfo {
635                            name: col.clone(),
636                            data_type: LogicalType::Any,
637                            is_node: true,
638                            is_edge: false,
639                        },
640                    );
641                }
642                Ok(())
643            }
644            // DDL operators don't need binding: they're handled before the binder
645            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
646            // Procedure calls: register yielded columns as variables for downstream operators
647            LogicalOperator::CallProcedure(call) => {
648                if let Some(yields) = &call.yield_items {
649                    for item in yields {
650                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
651                        self.context.add_variable(
652                            var_name.to_string(),
653                            VariableInfo {
654                                name: var_name.to_string(),
655                                data_type: LogicalType::Any,
656                                is_node: false,
657                                is_edge: false,
658                            },
659                        );
660                    }
661                }
662                Ok(())
663            }
664            LogicalOperator::LoadData(load) => {
665                // The row variable is bound as Any (Map or List depending on WITH HEADERS)
666                self.context.add_variable(
667                    load.variable.clone(),
668                    VariableInfo {
669                        name: load.variable.clone(),
670                        data_type: LogicalType::Any,
671                        is_node: false,
672                        is_edge: false,
673                    },
674                );
675                Ok(())
676            }
677        }
678    }
679
680    /// Binds a triple scan operator (for RDF/SPARQL).
681    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
682        use crate::query::plan::TripleComponent;
683
684        // First bind the input if present
685        if let Some(ref input) = scan.input {
686            self.bind_operator(input)?;
687        }
688
689        // Add variables for subject, predicate, object
690        if let TripleComponent::Variable(name) = &scan.subject
691            && !self.context.contains(name)
692        {
693            self.context.add_variable(
694                name.clone(),
695                VariableInfo {
696                    name: name.clone(),
697                    data_type: LogicalType::Any, // RDF term
698                    is_node: false,
699                    is_edge: false,
700                },
701            );
702        }
703
704        if let TripleComponent::Variable(name) = &scan.predicate
705            && !self.context.contains(name)
706        {
707            self.context.add_variable(
708                name.clone(),
709                VariableInfo {
710                    name: name.clone(),
711                    data_type: LogicalType::Any, // IRI
712                    is_node: false,
713                    is_edge: false,
714                },
715            );
716        }
717
718        if let TripleComponent::Variable(name) = &scan.object
719            && !self.context.contains(name)
720        {
721            self.context.add_variable(
722                name.clone(),
723                VariableInfo {
724                    name: name.clone(),
725                    data_type: LogicalType::Any, // RDF term
726                    is_node: false,
727                    is_edge: false,
728                },
729            );
730        }
731
732        if let Some(TripleComponent::Variable(name)) = &scan.graph
733            && !self.context.contains(name)
734        {
735            self.context.add_variable(
736                name.clone(),
737                VariableInfo {
738                    name: name.clone(),
739                    data_type: LogicalType::Any, // IRI
740                    is_node: false,
741                    is_edge: false,
742                },
743            );
744        }
745
746        Ok(())
747    }
748
749    /// Binds a node scan operator.
750    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
751        // First bind the input if present
752        if let Some(ref input) = scan.input {
753            self.bind_operator(input)?;
754        }
755
756        // Add the scanned variable to scope
757        self.context.add_variable(
758            scan.variable.clone(),
759            VariableInfo {
760                name: scan.variable.clone(),
761                data_type: LogicalType::Node,
762                is_node: true,
763                is_edge: false,
764            },
765        );
766
767        Ok(())
768    }
769
770    /// Binds an expand operator.
771    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
772        // First bind the input
773        self.bind_operator(&expand.input)?;
774
775        // Validate that the source variable is defined
776        if !self.context.contains(&expand.from_variable) {
777            return Err(undefined_variable_error(
778                &expand.from_variable,
779                &self.context,
780                " in EXPAND",
781            ));
782        }
783
784        // Validate that the source is a node
785        if let Some(info) = self.context.get(&expand.from_variable)
786            && !info.is_node
787        {
788            return Err(binding_error(format!(
789                "Variable '{}' is not a node, cannot expand from it",
790                expand.from_variable
791            )));
792        }
793
794        // Add edge variable if present
795        if let Some(ref edge_var) = expand.edge_variable {
796            self.context.add_variable(
797                edge_var.clone(),
798                VariableInfo {
799                    name: edge_var.clone(),
800                    data_type: LogicalType::Edge,
801                    is_node: false,
802                    is_edge: true,
803                },
804            );
805        }
806
807        // Add target variable
808        self.context.add_variable(
809            expand.to_variable.clone(),
810            VariableInfo {
811                name: expand.to_variable.clone(),
812                data_type: LogicalType::Node,
813                is_node: true,
814                is_edge: false,
815            },
816        );
817
818        // Add path variables for variable-length paths
819        if let Some(ref path_alias) = expand.path_alias {
820            // Register the path variable itself (e.g. p in MATCH p=...)
821            self.context.add_variable(
822                path_alias.clone(),
823                VariableInfo {
824                    name: path_alias.clone(),
825                    data_type: LogicalType::Any,
826                    is_node: false,
827                    is_edge: false,
828                },
829            );
830            // length(p) → _path_length_p
831            let path_length_var = format!("_path_length_{}", path_alias);
832            self.context.add_variable(
833                path_length_var.clone(),
834                VariableInfo {
835                    name: path_length_var,
836                    data_type: LogicalType::Int64,
837                    is_node: false,
838                    is_edge: false,
839                },
840            );
841            // nodes(p) → _path_nodes_p
842            let path_nodes_var = format!("_path_nodes_{}", path_alias);
843            self.context.add_variable(
844                path_nodes_var.clone(),
845                VariableInfo {
846                    name: path_nodes_var,
847                    data_type: LogicalType::Any,
848                    is_node: false,
849                    is_edge: false,
850                },
851            );
852            // edges(p) → _path_edges_p
853            let path_edges_var = format!("_path_edges_{}", path_alias);
854            self.context.add_variable(
855                path_edges_var.clone(),
856                VariableInfo {
857                    name: path_edges_var,
858                    data_type: LogicalType::Any,
859                    is_node: false,
860                    is_edge: false,
861                },
862            );
863        }
864
865        Ok(())
866    }
867
868    /// Binds a filter operator.
869    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
870        // First bind the input
871        self.bind_operator(&filter.input)?;
872
873        // Validate the predicate expression
874        self.validate_expression(&filter.predicate)?;
875
876        Ok(())
877    }
878
879    /// Registers output columns from a subplan into the binding context.
880    /// Walks through wrapping operators to find a Return and extracts column names.
881    fn register_subplan_columns(plan: &LogicalOperator, ctx: &mut BindingContext) {
882        match plan {
883            LogicalOperator::Return(ret) => {
884                for item in &ret.items {
885                    let col_name = if let Some(alias) = &item.alias {
886                        alias.clone()
887                    } else {
888                        match &item.expression {
889                            LogicalExpression::Variable(name) => name.clone(),
890                            LogicalExpression::Property { variable, property } => {
891                                format!("{variable}.{property}")
892                            }
893                            _ => continue,
894                        }
895                    };
896                    ctx.add_variable(
897                        col_name.clone(),
898                        VariableInfo {
899                            name: col_name,
900                            data_type: LogicalType::Any,
901                            is_node: false,
902                            is_edge: false,
903                        },
904                    );
905                }
906            }
907            LogicalOperator::Sort(s) => Self::register_subplan_columns(&s.input, ctx),
908            LogicalOperator::Limit(l) => Self::register_subplan_columns(&l.input, ctx),
909            LogicalOperator::Distinct(d) => Self::register_subplan_columns(&d.input, ctx),
910            LogicalOperator::Aggregate(agg) => {
911                // Aggregate produces named output columns
912                for expr in &agg.aggregates {
913                    if let Some(alias) = &expr.alias {
914                        ctx.add_variable(
915                            alias.clone(),
916                            VariableInfo {
917                                name: alias.clone(),
918                                data_type: LogicalType::Any,
919                                is_node: false,
920                                is_edge: false,
921                            },
922                        );
923                    }
924                }
925            }
926            _ => {}
927        }
928    }
929
930    /// Binds a return operator.
931    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
932        // First bind the input
933        self.bind_operator(&ret.input)?;
934
935        // Validate all return expressions and register aliases
936        // (aliases must be visible to parent Sort for ORDER BY resolution)
937        for item in &ret.items {
938            self.validate_return_item(item)?;
939            if let Some(ref alias) = item.alias {
940                let data_type = self.infer_expression_type(&item.expression);
941                self.context.add_variable(
942                    alias.clone(),
943                    VariableInfo {
944                        name: alias.clone(),
945                        data_type,
946                        is_node: false,
947                        is_edge: false,
948                    },
949                );
950            }
951        }
952
953        Ok(())
954    }
955
956    /// Validates a return item.
957    fn validate_return_item(&mut self, item: &ReturnItem) -> Result<()> {
958        self.validate_expression(&item.expression)
959    }
960
961    /// Validates that an expression only references defined variables.
962    fn validate_expression(&mut self, expr: &LogicalExpression) -> Result<()> {
963        match expr {
964            LogicalExpression::Variable(name) => {
965                // "*" is a wildcard marker for RETURN *, expanded by the planner
966                if name == "*" {
967                    return Ok(());
968                }
969                if !self.context.contains(name) && !name.starts_with("_anon_") {
970                    return Err(undefined_variable_error(name, &self.context, ""));
971                }
972                Ok(())
973            }
974            LogicalExpression::Property { variable, .. } => {
975                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
976                    return Err(undefined_variable_error(
977                        variable,
978                        &self.context,
979                        " in property access",
980                    ));
981                }
982                Ok(())
983            }
984            LogicalExpression::Literal(_) => Ok(()),
985            LogicalExpression::Binary { left, right, .. } => {
986                self.validate_expression(left)?;
987                self.validate_expression(right)
988            }
989            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
990            LogicalExpression::FunctionCall { args, .. } => {
991                for arg in args {
992                    self.validate_expression(arg)?;
993                }
994                Ok(())
995            }
996            LogicalExpression::List(items) => {
997                for item in items {
998                    self.validate_expression(item)?;
999                }
1000                Ok(())
1001            }
1002            LogicalExpression::Map(pairs) => {
1003                for (_, value) in pairs {
1004                    self.validate_expression(value)?;
1005                }
1006                Ok(())
1007            }
1008            LogicalExpression::IndexAccess { base, index } => {
1009                self.validate_expression(base)?;
1010                self.validate_expression(index)
1011            }
1012            LogicalExpression::SliceAccess { base, start, end } => {
1013                self.validate_expression(base)?;
1014                if let Some(s) = start {
1015                    self.validate_expression(s)?;
1016                }
1017                if let Some(e) = end {
1018                    self.validate_expression(e)?;
1019                }
1020                Ok(())
1021            }
1022            LogicalExpression::Case {
1023                operand,
1024                when_clauses,
1025                else_clause,
1026            } => {
1027                if let Some(op) = operand {
1028                    self.validate_expression(op)?;
1029                }
1030                for (cond, result) in when_clauses {
1031                    self.validate_expression(cond)?;
1032                    self.validate_expression(result)?;
1033                }
1034                if let Some(else_expr) = else_clause {
1035                    self.validate_expression(else_expr)?;
1036                }
1037                Ok(())
1038            }
1039            // Parameter references are validated externally
1040            LogicalExpression::Parameter(_) => Ok(()),
1041            // labels(n), type(e), id(n) need the variable to be defined
1042            LogicalExpression::Labels(var)
1043            | LogicalExpression::Type(var)
1044            | LogicalExpression::Id(var) => {
1045                if !self.context.contains(var) && !var.starts_with("_anon_") {
1046                    return Err(undefined_variable_error(var, &self.context, " in function"));
1047                }
1048                Ok(())
1049            }
1050            LogicalExpression::ListComprehension { list_expr, .. } => {
1051                // Validate the list expression against the outer context.
1052                // The filter and map expressions use the iteration variable
1053                // which is locally scoped, so we skip validating them here.
1054                self.validate_expression(list_expr)?;
1055                Ok(())
1056            }
1057            LogicalExpression::ListPredicate { list_expr, .. } => {
1058                // Validate the list expression against the outer context.
1059                // The predicate uses the iteration variable which is locally
1060                // scoped, so we skip validating it against the outer context.
1061                self.validate_expression(list_expr)?;
1062                Ok(())
1063            }
1064            LogicalExpression::ExistsSubquery(subquery)
1065            | LogicalExpression::CountSubquery(subquery)
1066            | LogicalExpression::ValueSubquery(subquery) => {
1067                // Subqueries have their own binding context
1068                // For now, just validate the structure exists
1069                let _ = subquery; // Would need recursive binding
1070                Ok(())
1071            }
1072            LogicalExpression::PatternComprehension {
1073                subplan,
1074                projection,
1075            } => {
1076                // Bind the subplan to register pattern variables (e.g., `f` in `(p)-[:KNOWS]->(f)`)
1077                self.bind_operator(subplan)?;
1078                // Now validate the projection expression (e.g., `f.name`)
1079                self.validate_expression(projection)
1080            }
1081            LogicalExpression::MapProjection { base, entries } => {
1082                if !self.context.contains(base) && !base.starts_with("_anon_") {
1083                    return Err(undefined_variable_error(
1084                        base,
1085                        &self.context,
1086                        " in map projection",
1087                    ));
1088                }
1089                for entry in entries {
1090                    if let crate::query::plan::MapProjectionEntry::LiteralEntry(_, expr) = entry {
1091                        self.validate_expression(expr)?;
1092                    }
1093                }
1094                Ok(())
1095            }
1096            LogicalExpression::Reduce {
1097                accumulator,
1098                initial,
1099                variable,
1100                list,
1101                expression,
1102            } => {
1103                self.validate_expression(initial)?;
1104                self.validate_expression(list)?;
1105                // accumulator and variable are locally scoped: inject them
1106                // into context, validate body, then remove
1107                let had_acc = self.context.contains(accumulator);
1108                let had_var = self.context.contains(variable);
1109                if !had_acc {
1110                    self.context.add_variable(
1111                        accumulator.clone(),
1112                        VariableInfo {
1113                            name: accumulator.clone(),
1114                            data_type: LogicalType::Any,
1115                            is_node: false,
1116                            is_edge: false,
1117                        },
1118                    );
1119                }
1120                if !had_var {
1121                    self.context.add_variable(
1122                        variable.clone(),
1123                        VariableInfo {
1124                            name: variable.clone(),
1125                            data_type: LogicalType::Any,
1126                            is_node: false,
1127                            is_edge: false,
1128                        },
1129                    );
1130                }
1131                self.validate_expression(expression)?;
1132                if !had_acc {
1133                    self.context.remove_variable(accumulator);
1134                }
1135                if !had_var {
1136                    self.context.remove_variable(variable);
1137                }
1138                Ok(())
1139            }
1140        }
1141    }
1142
1143    /// Infers the type of an expression for use in WITH clause aliasing.
1144    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
1145        match expr {
1146            LogicalExpression::Variable(name) => {
1147                // Look up the variable type from context
1148                self.context
1149                    .get(name)
1150                    .map_or(LogicalType::Any, |info| info.data_type.clone())
1151            }
1152            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
1153            LogicalExpression::Literal(value) => {
1154                // Infer type from literal value
1155                use grafeo_common::types::Value;
1156                match value {
1157                    Value::Bool(_) => LogicalType::Bool,
1158                    Value::Int64(_) => LogicalType::Int64,
1159                    Value::Float64(_) => LogicalType::Float64,
1160                    Value::String(_) => LogicalType::String,
1161                    Value::List(_) => LogicalType::Any, // Complex type
1162                    Value::Map(_) => LogicalType::Any,  // Complex type
1163                    Value::Null => LogicalType::Any,
1164                    _ => LogicalType::Any,
1165                }
1166            }
1167            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
1168            LogicalExpression::Unary { .. } => LogicalType::Any,
1169            LogicalExpression::FunctionCall { name, .. } => {
1170                // Infer based on function name
1171                match name.to_lowercase().as_str() {
1172                    "count" | "sum" | "id" => LogicalType::Int64,
1173                    "avg" => LogicalType::Float64,
1174                    "type" => LogicalType::String,
1175                    // List-returning functions use Any since we don't track element type
1176                    "labels" | "collect" => LogicalType::Any,
1177                    _ => LogicalType::Any,
1178                }
1179            }
1180            LogicalExpression::List(_) => LogicalType::Any, // Complex type
1181            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
1182            _ => LogicalType::Any,
1183        }
1184    }
1185
1186    /// Binds a join operator.
1187    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
1188        // Bind both sides of the join
1189        self.bind_operator(&join.left)?;
1190        self.bind_operator(&join.right)?;
1191
1192        // Validate join conditions
1193        for condition in &join.conditions {
1194            self.validate_expression(&condition.left)?;
1195            self.validate_expression(&condition.right)?;
1196        }
1197
1198        Ok(())
1199    }
1200
1201    /// Binds an aggregate operator.
1202    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
1203        // Bind the input first
1204        self.bind_operator(&agg.input)?;
1205
1206        // Validate group by expressions
1207        for expr in &agg.group_by {
1208            self.validate_expression(expr)?;
1209        }
1210
1211        // Validate aggregate expressions
1212        for agg_expr in &agg.aggregates {
1213            if let Some(ref expr) = agg_expr.expression {
1214                self.validate_expression(expr)?;
1215            }
1216            // Add the alias as a new variable if present
1217            if let Some(ref alias) = agg_expr.alias {
1218                self.context.add_variable(
1219                    alias.clone(),
1220                    VariableInfo {
1221                        name: alias.clone(),
1222                        data_type: LogicalType::Any,
1223                        is_node: false,
1224                        is_edge: false,
1225                    },
1226                );
1227            }
1228        }
1229
1230        // Register group-by output column names so ORDER BY / HAVING
1231        // can reference them (e.g. "n.city" from Property(n, city)).
1232        for expr in &agg.group_by {
1233            let col_name = crate::query::planner::common::expression_to_string(expr);
1234            if !self.context.contains(&col_name) {
1235                self.context.add_variable(
1236                    col_name.clone(),
1237                    VariableInfo {
1238                        name: col_name,
1239                        data_type: LogicalType::Any,
1240                        is_node: false,
1241                        is_edge: false,
1242                    },
1243                );
1244            }
1245        }
1246
1247        Ok(())
1248    }
1249}
1250
1251impl Default for Binder {
1252    fn default() -> Self {
1253        Self::new()
1254    }
1255}
1256
1257#[cfg(test)]
1258mod tests {
1259    use super::*;
1260    use crate::query::plan::{BinaryOp, FilterOp};
1261
1262    #[test]
1263    fn test_bind_simple_scan() {
1264        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1265            items: vec![ReturnItem {
1266                expression: LogicalExpression::Variable("n".to_string()),
1267                alias: None,
1268            }],
1269            distinct: false,
1270            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1271                variable: "n".to_string(),
1272                label: Some("Person".to_string()),
1273                input: None,
1274            })),
1275        }));
1276
1277        let mut binder = Binder::new();
1278        let result = binder.bind(&plan);
1279
1280        assert!(result.is_ok());
1281        let ctx = result.unwrap();
1282        assert!(ctx.contains("n"));
1283        assert!(ctx.get("n").unwrap().is_node);
1284    }
1285
1286    #[test]
1287    fn test_bind_undefined_variable() {
1288        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1289            items: vec![ReturnItem {
1290                expression: LogicalExpression::Variable("undefined".to_string()),
1291                alias: None,
1292            }],
1293            distinct: false,
1294            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1295                variable: "n".to_string(),
1296                label: None,
1297                input: None,
1298            })),
1299        }));
1300
1301        let mut binder = Binder::new();
1302        let result = binder.bind(&plan);
1303
1304        assert!(result.is_err());
1305        let err = result.unwrap_err();
1306        assert!(err.to_string().contains("Undefined variable"));
1307    }
1308
1309    #[test]
1310    fn test_bind_property_access() {
1311        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1312            items: vec![ReturnItem {
1313                expression: LogicalExpression::Property {
1314                    variable: "n".to_string(),
1315                    property: "name".to_string(),
1316                },
1317                alias: None,
1318            }],
1319            distinct: false,
1320            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1321                variable: "n".to_string(),
1322                label: Some("Person".to_string()),
1323                input: None,
1324            })),
1325        }));
1326
1327        let mut binder = Binder::new();
1328        let result = binder.bind(&plan);
1329
1330        assert!(result.is_ok());
1331    }
1332
1333    #[test]
1334    fn test_bind_filter_with_undefined_variable() {
1335        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1336            items: vec![ReturnItem {
1337                expression: LogicalExpression::Variable("n".to_string()),
1338                alias: None,
1339            }],
1340            distinct: false,
1341            input: Box::new(LogicalOperator::Filter(FilterOp {
1342                predicate: LogicalExpression::Binary {
1343                    left: Box::new(LogicalExpression::Property {
1344                        variable: "m".to_string(), // undefined!
1345                        property: "age".to_string(),
1346                    }),
1347                    op: BinaryOp::Gt,
1348                    right: Box::new(LogicalExpression::Literal(
1349                        grafeo_common::types::Value::Int64(30),
1350                    )),
1351                },
1352                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1353                    variable: "n".to_string(),
1354                    label: None,
1355                    input: None,
1356                })),
1357                pushdown_hint: None,
1358            })),
1359        }));
1360
1361        let mut binder = Binder::new();
1362        let result = binder.bind(&plan);
1363
1364        assert!(result.is_err());
1365        let err = result.unwrap_err();
1366        assert!(err.to_string().contains("Undefined variable 'm'"));
1367    }
1368
1369    #[test]
1370    fn test_bind_expand() {
1371        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1372
1373        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1374            items: vec![
1375                ReturnItem {
1376                    expression: LogicalExpression::Variable("a".to_string()),
1377                    alias: None,
1378                },
1379                ReturnItem {
1380                    expression: LogicalExpression::Variable("b".to_string()),
1381                    alias: None,
1382                },
1383            ],
1384            distinct: false,
1385            input: Box::new(LogicalOperator::Expand(ExpandOp {
1386                from_variable: "a".to_string(),
1387                to_variable: "b".to_string(),
1388                edge_variable: Some("e".to_string()),
1389                direction: ExpandDirection::Outgoing,
1390                edge_types: vec!["KNOWS".to_string()],
1391                min_hops: 1,
1392                max_hops: Some(1),
1393                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1394                    variable: "a".to_string(),
1395                    label: Some("Person".to_string()),
1396                    input: None,
1397                })),
1398                path_alias: None,
1399                path_mode: PathMode::Walk,
1400            })),
1401        }));
1402
1403        let mut binder = Binder::new();
1404        let result = binder.bind(&plan);
1405
1406        assert!(result.is_ok());
1407        let ctx = result.unwrap();
1408        assert!(ctx.contains("a"));
1409        assert!(ctx.contains("b"));
1410        assert!(ctx.contains("e"));
1411        assert!(ctx.get("a").unwrap().is_node);
1412        assert!(ctx.get("b").unwrap().is_node);
1413        assert!(ctx.get("e").unwrap().is_edge);
1414    }
1415
1416    #[test]
1417    fn test_bind_expand_from_undefined_variable() {
1418        // Tests that expanding from an undefined variable produces a clear error
1419        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1420
1421        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1422            items: vec![ReturnItem {
1423                expression: LogicalExpression::Variable("b".to_string()),
1424                alias: None,
1425            }],
1426            distinct: false,
1427            input: Box::new(LogicalOperator::Expand(ExpandOp {
1428                from_variable: "undefined".to_string(), // not defined!
1429                to_variable: "b".to_string(),
1430                edge_variable: None,
1431                direction: ExpandDirection::Outgoing,
1432                edge_types: vec![],
1433                min_hops: 1,
1434                max_hops: Some(1),
1435                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1436                    variable: "a".to_string(),
1437                    label: None,
1438                    input: None,
1439                })),
1440                path_alias: None,
1441                path_mode: PathMode::Walk,
1442            })),
1443        }));
1444
1445        let mut binder = Binder::new();
1446        let result = binder.bind(&plan);
1447
1448        assert!(result.is_err());
1449        let err = result.unwrap_err();
1450        assert!(
1451            err.to_string().contains("Undefined variable 'undefined'"),
1452            "Expected error about undefined variable, got: {}",
1453            err
1454        );
1455    }
1456
1457    #[test]
1458    fn test_bind_return_with_aggregate_and_non_aggregate() {
1459        // Tests binding of aggregate functions alongside regular expressions
1460        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1461            items: vec![
1462                ReturnItem {
1463                    expression: LogicalExpression::FunctionCall {
1464                        name: "count".to_string(),
1465                        args: vec![LogicalExpression::Variable("n".to_string())],
1466                        distinct: false,
1467                    },
1468                    alias: Some("cnt".to_string()),
1469                },
1470                ReturnItem {
1471                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1472                    alias: Some("one".to_string()),
1473                },
1474            ],
1475            distinct: false,
1476            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1477                variable: "n".to_string(),
1478                label: Some("Person".to_string()),
1479                input: None,
1480            })),
1481        }));
1482
1483        let mut binder = Binder::new();
1484        let result = binder.bind(&plan);
1485
1486        // This should succeed - count(n) with literal is valid
1487        assert!(result.is_ok());
1488    }
1489
1490    #[test]
1491    fn test_bind_nested_property_access() {
1492        // Tests that nested property access on the same variable works
1493        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1494            items: vec![
1495                ReturnItem {
1496                    expression: LogicalExpression::Property {
1497                        variable: "n".to_string(),
1498                        property: "name".to_string(),
1499                    },
1500                    alias: None,
1501                },
1502                ReturnItem {
1503                    expression: LogicalExpression::Property {
1504                        variable: "n".to_string(),
1505                        property: "age".to_string(),
1506                    },
1507                    alias: None,
1508                },
1509            ],
1510            distinct: false,
1511            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1512                variable: "n".to_string(),
1513                label: Some("Person".to_string()),
1514                input: None,
1515            })),
1516        }));
1517
1518        let mut binder = Binder::new();
1519        let result = binder.bind(&plan);
1520
1521        assert!(result.is_ok());
1522    }
1523
1524    #[test]
1525    fn test_bind_binary_expression_with_undefined() {
1526        // Tests that binary expressions with undefined variables produce errors
1527        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1528            items: vec![ReturnItem {
1529                expression: LogicalExpression::Binary {
1530                    left: Box::new(LogicalExpression::Property {
1531                        variable: "n".to_string(),
1532                        property: "age".to_string(),
1533                    }),
1534                    op: BinaryOp::Add,
1535                    right: Box::new(LogicalExpression::Property {
1536                        variable: "m".to_string(), // undefined!
1537                        property: "age".to_string(),
1538                    }),
1539                },
1540                alias: Some("total".to_string()),
1541            }],
1542            distinct: false,
1543            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1544                variable: "n".to_string(),
1545                label: None,
1546                input: None,
1547            })),
1548        }));
1549
1550        let mut binder = Binder::new();
1551        let result = binder.bind(&plan);
1552
1553        assert!(result.is_err());
1554        assert!(
1555            result
1556                .unwrap_err()
1557                .to_string()
1558                .contains("Undefined variable 'm'")
1559        );
1560    }
1561
1562    #[test]
1563    fn test_bind_duplicate_variable_definition() {
1564        // Tests behavior when the same variable is defined twice (via two NodeScans)
1565        // This is typically not allowed or the second shadows the first
1566        use crate::query::plan::{JoinOp, JoinType};
1567
1568        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1569            items: vec![ReturnItem {
1570                expression: LogicalExpression::Variable("n".to_string()),
1571                alias: None,
1572            }],
1573            distinct: false,
1574            input: Box::new(LogicalOperator::Join(JoinOp {
1575                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1576                    variable: "n".to_string(),
1577                    label: Some("A".to_string()),
1578                    input: None,
1579                })),
1580                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1581                    variable: "m".to_string(), // different variable is fine
1582                    label: Some("B".to_string()),
1583                    input: None,
1584                })),
1585                join_type: JoinType::Inner,
1586                conditions: vec![],
1587            })),
1588        }));
1589
1590        let mut binder = Binder::new();
1591        let result = binder.bind(&plan);
1592
1593        // Join with different variables should work
1594        assert!(result.is_ok());
1595        let ctx = result.unwrap();
1596        assert!(ctx.contains("n"));
1597        assert!(ctx.contains("m"));
1598    }
1599
1600    #[test]
1601    fn test_bind_function_with_wrong_arity() {
1602        // Tests that functions with wrong number of arguments are handled
1603        // (behavior depends on whether binder validates arity)
1604        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1605            items: vec![ReturnItem {
1606                expression: LogicalExpression::FunctionCall {
1607                    name: "count".to_string(),
1608                    args: vec![], // count() needs an argument
1609                    distinct: false,
1610                },
1611                alias: None,
1612            }],
1613            distinct: false,
1614            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1615                variable: "n".to_string(),
1616                label: None,
1617                input: None,
1618            })),
1619        }));
1620
1621        let mut binder = Binder::new();
1622        let result = binder.bind(&plan);
1623
1624        // The binder may or may not catch this - if it passes, execution will fail
1625        // This test documents current behavior
1626        // If binding fails, that's fine; if it passes, execution will handle it
1627        let _ = result; // We're just testing it doesn't panic
1628    }
1629
1630    // --- Mutation operator validation ---
1631
1632    #[test]
1633    fn test_create_edge_rejects_undefined_source() {
1634        use crate::query::plan::CreateEdgeOp;
1635
1636        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1637            variable: Some("e".to_string()),
1638            from_variable: "ghost".to_string(), // not defined!
1639            to_variable: "b".to_string(),
1640            edge_type: "KNOWS".to_string(),
1641            properties: vec![],
1642            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1643                variable: "b".to_string(),
1644                label: None,
1645                input: None,
1646            })),
1647        }));
1648
1649        let mut binder = Binder::new();
1650        let err = binder.bind(&plan).unwrap_err();
1651        assert!(
1652            err.to_string().contains("Undefined variable 'ghost'"),
1653            "Should reject undefined source variable, got: {err}"
1654        );
1655    }
1656
1657    #[test]
1658    fn test_create_edge_rejects_undefined_target() {
1659        use crate::query::plan::CreateEdgeOp;
1660
1661        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1662            variable: None,
1663            from_variable: "a".to_string(),
1664            to_variable: "missing".to_string(), // not defined!
1665            edge_type: "KNOWS".to_string(),
1666            properties: vec![],
1667            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1668                variable: "a".to_string(),
1669                label: None,
1670                input: None,
1671            })),
1672        }));
1673
1674        let mut binder = Binder::new();
1675        let err = binder.bind(&plan).unwrap_err();
1676        assert!(
1677            err.to_string().contains("Undefined variable 'missing'"),
1678            "Should reject undefined target variable, got: {err}"
1679        );
1680    }
1681
1682    #[test]
1683    fn test_create_edge_validates_property_expressions() {
1684        use crate::query::plan::CreateEdgeOp;
1685
1686        // Source and target defined, but property references undefined variable
1687        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1688            variable: Some("e".to_string()),
1689            from_variable: "a".to_string(),
1690            to_variable: "b".to_string(),
1691            edge_type: "KNOWS".to_string(),
1692            properties: vec![(
1693                "since".to_string(),
1694                LogicalExpression::Property {
1695                    variable: "x".to_string(), // undefined!
1696                    property: "year".to_string(),
1697                },
1698            )],
1699            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1700                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1701                    variable: "a".to_string(),
1702                    label: None,
1703                    input: None,
1704                })),
1705                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1706                    variable: "b".to_string(),
1707                    label: None,
1708                    input: None,
1709                })),
1710                join_type: crate::query::plan::JoinType::Inner,
1711                conditions: vec![],
1712            })),
1713        }));
1714
1715        let mut binder = Binder::new();
1716        let err = binder.bind(&plan).unwrap_err();
1717        assert!(err.to_string().contains("Undefined variable 'x'"));
1718    }
1719
1720    #[test]
1721    fn test_set_property_rejects_undefined_variable() {
1722        use crate::query::plan::SetPropertyOp;
1723
1724        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1725            variable: "ghost".to_string(),
1726            properties: vec![(
1727                "name".to_string(),
1728                LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1729            )],
1730            replace: false,
1731            is_edge: false,
1732            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1733                variable: "n".to_string(),
1734                label: None,
1735                input: None,
1736            })),
1737        }));
1738
1739        let mut binder = Binder::new();
1740        let err = binder.bind(&plan).unwrap_err();
1741        assert!(
1742            err.to_string().contains("in SET"),
1743            "Error should indicate SET context, got: {err}"
1744        );
1745    }
1746
1747    #[test]
1748    fn test_delete_node_rejects_undefined_variable() {
1749        use crate::query::plan::DeleteNodeOp;
1750
1751        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1752            variable: "phantom".to_string(),
1753            detach: false,
1754            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1755                variable: "n".to_string(),
1756                label: None,
1757                input: None,
1758            })),
1759        }));
1760
1761        let mut binder = Binder::new();
1762        let err = binder.bind(&plan).unwrap_err();
1763        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1764    }
1765
1766    #[test]
1767    fn test_delete_edge_rejects_undefined_variable() {
1768        use crate::query::plan::DeleteEdgeOp;
1769
1770        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1771            variable: "gone".to_string(),
1772            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1773                variable: "n".to_string(),
1774                label: None,
1775                input: None,
1776            })),
1777        }));
1778
1779        let mut binder = Binder::new();
1780        let err = binder.bind(&plan).unwrap_err();
1781        assert!(err.to_string().contains("Undefined variable 'gone'"));
1782    }
1783
1784    // --- WITH/Project clause ---
1785
1786    #[test]
1787    fn test_project_alias_becomes_available_downstream() {
1788        use crate::query::plan::{ProjectOp, Projection};
1789
1790        // WITH n.name AS person_name RETURN person_name
1791        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1792            items: vec![ReturnItem {
1793                expression: LogicalExpression::Variable("person_name".to_string()),
1794                alias: None,
1795            }],
1796            distinct: false,
1797            input: Box::new(LogicalOperator::Project(ProjectOp {
1798                projections: vec![Projection {
1799                    expression: LogicalExpression::Property {
1800                        variable: "n".to_string(),
1801                        property: "name".to_string(),
1802                    },
1803                    alias: Some("person_name".to_string()),
1804                }],
1805                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1806                    variable: "n".to_string(),
1807                    label: None,
1808                    input: None,
1809                })),
1810                pass_through_input: false,
1811            })),
1812        }));
1813
1814        let mut binder = Binder::new();
1815        let ctx = binder.bind(&plan).unwrap();
1816        assert!(
1817            ctx.contains("person_name"),
1818            "WITH alias should be available to RETURN"
1819        );
1820    }
1821
1822    #[test]
1823    fn test_project_rejects_undefined_expression() {
1824        use crate::query::plan::{ProjectOp, Projection};
1825
1826        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1827            projections: vec![Projection {
1828                expression: LogicalExpression::Variable("nope".to_string()),
1829                alias: Some("x".to_string()),
1830            }],
1831            input: Box::new(LogicalOperator::Empty),
1832            pass_through_input: false,
1833        }));
1834
1835        let mut binder = Binder::new();
1836        let result = binder.bind(&plan);
1837        assert!(result.is_err(), "WITH on undefined variable should fail");
1838    }
1839
1840    // --- UNWIND ---
1841
1842    #[test]
1843    fn test_unwind_adds_element_variable() {
1844        use crate::query::plan::UnwindOp;
1845
1846        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1847            items: vec![ReturnItem {
1848                expression: LogicalExpression::Variable("item".to_string()),
1849                alias: None,
1850            }],
1851            distinct: false,
1852            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1853                expression: LogicalExpression::List(vec![
1854                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1855                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1856                ]),
1857                variable: "item".to_string(),
1858                ordinality_var: None,
1859                offset_var: None,
1860                input: Box::new(LogicalOperator::Empty),
1861            })),
1862        }));
1863
1864        let mut binder = Binder::new();
1865        let ctx = binder.bind(&plan).unwrap();
1866        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1867        let info = ctx.get("item").unwrap();
1868        assert!(
1869            !info.is_node && !info.is_edge,
1870            "UNWIND variable is not a graph element"
1871        );
1872    }
1873
1874    // --- MERGE ---
1875
1876    #[test]
1877    fn test_merge_adds_variable_and_validates_properties() {
1878        use crate::query::plan::MergeOp;
1879
1880        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1881            items: vec![ReturnItem {
1882                expression: LogicalExpression::Variable("m".to_string()),
1883                alias: None,
1884            }],
1885            distinct: false,
1886            input: Box::new(LogicalOperator::Merge(MergeOp {
1887                variable: "m".to_string(),
1888                labels: vec!["Person".to_string()],
1889                match_properties: vec![(
1890                    "name".to_string(),
1891                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1892                )],
1893                on_create: vec![(
1894                    "created".to_string(),
1895                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1896                )],
1897                on_match: vec![(
1898                    "updated".to_string(),
1899                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1900                )],
1901                input: Box::new(LogicalOperator::Empty),
1902            })),
1903        }));
1904
1905        let mut binder = Binder::new();
1906        let ctx = binder.bind(&plan).unwrap();
1907        assert!(ctx.contains("m"));
1908        assert!(
1909            ctx.get("m").unwrap().is_node,
1910            "MERGE variable should be a node"
1911        );
1912    }
1913
1914    #[test]
1915    fn test_merge_rejects_undefined_in_on_create() {
1916        use crate::query::plan::MergeOp;
1917
1918        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
1919            variable: "m".to_string(),
1920            labels: vec![],
1921            match_properties: vec![],
1922            on_create: vec![(
1923                "name".to_string(),
1924                LogicalExpression::Property {
1925                    variable: "other".to_string(), // undefined!
1926                    property: "name".to_string(),
1927                },
1928            )],
1929            on_match: vec![],
1930            input: Box::new(LogicalOperator::Empty),
1931        }));
1932
1933        let mut binder = Binder::new();
1934        let result = binder.bind(&plan);
1935        assert!(
1936            result.is_err(),
1937            "ON CREATE referencing undefined variable should fail"
1938        );
1939    }
1940
1941    // --- ShortestPath ---
1942
1943    #[test]
1944    fn test_shortest_path_rejects_undefined_source() {
1945        use crate::query::plan::{ExpandDirection, ShortestPathOp};
1946
1947        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1948            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1949                variable: "b".to_string(),
1950                label: None,
1951                input: None,
1952            })),
1953            source_var: "missing".to_string(), // not defined
1954            target_var: "b".to_string(),
1955            edge_types: vec![],
1956            direction: ExpandDirection::Both,
1957            path_alias: "p".to_string(),
1958            all_paths: false,
1959        }));
1960
1961        let mut binder = Binder::new();
1962        let err = binder.bind(&plan).unwrap_err();
1963        assert!(
1964            err.to_string().contains("source in shortestPath"),
1965            "Error should mention shortestPath source context, got: {err}"
1966        );
1967    }
1968
1969    #[test]
1970    fn test_shortest_path_adds_path_and_length_variables() {
1971        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
1972
1973        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1974            input: Box::new(LogicalOperator::Join(JoinOp {
1975                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1976                    variable: "a".to_string(),
1977                    label: None,
1978                    input: None,
1979                })),
1980                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1981                    variable: "b".to_string(),
1982                    label: None,
1983                    input: None,
1984                })),
1985                join_type: JoinType::Cross,
1986                conditions: vec![],
1987            })),
1988            source_var: "a".to_string(),
1989            target_var: "b".to_string(),
1990            edge_types: vec!["ROAD".to_string()],
1991            direction: ExpandDirection::Outgoing,
1992            path_alias: "p".to_string(),
1993            all_paths: false,
1994        }));
1995
1996        let mut binder = Binder::new();
1997        let ctx = binder.bind(&plan).unwrap();
1998        assert!(ctx.contains("p"), "Path alias should be bound");
1999        assert!(
2000            ctx.contains("_path_length_p"),
2001            "Path length variable should be auto-created"
2002        );
2003    }
2004
2005    // --- Expression validation edge cases ---
2006
2007    #[test]
2008    fn test_case_expression_validates_all_branches() {
2009        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2010            items: vec![ReturnItem {
2011                expression: LogicalExpression::Case {
2012                    operand: None,
2013                    when_clauses: vec![
2014                        (
2015                            LogicalExpression::Binary {
2016                                left: Box::new(LogicalExpression::Property {
2017                                    variable: "n".to_string(),
2018                                    property: "age".to_string(),
2019                                }),
2020                                op: BinaryOp::Gt,
2021                                right: Box::new(LogicalExpression::Literal(
2022                                    grafeo_common::types::Value::Int64(18),
2023                                )),
2024                            },
2025                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2026                                "adult".into(),
2027                            )),
2028                        ),
2029                        (
2030                            // This branch references undefined variable
2031                            LogicalExpression::Property {
2032                                variable: "ghost".to_string(),
2033                                property: "flag".to_string(),
2034                            },
2035                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2036                                "flagged".into(),
2037                            )),
2038                        ),
2039                    ],
2040                    else_clause: Some(Box::new(LogicalExpression::Literal(
2041                        grafeo_common::types::Value::String("other".into()),
2042                    ))),
2043                },
2044                alias: None,
2045            }],
2046            distinct: false,
2047            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2048                variable: "n".to_string(),
2049                label: None,
2050                input: None,
2051            })),
2052        }));
2053
2054        let mut binder = Binder::new();
2055        let err = binder.bind(&plan).unwrap_err();
2056        assert!(
2057            err.to_string().contains("ghost"),
2058            "CASE should validate all when-clause conditions"
2059        );
2060    }
2061
2062    #[test]
2063    fn test_case_expression_validates_else_clause() {
2064        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2065            items: vec![ReturnItem {
2066                expression: LogicalExpression::Case {
2067                    operand: None,
2068                    when_clauses: vec![(
2069                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2070                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
2071                    )],
2072                    else_clause: Some(Box::new(LogicalExpression::Property {
2073                        variable: "missing".to_string(),
2074                        property: "x".to_string(),
2075                    })),
2076                },
2077                alias: None,
2078            }],
2079            distinct: false,
2080            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2081                variable: "n".to_string(),
2082                label: None,
2083                input: None,
2084            })),
2085        }));
2086
2087        let mut binder = Binder::new();
2088        let err = binder.bind(&plan).unwrap_err();
2089        assert!(
2090            err.to_string().contains("missing"),
2091            "CASE ELSE should validate its expression too"
2092        );
2093    }
2094
2095    #[test]
2096    fn test_slice_access_validates_expressions() {
2097        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2098            items: vec![ReturnItem {
2099                expression: LogicalExpression::SliceAccess {
2100                    base: Box::new(LogicalExpression::Variable("n".to_string())),
2101                    start: Some(Box::new(LogicalExpression::Variable(
2102                        "undefined_start".to_string(),
2103                    ))),
2104                    end: None,
2105                },
2106                alias: None,
2107            }],
2108            distinct: false,
2109            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2110                variable: "n".to_string(),
2111                label: None,
2112                input: None,
2113            })),
2114        }));
2115
2116        let mut binder = Binder::new();
2117        let err = binder.bind(&plan).unwrap_err();
2118        assert!(err.to_string().contains("undefined_start"));
2119    }
2120
2121    #[test]
2122    fn test_list_comprehension_validates_list_source() {
2123        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2124            items: vec![ReturnItem {
2125                expression: LogicalExpression::ListComprehension {
2126                    variable: "x".to_string(),
2127                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
2128                    filter_expr: None,
2129                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
2130                },
2131                alias: None,
2132            }],
2133            distinct: false,
2134            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2135                variable: "n".to_string(),
2136                label: None,
2137                input: None,
2138            })),
2139        }));
2140
2141        let mut binder = Binder::new();
2142        let err = binder.bind(&plan).unwrap_err();
2143        assert!(
2144            err.to_string().contains("not_defined"),
2145            "List comprehension should validate source list expression"
2146        );
2147    }
2148
2149    #[test]
2150    fn test_labels_type_id_reject_undefined() {
2151        // labels(x) where x is not defined
2152        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2153            items: vec![ReturnItem {
2154                expression: LogicalExpression::Labels("x".to_string()),
2155                alias: None,
2156            }],
2157            distinct: false,
2158            input: Box::new(LogicalOperator::Empty),
2159        }));
2160
2161        let mut binder = Binder::new();
2162        assert!(
2163            binder.bind(&plan).is_err(),
2164            "labels(x) on undefined x should fail"
2165        );
2166
2167        // type(e) where e is not defined
2168        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2169            items: vec![ReturnItem {
2170                expression: LogicalExpression::Type("e".to_string()),
2171                alias: None,
2172            }],
2173            distinct: false,
2174            input: Box::new(LogicalOperator::Empty),
2175        }));
2176
2177        let mut binder2 = Binder::new();
2178        assert!(
2179            binder2.bind(&plan2).is_err(),
2180            "type(e) on undefined e should fail"
2181        );
2182
2183        // id(n) where n is not defined
2184        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2185            items: vec![ReturnItem {
2186                expression: LogicalExpression::Id("n".to_string()),
2187                alias: None,
2188            }],
2189            distinct: false,
2190            input: Box::new(LogicalOperator::Empty),
2191        }));
2192
2193        let mut binder3 = Binder::new();
2194        assert!(
2195            binder3.bind(&plan3).is_err(),
2196            "id(n) on undefined n should fail"
2197        );
2198    }
2199
2200    #[test]
2201    fn test_expand_rejects_non_node_source() {
2202        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode, UnwindOp};
2203
2204        // UNWIND [1,2] AS x  -- x is not a node
2205        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
2206        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2207            items: vec![ReturnItem {
2208                expression: LogicalExpression::Variable("b".to_string()),
2209                alias: None,
2210            }],
2211            distinct: false,
2212            input: Box::new(LogicalOperator::Expand(ExpandOp {
2213                from_variable: "x".to_string(),
2214                to_variable: "b".to_string(),
2215                edge_variable: None,
2216                direction: ExpandDirection::Outgoing,
2217                edge_types: vec![],
2218                min_hops: 1,
2219                max_hops: Some(1),
2220                input: Box::new(LogicalOperator::Unwind(UnwindOp {
2221                    expression: LogicalExpression::List(vec![]),
2222                    variable: "x".to_string(),
2223                    ordinality_var: None,
2224                    offset_var: None,
2225                    input: Box::new(LogicalOperator::Empty),
2226                })),
2227                path_alias: None,
2228                path_mode: PathMode::Walk,
2229            })),
2230        }));
2231
2232        let mut binder = Binder::new();
2233        let err = binder.bind(&plan).unwrap_err();
2234        assert!(
2235            err.to_string().contains("not a node"),
2236            "Expanding from non-node should fail, got: {err}"
2237        );
2238    }
2239
2240    #[test]
2241    fn test_add_label_rejects_undefined_variable() {
2242        use crate::query::plan::AddLabelOp;
2243
2244        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
2245            variable: "missing".to_string(),
2246            labels: vec!["Admin".to_string()],
2247            input: Box::new(LogicalOperator::Empty),
2248        }));
2249
2250        let mut binder = Binder::new();
2251        let err = binder.bind(&plan).unwrap_err();
2252        assert!(err.to_string().contains("SET labels"));
2253    }
2254
2255    #[test]
2256    fn test_remove_label_rejects_undefined_variable() {
2257        use crate::query::plan::RemoveLabelOp;
2258
2259        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
2260            variable: "missing".to_string(),
2261            labels: vec!["Admin".to_string()],
2262            input: Box::new(LogicalOperator::Empty),
2263        }));
2264
2265        let mut binder = Binder::new();
2266        let err = binder.bind(&plan).unwrap_err();
2267        assert!(err.to_string().contains("REMOVE labels"));
2268    }
2269
2270    #[test]
2271    fn test_sort_validates_key_expressions() {
2272        use crate::query::plan::{SortKey, SortOp, SortOrder};
2273
2274        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
2275            keys: vec![SortKey {
2276                expression: LogicalExpression::Property {
2277                    variable: "missing".to_string(),
2278                    property: "name".to_string(),
2279                },
2280                order: SortOrder::Ascending,
2281                nulls: None,
2282            }],
2283            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2284                variable: "n".to_string(),
2285                label: None,
2286                input: None,
2287            })),
2288        }));
2289
2290        let mut binder = Binder::new();
2291        assert!(
2292            binder.bind(&plan).is_err(),
2293            "ORDER BY on undefined variable should fail"
2294        );
2295    }
2296
2297    #[test]
2298    fn test_create_node_adds_variable_before_property_validation() {
2299        use crate::query::plan::CreateNodeOp;
2300
2301        // CREATE (n:Person {friend: n.name}) - referencing the node being created
2302        // The variable should be available for property expressions (self-reference)
2303        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
2304            variable: "n".to_string(),
2305            labels: vec!["Person".to_string()],
2306            properties: vec![(
2307                "self_ref".to_string(),
2308                LogicalExpression::Property {
2309                    variable: "n".to_string(),
2310                    property: "name".to_string(),
2311                },
2312            )],
2313            input: None,
2314        }));
2315
2316        let mut binder = Binder::new();
2317        // This should succeed because CreateNode adds the variable before validating properties
2318        let ctx = binder.bind(&plan).unwrap();
2319        assert!(ctx.get("n").unwrap().is_node);
2320    }
2321
#[test]
fn test_undefined_variable_suggests_similar() {
    // The scan binds only `person`; the RETURN item misspells it as `persn`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("person"),
        label: None,
        input: None,
    });
    let misspelled = ReturnItem {
        expression: LogicalExpression::Variable(String::from("persn")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![misspelled],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let rendered = binder.bind(&plan).unwrap_err().to_string();

    // Deliberately loose: only require that the offending name shows up in
    // the rendered error, without pinning the wording of any suggestion.
    assert!(
        rendered.contains("persn"),
        "Error should mention the undefined variable"
    );
}
2347
#[test]
fn test_anon_variables_skip_validation() {
    // No upstream operator binds `_anon_42`; the `_anon_` prefix marks it as
    // an anonymous variable, which the binder is expected to let through.
    let anonymous = ReturnItem {
        expression: LogicalExpression::Variable(String::from("_anon_42")),
        alias: None,
    };
    let projection = ReturnOp {
        items: vec![anonymous],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(projection));

    let mut binder = Binder::new();
    assert!(
        binder.bind(&plan).is_ok(),
        "Anonymous variables should bypass validation"
    );
}
2367
#[test]
fn test_map_expression_validates_values() {
    // A map literal whose single value names a variable that nothing binds;
    // the input is `Empty`, so no variables are in scope at all.
    let map_literal = LogicalExpression::Map(vec![(
        String::from("key"),
        LogicalExpression::Variable(String::from("undefined")),
    )]);
    let projection = ReturnOp {
        items: vec![ReturnItem {
            expression: map_literal,
            alias: None,
        }],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(projection));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(outcome.is_err(), "Map values should be validated");
}
2388
#[test]
fn test_vector_scan_validates_query_vector() {
    use crate::query::plan::VectorScanOp;

    // The query vector is a variable that no operator has introduced, and the
    // scan has no input that could have bound it.
    let unbound_query = LogicalExpression::Variable(String::from("undefined_vec"));
    let scan = VectorScanOp {
        variable: String::from("result"),
        index_name: None,
        property: String::from("embedding"),
        label: Some(String::from("Doc")),
        query_vector: unbound_query,
        k: 10,
        metric: None,
        min_similarity: None,
        max_distance: None,
        input: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::VectorScan(scan));

    // Binding must fail, and the error text must name the unbound variable.
    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    assert!(err.to_string().contains("undefined_vec"));
}
2410}