Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use indexmap::IndexMap;
18use std::collections::HashSet;
19
20/// Creates a semantic binding error.
21fn binding_error(message: impl Into<String>) -> Error {
22    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
23}
24
25/// Creates a semantic binding error with a hint.
26fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
27    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
28}
29
30/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
31fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
32    let candidates: Vec<String> = context.variable_names();
33    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
34
35    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
36        binding_error_with_hint(
37            format!("Undefined variable '{variable}'{suffix}"),
38            format_suggestion(suggestion),
39        )
40    } else {
41        binding_error(format!("Undefined variable '{variable}'{suffix}"))
42    }
43}
44
/// Information about a bound variable.
///
/// One record is stored per variable name in [`BindingContext`]. Within this
/// file the binder always sets `name` to the same string it uses as the
/// context key.
#[derive(Debug, Clone)]
pub struct VariableInfo {
    /// The name of the variable.
    pub name: String,
    /// The inferred type of the variable.
    pub data_type: LogicalType,
    /// Whether this variable is a node.
    pub is_node: bool,
    /// Whether this variable is an edge.
    ///
    /// Plain values (for example UNWIND elements) have both `is_node` and
    /// `is_edge` set to `false`.
    pub is_edge: bool,
}
57
58/// Context containing all bound variables and their information.
59///
60/// Uses `IndexMap` to maintain insertion order without a separate `Vec`,
61/// removing redundant storage and making `remove_variable` O(n) instead of
62/// two separate O(n) operations.
63#[derive(Debug, Clone, Default)]
64pub struct BindingContext {
65    /// Map from variable name to its info, in definition order.
66    variables: IndexMap<String, VariableInfo>,
67}
68
69impl BindingContext {
70    /// Creates a new empty binding context.
71    #[must_use]
72    pub fn new() -> Self {
73        Self {
74            variables: IndexMap::new(),
75        }
76    }
77
78    /// Adds a variable to the context.
79    ///
80    /// If the variable is already defined, replaces its info but preserves its
81    /// position in definition order.
82    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
83        self.variables.insert(name, info);
84    }
85
86    /// Looks up a variable by name.
87    #[must_use]
88    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
89        self.variables.get(name)
90    }
91
92    /// Checks if a variable is defined.
93    #[must_use]
94    pub fn contains(&self, name: &str) -> bool {
95        self.variables.contains_key(name)
96    }
97
98    /// Returns all variable names in definition order.
99    #[must_use]
100    pub fn variable_names(&self) -> Vec<String> {
101        self.variables.keys().cloned().collect()
102    }
103
104    /// Returns the number of bound variables.
105    #[must_use]
106    pub fn len(&self) -> usize {
107        self.variables.len()
108    }
109
110    /// Returns true if no variables are bound.
111    #[must_use]
112    pub fn is_empty(&self) -> bool {
113        self.variables.is_empty()
114    }
115
116    /// Removes a variable from the context (used for temporary scoping).
117    pub fn remove_variable(&mut self, name: &str) {
118        self.variables.shift_remove(name);
119    }
120}
121
/// Semantic binder for query plans.
///
/// The binder walks the logical plan and:
/// 1. Collects all variable definitions
/// 2. Validates that all variable references are valid
/// 3. Infers types where possible
/// 4. Reports semantic errors
///
/// The context is accumulated on the binder itself while operators are bound;
/// `bind` hands the caller a clone of it.
pub struct Binder {
    /// The current binding context.
    context: BindingContext,
}
133
134impl Binder {
135    /// Creates a new binder.
136    #[must_use]
137    pub fn new() -> Self {
138        Self {
139            context: BindingContext::new(),
140        }
141    }
142
143    /// Binds a logical plan, returning the binding context.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if semantic validation fails.
148    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
149        self.bind_operator(&plan.root)?;
150        Ok(self.context.clone())
151    }
152
153    /// Binds a single logical operator.
154    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
155        match op {
156            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
157            LogicalOperator::Expand(expand) => self.bind_expand(expand),
158            LogicalOperator::Filter(filter) => self.bind_filter(filter),
159            LogicalOperator::Return(ret) => self.bind_return(ret),
160            LogicalOperator::Project(project) => {
161                self.bind_operator(&project.input)?;
162                for projection in &project.projections {
163                    self.validate_expression(&projection.expression)?;
164                    // Add the projection alias to the context (for WITH clause support)
165                    if let Some(ref alias) = projection.alias {
166                        // Determine the type from the expression
167                        let data_type = self.infer_expression_type(&projection.expression);
168                        // Propagate node/edge status when projecting a variable
169                        // or a Case that selects between node variables (used
170                        // by optional() and union() translations).
171                        let (is_node, is_edge) = self.infer_entity_status(&projection.expression);
172                        self.context.add_variable(
173                            alias.clone(),
174                            VariableInfo {
175                                name: alias.clone(),
176                                data_type,
177                                is_node,
178                                is_edge,
179                            },
180                        );
181                    }
182                }
183                Ok(())
184            }
185            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
186            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
187            LogicalOperator::Sort(sort) => {
188                self.bind_operator(&sort.input)?;
189                for key in &sort.keys {
190                    self.validate_expression(&key.expression)?;
191                }
192                Ok(())
193            }
194            LogicalOperator::CreateNode(create) => {
195                // CreateNode introduces a new variable
196                if let Some(ref input) = create.input {
197                    self.bind_operator(input)?;
198                }
199                self.context.add_variable(
200                    create.variable.clone(),
201                    VariableInfo {
202                        name: create.variable.clone(),
203                        data_type: LogicalType::Node,
204                        is_node: true,
205                        is_edge: false,
206                    },
207                );
208                // Validate property expressions
209                for (_, expr) in &create.properties {
210                    self.validate_expression(expr)?;
211                }
212                Ok(())
213            }
214            LogicalOperator::EdgeScan(scan) => {
215                if let Some(ref input) = scan.input {
216                    self.bind_operator(input)?;
217                }
218                self.context.add_variable(
219                    scan.variable.clone(),
220                    VariableInfo {
221                        name: scan.variable.clone(),
222                        data_type: LogicalType::Edge,
223                        is_node: false,
224                        is_edge: true,
225                    },
226                );
227                Ok(())
228            }
229            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
230            LogicalOperator::Join(join) => self.bind_join(join),
231            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
232            LogicalOperator::CreateEdge(create) => {
233                self.bind_operator(&create.input)?;
234                // Validate that source and target variables are defined
235                if !self.context.contains(&create.from_variable) {
236                    return Err(undefined_variable_error(
237                        &create.from_variable,
238                        &self.context,
239                        " (source in CREATE EDGE)",
240                    ));
241                }
242                if !self.context.contains(&create.to_variable) {
243                    return Err(undefined_variable_error(
244                        &create.to_variable,
245                        &self.context,
246                        " (target in CREATE EDGE)",
247                    ));
248                }
249                // Add edge variable if present
250                if let Some(ref var) = create.variable {
251                    self.context.add_variable(
252                        var.clone(),
253                        VariableInfo {
254                            name: var.clone(),
255                            data_type: LogicalType::Edge,
256                            is_node: false,
257                            is_edge: true,
258                        },
259                    );
260                }
261                // Validate property expressions
262                for (_, expr) in &create.properties {
263                    self.validate_expression(expr)?;
264                }
265                Ok(())
266            }
267            LogicalOperator::DeleteNode(delete) => {
268                self.bind_operator(&delete.input)?;
269                // Validate that the variable to delete is defined
270                if !self.context.contains(&delete.variable) {
271                    return Err(undefined_variable_error(
272                        &delete.variable,
273                        &self.context,
274                        " in DELETE",
275                    ));
276                }
277                Ok(())
278            }
279            LogicalOperator::DeleteEdge(delete) => {
280                self.bind_operator(&delete.input)?;
281                // Validate that the variable to delete is defined
282                if !self.context.contains(&delete.variable) {
283                    return Err(undefined_variable_error(
284                        &delete.variable,
285                        &self.context,
286                        " in DELETE",
287                    ));
288                }
289                Ok(())
290            }
291            LogicalOperator::SetProperty(set) => {
292                self.bind_operator(&set.input)?;
293                // Validate that the variable to update is defined
294                if !self.context.contains(&set.variable) {
295                    return Err(undefined_variable_error(
296                        &set.variable,
297                        &self.context,
298                        " in SET",
299                    ));
300                }
301                // Validate property value expressions
302                for (_, expr) in &set.properties {
303                    self.validate_expression(expr)?;
304                }
305                Ok(())
306            }
307            LogicalOperator::Empty => Ok(()),
308
309            LogicalOperator::Unwind(unwind) => {
310                // First bind the input
311                self.bind_operator(&unwind.input)?;
312                // Validate the expression being unwound
313                self.validate_expression(&unwind.expression)?;
314                // Add the new variable to the context
315                self.context.add_variable(
316                    unwind.variable.clone(),
317                    VariableInfo {
318                        name: unwind.variable.clone(),
319                        data_type: LogicalType::Any, // Unwound elements can be any type
320                        is_node: false,
321                        is_edge: false,
322                    },
323                );
324                // Add ORDINALITY variable if present (1-based index)
325                if let Some(ref ord_var) = unwind.ordinality_var {
326                    self.context.add_variable(
327                        ord_var.clone(),
328                        VariableInfo {
329                            name: ord_var.clone(),
330                            data_type: LogicalType::Int64,
331                            is_node: false,
332                            is_edge: false,
333                        },
334                    );
335                }
336                // Add OFFSET variable if present (0-based index)
337                if let Some(ref off_var) = unwind.offset_var {
338                    self.context.add_variable(
339                        off_var.clone(),
340                        VariableInfo {
341                            name: off_var.clone(),
342                            data_type: LogicalType::Int64,
343                            is_node: false,
344                            is_edge: false,
345                        },
346                    );
347                }
348                Ok(())
349            }
350
351            // RDF/SPARQL operators
352            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
353            LogicalOperator::Union(union) => {
354                for input in &union.inputs {
355                    self.bind_operator(input)?;
356                }
357                Ok(())
358            }
359            LogicalOperator::LeftJoin(lj) => {
360                self.bind_operator(&lj.left)?;
361                self.bind_operator(&lj.right)?;
362                if let Some(ref cond) = lj.condition {
363                    self.validate_expression(cond)?;
364                }
365                Ok(())
366            }
367            LogicalOperator::AntiJoin(aj) => {
368                self.bind_operator(&aj.left)?;
369                self.bind_operator(&aj.right)?;
370                Ok(())
371            }
372            LogicalOperator::Bind(bind) => {
373                self.bind_operator(&bind.input)?;
374                self.validate_expression(&bind.expression)?;
375                self.context.add_variable(
376                    bind.variable.clone(),
377                    VariableInfo {
378                        name: bind.variable.clone(),
379                        data_type: LogicalType::Any,
380                        is_node: false,
381                        is_edge: false,
382                    },
383                );
384                Ok(())
385            }
386            LogicalOperator::Merge(merge) => {
387                // First bind the input
388                self.bind_operator(&merge.input)?;
389                // Validate the match property expressions
390                for (_, expr) in &merge.match_properties {
391                    self.validate_expression(expr)?;
392                }
393                // Validate the ON CREATE property expressions
394                for (_, expr) in &merge.on_create {
395                    self.validate_expression(expr)?;
396                }
397                // Validate the ON MATCH property expressions
398                for (_, expr) in &merge.on_match {
399                    self.validate_expression(expr)?;
400                }
401                // MERGE introduces a new variable
402                self.context.add_variable(
403                    merge.variable.clone(),
404                    VariableInfo {
405                        name: merge.variable.clone(),
406                        data_type: LogicalType::Node,
407                        is_node: true,
408                        is_edge: false,
409                    },
410                );
411                Ok(())
412            }
413            LogicalOperator::MergeRelationship(merge_rel) => {
414                self.bind_operator(&merge_rel.input)?;
415                // Validate source and target variables exist
416                if !self.context.contains(&merge_rel.source_variable) {
417                    return Err(undefined_variable_error(
418                        &merge_rel.source_variable,
419                        &self.context,
420                        " in MERGE relationship source",
421                    ));
422                }
423                if !self.context.contains(&merge_rel.target_variable) {
424                    return Err(undefined_variable_error(
425                        &merge_rel.target_variable,
426                        &self.context,
427                        " in MERGE relationship target",
428                    ));
429                }
430                for (_, expr) in &merge_rel.match_properties {
431                    self.validate_expression(expr)?;
432                }
433                for (_, expr) in &merge_rel.on_create {
434                    self.validate_expression(expr)?;
435                }
436                for (_, expr) in &merge_rel.on_match {
437                    self.validate_expression(expr)?;
438                }
439                // MERGE relationship introduces the edge variable
440                self.context.add_variable(
441                    merge_rel.variable.clone(),
442                    VariableInfo {
443                        name: merge_rel.variable.clone(),
444                        data_type: LogicalType::Edge,
445                        is_node: false,
446                        is_edge: true,
447                    },
448                );
449                Ok(())
450            }
451            LogicalOperator::AddLabel(add_label) => {
452                self.bind_operator(&add_label.input)?;
453                // Validate that the variable exists
454                if !self.context.contains(&add_label.variable) {
455                    return Err(undefined_variable_error(
456                        &add_label.variable,
457                        &self.context,
458                        " in SET labels",
459                    ));
460                }
461                Ok(())
462            }
463            LogicalOperator::RemoveLabel(remove_label) => {
464                self.bind_operator(&remove_label.input)?;
465                // Validate that the variable exists
466                if !self.context.contains(&remove_label.variable) {
467                    return Err(undefined_variable_error(
468                        &remove_label.variable,
469                        &self.context,
470                        " in REMOVE labels",
471                    ));
472                }
473                Ok(())
474            }
475            LogicalOperator::ShortestPath(sp) => {
476                // First bind the input
477                self.bind_operator(&sp.input)?;
478                // Validate that source and target variables are defined
479                if !self.context.contains(&sp.source_var) {
480                    return Err(undefined_variable_error(
481                        &sp.source_var,
482                        &self.context,
483                        " (source in shortestPath)",
484                    ));
485                }
486                if !self.context.contains(&sp.target_var) {
487                    return Err(undefined_variable_error(
488                        &sp.target_var,
489                        &self.context,
490                        " (target in shortestPath)",
491                    ));
492                }
493                // Add the path alias variable to the context
494                self.context.add_variable(
495                    sp.path_alias.clone(),
496                    VariableInfo {
497                        name: sp.path_alias.clone(),
498                        data_type: LogicalType::Any, // Path is a complex type
499                        is_node: false,
500                        is_edge: false,
501                    },
502                );
503                // Also add the path length variable for length(p) calls
504                let path_length_var = format!("_path_length_{}", sp.path_alias);
505                self.context.add_variable(
506                    path_length_var.clone(),
507                    VariableInfo {
508                        name: path_length_var,
509                        data_type: LogicalType::Int64,
510                        is_node: false,
511                        is_edge: false,
512                    },
513                );
514                Ok(())
515            }
516            // SPARQL Update operators - these don't require variable binding
517            LogicalOperator::InsertTriple(insert) => {
518                if let Some(ref input) = insert.input {
519                    self.bind_operator(input)?;
520                }
521                Ok(())
522            }
523            LogicalOperator::DeleteTriple(delete) => {
524                if let Some(ref input) = delete.input {
525                    self.bind_operator(input)?;
526                }
527                Ok(())
528            }
529            LogicalOperator::Modify(modify) => {
530                self.bind_operator(&modify.where_clause)?;
531                Ok(())
532            }
533            LogicalOperator::ClearGraph(_)
534            | LogicalOperator::CreateGraph(_)
535            | LogicalOperator::DropGraph(_)
536            | LogicalOperator::LoadGraph(_)
537            | LogicalOperator::CopyGraph(_)
538            | LogicalOperator::MoveGraph(_)
539            | LogicalOperator::AddGraph(_)
540            | LogicalOperator::HorizontalAggregate(_) => Ok(()),
541            LogicalOperator::VectorScan(scan) => {
542                // VectorScan introduces a variable for matched nodes
543                if let Some(ref input) = scan.input {
544                    self.bind_operator(input)?;
545                }
546                self.context.add_variable(
547                    scan.variable.clone(),
548                    VariableInfo {
549                        name: scan.variable.clone(),
550                        data_type: LogicalType::Node,
551                        is_node: true,
552                        is_edge: false,
553                    },
554                );
555                // Validate the query vector expression
556                self.validate_expression(&scan.query_vector)?;
557                Ok(())
558            }
559            LogicalOperator::VectorJoin(join) => {
560                // VectorJoin takes input from left side and produces right-side matches
561                self.bind_operator(&join.input)?;
562                // Add right variable for matched nodes
563                self.context.add_variable(
564                    join.right_variable.clone(),
565                    VariableInfo {
566                        name: join.right_variable.clone(),
567                        data_type: LogicalType::Node,
568                        is_node: true,
569                        is_edge: false,
570                    },
571                );
572                // Optionally add score variable
573                if let Some(ref score_var) = join.score_variable {
574                    self.context.add_variable(
575                        score_var.clone(),
576                        VariableInfo {
577                            name: score_var.clone(),
578                            data_type: LogicalType::Float64,
579                            is_node: false,
580                            is_edge: false,
581                        },
582                    );
583                }
584                // Validate the query vector expression
585                self.validate_expression(&join.query_vector)?;
586                Ok(())
587            }
588            LogicalOperator::MapCollect(mc) => {
589                self.bind_operator(&mc.input)?;
590                self.context.add_variable(
591                    mc.alias.clone(),
592                    VariableInfo {
593                        name: mc.alias.clone(),
594                        data_type: LogicalType::Any,
595                        is_node: false,
596                        is_edge: false,
597                    },
598                );
599                Ok(())
600            }
601            LogicalOperator::Except(except) => {
602                self.bind_operator(&except.left)?;
603                self.bind_operator(&except.right)?;
604                Ok(())
605            }
606            LogicalOperator::Intersect(intersect) => {
607                self.bind_operator(&intersect.left)?;
608                self.bind_operator(&intersect.right)?;
609                Ok(())
610            }
611            LogicalOperator::Otherwise(otherwise) => {
612                self.bind_operator(&otherwise.left)?;
613                self.bind_operator(&otherwise.right)?;
614                Ok(())
615            }
616            LogicalOperator::Apply(apply) => {
617                // Snapshot context BEFORE binding the input, so we can detect
618                // which variables were added by the input plan.
619                let pre_apply_names: HashSet<String> =
620                    self.context.variable_names().iter().cloned().collect();
621
622                self.bind_operator(&apply.input)?;
623
624                // Scope down: when the input plan exposes a Return/Aggregate
625                // projection (not a raw scan/expand), remove its internal-only
626                // variables. Only the projected output columns should be visible
627                // to the subplan — this prevents variables internal to a sibling
628                // CALL block from leaking into the next CALL block.
629                let mut input_output_ctx = BindingContext::new();
630                Self::register_subplan_columns(&apply.input, &mut input_output_ctx);
631                let input_output_names: HashSet<String> =
632                    input_output_ctx.variable_names().iter().cloned().collect();
633
634                if !input_output_names.is_empty() {
635                    // Input has an explicit projection: remove its internals.
636                    let input_internals: Vec<String> = self
637                        .context
638                        .variable_names()
639                        .iter()
640                        .filter(|n| {
641                            !pre_apply_names.contains(*n) && !input_output_names.contains(*n)
642                        })
643                        .cloned()
644                        .collect();
645                    for name in input_internals {
646                        self.context.remove_variable(&name);
647                    }
648                }
649
650                // Snapshot the permitted outer context for the subplan.
651                let outer_names: HashSet<String> =
652                    self.context.variable_names().iter().cloned().collect();
653
654                self.bind_operator(&apply.subplan)?;
655
656                // Remove internal-only variables added by the subplan (those that
657                // are not output columns). Prevents subplan internals from leaking
658                // into the outer query or sibling CALL blocks.
659                let mut subplan_output_ctx = BindingContext::new();
660                Self::register_subplan_columns(&apply.subplan, &mut subplan_output_ctx);
661                let subplan_output_names: HashSet<String> = subplan_output_ctx
662                    .variable_names()
663                    .iter()
664                    .cloned()
665                    .collect();
666
667                let to_remove: Vec<String> = self
668                    .context
669                    .variable_names()
670                    .iter()
671                    .filter(|n| !outer_names.contains(*n) && !subplan_output_names.contains(*n))
672                    .cloned()
673                    .collect();
674                for name in to_remove {
675                    self.context.remove_variable(&name);
676                }
677
678                // Register output columns so downstream operators can reference them.
679                Self::register_subplan_columns(&apply.subplan, &mut self.context);
680                Ok(())
681            }
682            LogicalOperator::MultiWayJoin(mwj) => {
683                for input in &mwj.inputs {
684                    self.bind_operator(input)?;
685                }
686                for cond in &mwj.conditions {
687                    self.validate_expression(&cond.left)?;
688                    self.validate_expression(&cond.right)?;
689                }
690                Ok(())
691            }
692            LogicalOperator::ParameterScan(param_scan) => {
693                // Register parameter columns as variables (injected by outer Apply)
694                for col in &param_scan.columns {
695                    self.context.add_variable(
696                        col.clone(),
697                        VariableInfo {
698                            name: col.clone(),
699                            data_type: LogicalType::Any,
700                            is_node: true,
701                            is_edge: false,
702                        },
703                    );
704                }
705                Ok(())
706            }
707            // DDL operators don't need binding: they're handled before the binder
708            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
709            // Procedure calls: register yielded columns as variables for downstream operators
710            LogicalOperator::CallProcedure(call) => {
711                if let Some(yields) = &call.yield_items {
712                    for item in yields {
713                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
714                        self.context.add_variable(
715                            var_name.to_string(),
716                            VariableInfo {
717                                name: var_name.to_string(),
718                                data_type: LogicalType::Any,
719                                is_node: false,
720                                is_edge: false,
721                            },
722                        );
723                    }
724                }
725                Ok(())
726            }
727            LogicalOperator::LoadData(load) => {
728                // The row variable is bound as Any (Map or List depending on WITH HEADERS)
729                self.context.add_variable(
730                    load.variable.clone(),
731                    VariableInfo {
732                        name: load.variable.clone(),
733                        data_type: LogicalType::Any,
734                        is_node: false,
735                        is_edge: false,
736                    },
737                );
738                Ok(())
739            }
740            LogicalOperator::Construct(construct) => self.bind_operator(&construct.input),
741        }
742    }
743
744    /// Binds a triple scan operator (for RDF/SPARQL).
745    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
746        use crate::query::plan::TripleComponent;
747
748        // First bind the input if present
749        if let Some(ref input) = scan.input {
750            self.bind_operator(input)?;
751        }
752
753        // Add variables for subject, predicate, object
754        if let TripleComponent::Variable(name) = &scan.subject
755            && !self.context.contains(name)
756        {
757            self.context.add_variable(
758                name.clone(),
759                VariableInfo {
760                    name: name.clone(),
761                    data_type: LogicalType::Any, // RDF term
762                    is_node: false,
763                    is_edge: false,
764                },
765            );
766        }
767
768        if let TripleComponent::Variable(name) = &scan.predicate
769            && !self.context.contains(name)
770        {
771            self.context.add_variable(
772                name.clone(),
773                VariableInfo {
774                    name: name.clone(),
775                    data_type: LogicalType::Any, // IRI
776                    is_node: false,
777                    is_edge: false,
778                },
779            );
780        }
781
782        if let TripleComponent::Variable(name) = &scan.object
783            && !self.context.contains(name)
784        {
785            self.context.add_variable(
786                name.clone(),
787                VariableInfo {
788                    name: name.clone(),
789                    data_type: LogicalType::Any, // RDF term
790                    is_node: false,
791                    is_edge: false,
792                },
793            );
794        }
795
796        if let Some(TripleComponent::Variable(name)) = &scan.graph
797            && !self.context.contains(name)
798        {
799            self.context.add_variable(
800                name.clone(),
801                VariableInfo {
802                    name: name.clone(),
803                    data_type: LogicalType::Any, // IRI
804                    is_node: false,
805                    is_edge: false,
806                },
807            );
808        }
809
810        Ok(())
811    }
812
813    /// Binds a node scan operator.
814    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
815        // First bind the input if present
816        if let Some(ref input) = scan.input {
817            self.bind_operator(input)?;
818        }
819
820        // Add the scanned variable to scope
821        self.context.add_variable(
822            scan.variable.clone(),
823            VariableInfo {
824                name: scan.variable.clone(),
825                data_type: LogicalType::Node,
826                is_node: true,
827                is_edge: false,
828            },
829        );
830
831        Ok(())
832    }
833
834    /// Binds an expand operator.
835    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
836        // First bind the input
837        self.bind_operator(&expand.input)?;
838
839        // Validate that the source variable is defined
840        if !self.context.contains(&expand.from_variable) {
841            return Err(undefined_variable_error(
842                &expand.from_variable,
843                &self.context,
844                " in EXPAND",
845            ));
846        }
847
848        // Validate that the source is a node
849        if let Some(info) = self.context.get(&expand.from_variable)
850            && !info.is_node
851        {
852            return Err(binding_error(format!(
853                "Variable '{}' is not a node, cannot expand from it",
854                expand.from_variable
855            )));
856        }
857
858        // Add edge variable if present
859        if let Some(ref edge_var) = expand.edge_variable {
860            self.context.add_variable(
861                edge_var.clone(),
862                VariableInfo {
863                    name: edge_var.clone(),
864                    data_type: LogicalType::Edge,
865                    is_node: false,
866                    is_edge: true,
867                },
868            );
869        }
870
871        // Add target variable
872        self.context.add_variable(
873            expand.to_variable.clone(),
874            VariableInfo {
875                name: expand.to_variable.clone(),
876                data_type: LogicalType::Node,
877                is_node: true,
878                is_edge: false,
879            },
880        );
881
882        // Add path variables for variable-length paths
883        if let Some(ref path_alias) = expand.path_alias {
884            // Register the path variable itself (e.g. p in MATCH p=...)
885            self.context.add_variable(
886                path_alias.clone(),
887                VariableInfo {
888                    name: path_alias.clone(),
889                    data_type: LogicalType::Any,
890                    is_node: false,
891                    is_edge: false,
892                },
893            );
894            // length(p) → _path_length_p
895            let path_length_var = format!("_path_length_{}", path_alias);
896            self.context.add_variable(
897                path_length_var.clone(),
898                VariableInfo {
899                    name: path_length_var,
900                    data_type: LogicalType::Int64,
901                    is_node: false,
902                    is_edge: false,
903                },
904            );
905            // nodes(p) → _path_nodes_p
906            let path_nodes_var = format!("_path_nodes_{}", path_alias);
907            self.context.add_variable(
908                path_nodes_var.clone(),
909                VariableInfo {
910                    name: path_nodes_var,
911                    data_type: LogicalType::Any,
912                    is_node: false,
913                    is_edge: false,
914                },
915            );
916            // edges(p) → _path_edges_p
917            let path_edges_var = format!("_path_edges_{}", path_alias);
918            self.context.add_variable(
919                path_edges_var.clone(),
920                VariableInfo {
921                    name: path_edges_var,
922                    data_type: LogicalType::Any,
923                    is_node: false,
924                    is_edge: false,
925                },
926            );
927        }
928
929        Ok(())
930    }
931
932    /// Binds a filter operator.
933    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
934        // First bind the input
935        self.bind_operator(&filter.input)?;
936
937        // Validate the predicate expression
938        self.validate_expression(&filter.predicate)?;
939
940        Ok(())
941    }
942
943    /// Registers output columns from a subplan into the binding context.
944    /// Walks through wrapping operators to find a Return and extracts column names.
945    fn register_subplan_columns(plan: &LogicalOperator, ctx: &mut BindingContext) {
946        match plan {
947            LogicalOperator::Return(ret) => {
948                for item in &ret.items {
949                    let col_name = if let Some(alias) = &item.alias {
950                        alias.clone()
951                    } else {
952                        match &item.expression {
953                            LogicalExpression::Variable(name) => name.clone(),
954                            LogicalExpression::Property { variable, property } => {
955                                format!("{variable}.{property}")
956                            }
957                            _ => continue,
958                        }
959                    };
960                    ctx.add_variable(
961                        col_name.clone(),
962                        VariableInfo {
963                            name: col_name,
964                            data_type: LogicalType::Any,
965                            is_node: false,
966                            is_edge: false,
967                        },
968                    );
969                }
970            }
971            LogicalOperator::Sort(s) => Self::register_subplan_columns(&s.input, ctx),
972            LogicalOperator::Limit(l) => Self::register_subplan_columns(&l.input, ctx),
973            LogicalOperator::Distinct(d) => Self::register_subplan_columns(&d.input, ctx),
974            LogicalOperator::Aggregate(agg) => {
975                // Aggregate produces named output columns
976                for expr in &agg.aggregates {
977                    if let Some(alias) = &expr.alias {
978                        ctx.add_variable(
979                            alias.clone(),
980                            VariableInfo {
981                                name: alias.clone(),
982                                data_type: LogicalType::Any,
983                                is_node: false,
984                                is_edge: false,
985                            },
986                        );
987                    }
988                }
989            }
990            _ => {}
991        }
992    }
993
994    /// Binds a return operator.
995    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
996        // First bind the input
997        self.bind_operator(&ret.input)?;
998
999        // Validate all return expressions and register aliases
1000        // (aliases must be visible to parent Sort for ORDER BY resolution)
1001        for item in &ret.items {
1002            self.validate_return_item(item)?;
1003            if let Some(ref alias) = item.alias {
1004                let data_type = self.infer_expression_type(&item.expression);
1005                self.context.add_variable(
1006                    alias.clone(),
1007                    VariableInfo {
1008                        name: alias.clone(),
1009                        data_type,
1010                        is_node: false,
1011                        is_edge: false,
1012                    },
1013                );
1014            }
1015        }
1016
1017        Ok(())
1018    }
1019
1020    /// Validates a return item.
1021    fn validate_return_item(&mut self, item: &ReturnItem) -> Result<()> {
1022        self.validate_expression(&item.expression)
1023    }
1024
1025    /// Validates that an expression only references defined variables.
1026    fn validate_expression(&mut self, expr: &LogicalExpression) -> Result<()> {
1027        match expr {
1028            LogicalExpression::Variable(name) => {
1029                // "*" is a wildcard marker for RETURN *, expanded by the planner
1030                if name == "*" {
1031                    return Ok(());
1032                }
1033                if !self.context.contains(name) && !name.starts_with("_anon_") {
1034                    return Err(undefined_variable_error(name, &self.context, ""));
1035                }
1036                Ok(())
1037            }
1038            LogicalExpression::Property { variable, .. } => {
1039                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
1040                    return Err(undefined_variable_error(
1041                        variable,
1042                        &self.context,
1043                        " in property access",
1044                    ));
1045                }
1046                Ok(())
1047            }
1048            LogicalExpression::Literal(_) => Ok(()),
1049            LogicalExpression::Binary { left, right, .. } => {
1050                self.validate_expression(left)?;
1051                self.validate_expression(right)
1052            }
1053            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
1054            LogicalExpression::FunctionCall { args, .. } => {
1055                for arg in args {
1056                    self.validate_expression(arg)?;
1057                }
1058                Ok(())
1059            }
1060            LogicalExpression::List(items) => {
1061                for item in items {
1062                    self.validate_expression(item)?;
1063                }
1064                Ok(())
1065            }
1066            LogicalExpression::Map(pairs) => {
1067                for (_, value) in pairs {
1068                    self.validate_expression(value)?;
1069                }
1070                Ok(())
1071            }
1072            LogicalExpression::IndexAccess { base, index } => {
1073                self.validate_expression(base)?;
1074                self.validate_expression(index)
1075            }
1076            LogicalExpression::SliceAccess { base, start, end } => {
1077                self.validate_expression(base)?;
1078                if let Some(s) = start {
1079                    self.validate_expression(s)?;
1080                }
1081                if let Some(e) = end {
1082                    self.validate_expression(e)?;
1083                }
1084                Ok(())
1085            }
1086            LogicalExpression::Case {
1087                operand,
1088                when_clauses,
1089                else_clause,
1090            } => {
1091                if let Some(op) = operand {
1092                    self.validate_expression(op)?;
1093                }
1094                for (cond, result) in when_clauses {
1095                    self.validate_expression(cond)?;
1096                    self.validate_expression(result)?;
1097                }
1098                if let Some(else_expr) = else_clause {
1099                    self.validate_expression(else_expr)?;
1100                }
1101                Ok(())
1102            }
1103            // Parameter references are validated externally
1104            LogicalExpression::Parameter(_) => Ok(()),
1105            // labels(n), type(e), id(n) need the variable to be defined
1106            LogicalExpression::Labels(var)
1107            | LogicalExpression::Type(var)
1108            | LogicalExpression::Id(var) => {
1109                if !self.context.contains(var) && !var.starts_with("_anon_") {
1110                    return Err(undefined_variable_error(var, &self.context, " in function"));
1111                }
1112                Ok(())
1113            }
1114            LogicalExpression::ListComprehension { list_expr, .. } => {
1115                // Validate the list expression against the outer context.
1116                // The filter and map expressions use the iteration variable
1117                // which is locally scoped, so we skip validating them here.
1118                self.validate_expression(list_expr)?;
1119                Ok(())
1120            }
1121            LogicalExpression::ListPredicate { list_expr, .. } => {
1122                // Validate the list expression against the outer context.
1123                // The predicate uses the iteration variable which is locally
1124                // scoped, so we skip validating it against the outer context.
1125                self.validate_expression(list_expr)?;
1126                Ok(())
1127            }
1128            LogicalExpression::ExistsSubquery(subquery)
1129            | LogicalExpression::CountSubquery(subquery)
1130            | LogicalExpression::ValueSubquery(subquery) => {
1131                // Subqueries have their own binding context
1132                // For now, just validate the structure exists
1133                let _ = subquery; // Would need recursive binding
1134                Ok(())
1135            }
1136            LogicalExpression::PatternComprehension {
1137                subplan,
1138                projection,
1139            } => {
1140                // Bind the subplan to register pattern variables (e.g., `f` in `(p)-[:KNOWS]->(f)`)
1141                self.bind_operator(subplan)?;
1142                // Now validate the projection expression (e.g., `f.name`)
1143                self.validate_expression(projection)
1144            }
1145            LogicalExpression::MapProjection { base, entries } => {
1146                if !self.context.contains(base) && !base.starts_with("_anon_") {
1147                    return Err(undefined_variable_error(
1148                        base,
1149                        &self.context,
1150                        " in map projection",
1151                    ));
1152                }
1153                for entry in entries {
1154                    if let crate::query::plan::MapProjectionEntry::LiteralEntry(_, expr) = entry {
1155                        self.validate_expression(expr)?;
1156                    }
1157                }
1158                Ok(())
1159            }
1160            LogicalExpression::Reduce {
1161                accumulator,
1162                initial,
1163                variable,
1164                list,
1165                expression,
1166            } => {
1167                self.validate_expression(initial)?;
1168                self.validate_expression(list)?;
1169                // accumulator and variable are locally scoped: inject them
1170                // into context, validate body, then remove
1171                let had_acc = self.context.contains(accumulator);
1172                let had_var = self.context.contains(variable);
1173                if !had_acc {
1174                    self.context.add_variable(
1175                        accumulator.clone(),
1176                        VariableInfo {
1177                            name: accumulator.clone(),
1178                            data_type: LogicalType::Any,
1179                            is_node: false,
1180                            is_edge: false,
1181                        },
1182                    );
1183                }
1184                if !had_var {
1185                    self.context.add_variable(
1186                        variable.clone(),
1187                        VariableInfo {
1188                            name: variable.clone(),
1189                            data_type: LogicalType::Any,
1190                            is_node: false,
1191                            is_edge: false,
1192                        },
1193                    );
1194                }
1195                self.validate_expression(expression)?;
1196                if !had_acc {
1197                    self.context.remove_variable(accumulator);
1198                }
1199                if !had_var {
1200                    self.context.remove_variable(variable);
1201                }
1202                Ok(())
1203            }
1204        }
1205    }
1206
1207    /// Infers the type of an expression for use in WITH clause aliasing.
1208    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
1209        match expr {
1210            LogicalExpression::Variable(name) => {
1211                // Look up the variable type from context
1212                self.context
1213                    .get(name)
1214                    .map_or(LogicalType::Any, |info| info.data_type.clone())
1215            }
1216            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
1217            LogicalExpression::Literal(value) => {
1218                // Infer type from literal value
1219                use grafeo_common::types::Value;
1220                match value {
1221                    Value::Bool(_) => LogicalType::Bool,
1222                    Value::Int64(_) => LogicalType::Int64,
1223                    Value::Float64(_) => LogicalType::Float64,
1224                    Value::String(_) => LogicalType::String,
1225                    Value::List(_) => LogicalType::Any, // Complex type
1226                    Value::Map(_) => LogicalType::Any,  // Complex type
1227                    Value::Null => LogicalType::Any,
1228                    _ => LogicalType::Any,
1229                }
1230            }
1231            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
1232            LogicalExpression::Unary { .. } => LogicalType::Any,
1233            LogicalExpression::FunctionCall { name, .. } => {
1234                // Infer based on function name
1235                match name.to_lowercase().as_str() {
1236                    "count" | "sum" | "id" => LogicalType::Int64,
1237                    "avg" => LogicalType::Float64,
1238                    "type" => LogicalType::String,
1239                    // List-returning functions use Any since we don't track element type
1240                    "labels" | "collect" => LogicalType::Any,
1241                    _ => LogicalType::Any,
1242                }
1243            }
1244            LogicalExpression::List(_) => LogicalType::Any, // Complex type
1245            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
1246            _ => LogicalType::Any,
1247        }
1248    }
1249
1250    /// Infers whether an expression resolves to a node or edge entity.
1251    ///
1252    /// Returns `(is_node, is_edge)`. This propagates entity status through
1253    /// simple Variable references and Case expressions whose branches all
1254    /// agree on entity kind (used by optional() translation).
1255    fn infer_entity_status(&self, expr: &LogicalExpression) -> (bool, bool) {
1256        match expr {
1257            LogicalExpression::Variable(src) => self
1258                .context
1259                .get(src)
1260                .map_or((false, false), |info| (info.is_node, info.is_edge)),
1261            LogicalExpression::Case {
1262                when_clauses,
1263                else_clause,
1264                ..
1265            } => {
1266                // Collect entity status from all THEN and ELSE branches
1267                let mut all_node = true;
1268                let mut all_edge = true;
1269                let mut any_branch = false;
1270                for (_, then_expr) in when_clauses {
1271                    let (n, e) = self.infer_entity_status(then_expr);
1272                    all_node &= n;
1273                    all_edge &= e;
1274                    any_branch = true;
1275                }
1276                if let Some(else_expr) = else_clause {
1277                    let (n, e) = self.infer_entity_status(else_expr);
1278                    all_node &= n;
1279                    all_edge &= e;
1280                    any_branch = true;
1281                }
1282                if any_branch {
1283                    (all_node, all_edge)
1284                } else {
1285                    (false, false)
1286                }
1287            }
1288            _ => (false, false),
1289        }
1290    }
1291
1292    /// Binds a join operator.
1293    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
1294        // Bind both sides of the join
1295        self.bind_operator(&join.left)?;
1296        self.bind_operator(&join.right)?;
1297
1298        // Validate join conditions
1299        for condition in &join.conditions {
1300            self.validate_expression(&condition.left)?;
1301            self.validate_expression(&condition.right)?;
1302        }
1303
1304        Ok(())
1305    }
1306
1307    /// Binds an aggregate operator.
1308    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
1309        // Bind the input first
1310        self.bind_operator(&agg.input)?;
1311
1312        // Validate group by expressions
1313        for expr in &agg.group_by {
1314            self.validate_expression(expr)?;
1315        }
1316
1317        // Validate aggregate expressions
1318        for agg_expr in &agg.aggregates {
1319            if let Some(ref expr) = agg_expr.expression {
1320                self.validate_expression(expr)?;
1321            }
1322            // Add the alias as a new variable if present
1323            if let Some(ref alias) = agg_expr.alias {
1324                self.context.add_variable(
1325                    alias.clone(),
1326                    VariableInfo {
1327                        name: alias.clone(),
1328                        data_type: LogicalType::Any,
1329                        is_node: false,
1330                        is_edge: false,
1331                    },
1332                );
1333            }
1334        }
1335
1336        // Register group-by output column names so ORDER BY / HAVING
1337        // can reference them (e.g. "n.city" from Property(n, city)).
1338        for expr in &agg.group_by {
1339            let col_name = crate::query::planner::common::expression_to_string(expr);
1340            if !self.context.contains(&col_name) {
1341                self.context.add_variable(
1342                    col_name.clone(),
1343                    VariableInfo {
1344                        name: col_name,
1345                        data_type: LogicalType::Any,
1346                        is_node: false,
1347                        is_edge: false,
1348                    },
1349                );
1350            }
1351        }
1352
1353        Ok(())
1354    }
1355}
1356
1357impl Default for Binder {
1358    fn default() -> Self {
1359        Self::new()
1360    }
1361}
1362
1363#[cfg(test)]
1364mod tests {
1365    use super::*;
1366    use crate::query::plan::{BinaryOp, FilterOp};
1367
#[test]
fn test_bind_simple_scan() {
    // Node scan of `n` (label Person) feeding a RETURN of `n`
    let person_scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_string(),
        label: Some("Person".to_string()),
        input: None,
    });
    let return_n = ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable("n".to_string()),
            alias: None,
        }],
        distinct: false,
        input: Box::new(person_scan),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(return_n));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);

    assert!(result.is_ok());
    let ctx = result.unwrap();
    assert!(ctx.contains("n"));
    assert!(ctx.get("n").unwrap().is_node);
}
1391
#[test]
fn test_bind_undefined_variable() {
    // Only `n` is scanned, but the RETURN references `undefined`
    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_string(),
        label: None,
        input: None,
    });
    let return_unknown = ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable("undefined".to_string()),
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan_n),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(return_unknown));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);

    assert!(result.is_err());
    let err = result.unwrap_err();
    assert!(err.to_string().contains("Undefined variable"));
}
1414
#[test]
fn test_bind_property_access() {
    // RETURN of the property `n.name` over a scan that defines `n`
    let person_scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_string(),
        label: Some("Person".to_string()),
        input: None,
    });
    let name_of_n = LogicalExpression::Property {
        variable: "n".to_string(),
        property: "name".to_string(),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: name_of_n,
            alias: None,
        }],
        distinct: false,
        input: Box::new(person_scan),
    }));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);

    assert!(result.is_ok());
}
1438
#[test]
fn test_bind_filter_with_undefined_variable() {
    // The filter predicate compares `m.age > 30`, but only `n` is scanned
    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_string(),
        label: None,
        input: None,
    });
    let age_of_m = LogicalExpression::Property {
        variable: "m".to_string(), // undefined!
        property: "age".to_string(),
    };
    let thirty = LogicalExpression::Literal(grafeo_common::types::Value::Int64(30));
    let filter = LogicalOperator::Filter(FilterOp {
        predicate: LogicalExpression::Binary {
            left: Box::new(age_of_m),
            op: BinaryOp::Gt,
            right: Box::new(thirty),
        },
        input: Box::new(scan_n),
        pushdown_hint: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable("n".to_string()),
            alias: None,
        }],
        distinct: false,
        input: Box::new(filter),
    }));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);

    assert!(result.is_err());
    let err = result.unwrap_err();
    assert!(err.to_string().contains("Undefined variable 'm'"));
}
1474
#[test]
fn test_bind_expand() {
    use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};

    // Scan `a`, expand over a KNOWS edge `e` to `b`, return `a` and `b`
    let scan_a = LogicalOperator::NodeScan(NodeScanOp {
        variable: "a".to_string(),
        label: Some("Person".to_string()),
        input: None,
    });
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: "a".to_string(),
        to_variable: "b".to_string(),
        edge_variable: Some("e".to_string()),
        direction: ExpandDirection::Outgoing,
        edge_types: vec!["KNOWS".to_string()],
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(scan_a),
        path_alias: None,
        path_mode: PathMode::Walk,
    });
    let returned = vec![
        ReturnItem {
            expression: LogicalExpression::Variable("a".to_string()),
            alias: None,
        },
        ReturnItem {
            expression: LogicalExpression::Variable("b".to_string()),
            alias: None,
        },
    ];
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: returned,
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let result = binder.bind(&plan);

    assert!(result.is_ok());
    let ctx = result.unwrap();
    assert!(ctx.contains("a"));
    assert!(ctx.contains("b"));
    assert!(ctx.contains("e"));
    assert!(ctx.get("a").unwrap().is_node);
    assert!(ctx.get("b").unwrap().is_node);
    assert!(ctx.get("e").unwrap().is_edge);
}
1521
1522    #[test]
1523    fn test_bind_expand_from_undefined_variable() {
1524        // Tests that expanding from an undefined variable produces a clear error
1525        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1526
1527        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1528            items: vec![ReturnItem {
1529                expression: LogicalExpression::Variable("b".to_string()),
1530                alias: None,
1531            }],
1532            distinct: false,
1533            input: Box::new(LogicalOperator::Expand(ExpandOp {
1534                from_variable: "undefined".to_string(), // not defined!
1535                to_variable: "b".to_string(),
1536                edge_variable: None,
1537                direction: ExpandDirection::Outgoing,
1538                edge_types: vec![],
1539                min_hops: 1,
1540                max_hops: Some(1),
1541                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1542                    variable: "a".to_string(),
1543                    label: None,
1544                    input: None,
1545                })),
1546                path_alias: None,
1547                path_mode: PathMode::Walk,
1548            })),
1549        }));
1550
1551        let mut binder = Binder::new();
1552        let result = binder.bind(&plan);
1553
1554        assert!(result.is_err());
1555        let err = result.unwrap_err();
1556        assert!(
1557            err.to_string().contains("Undefined variable 'undefined'"),
1558            "Expected error about undefined variable, got: {}",
1559            err
1560        );
1561    }
1562
1563    #[test]
1564    fn test_bind_return_with_aggregate_and_non_aggregate() {
1565        // Tests binding of aggregate functions alongside regular expressions
1566        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1567            items: vec![
1568                ReturnItem {
1569                    expression: LogicalExpression::FunctionCall {
1570                        name: "count".to_string(),
1571                        args: vec![LogicalExpression::Variable("n".to_string())],
1572                        distinct: false,
1573                    },
1574                    alias: Some("cnt".to_string()),
1575                },
1576                ReturnItem {
1577                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1578                    alias: Some("one".to_string()),
1579                },
1580            ],
1581            distinct: false,
1582            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1583                variable: "n".to_string(),
1584                label: Some("Person".to_string()),
1585                input: None,
1586            })),
1587        }));
1588
1589        let mut binder = Binder::new();
1590        let result = binder.bind(&plan);
1591
1592        // This should succeed - count(n) with literal is valid
1593        assert!(result.is_ok());
1594    }
1595
1596    #[test]
1597    fn test_bind_nested_property_access() {
1598        // Tests that nested property access on the same variable works
1599        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1600            items: vec![
1601                ReturnItem {
1602                    expression: LogicalExpression::Property {
1603                        variable: "n".to_string(),
1604                        property: "name".to_string(),
1605                    },
1606                    alias: None,
1607                },
1608                ReturnItem {
1609                    expression: LogicalExpression::Property {
1610                        variable: "n".to_string(),
1611                        property: "age".to_string(),
1612                    },
1613                    alias: None,
1614                },
1615            ],
1616            distinct: false,
1617            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1618                variable: "n".to_string(),
1619                label: Some("Person".to_string()),
1620                input: None,
1621            })),
1622        }));
1623
1624        let mut binder = Binder::new();
1625        let result = binder.bind(&plan);
1626
1627        assert!(result.is_ok());
1628    }
1629
1630    #[test]
1631    fn test_bind_binary_expression_with_undefined() {
1632        // Tests that binary expressions with undefined variables produce errors
1633        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1634            items: vec![ReturnItem {
1635                expression: LogicalExpression::Binary {
1636                    left: Box::new(LogicalExpression::Property {
1637                        variable: "n".to_string(),
1638                        property: "age".to_string(),
1639                    }),
1640                    op: BinaryOp::Add,
1641                    right: Box::new(LogicalExpression::Property {
1642                        variable: "m".to_string(), // undefined!
1643                        property: "age".to_string(),
1644                    }),
1645                },
1646                alias: Some("total".to_string()),
1647            }],
1648            distinct: false,
1649            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1650                variable: "n".to_string(),
1651                label: None,
1652                input: None,
1653            })),
1654        }));
1655
1656        let mut binder = Binder::new();
1657        let result = binder.bind(&plan);
1658
1659        assert!(result.is_err());
1660        assert!(
1661            result
1662                .unwrap_err()
1663                .to_string()
1664                .contains("Undefined variable 'm'")
1665        );
1666    }
1667
1668    #[test]
1669    fn test_bind_duplicate_variable_definition() {
1670        // Tests behavior when the same variable is defined twice (via two NodeScans)
1671        // This is typically not allowed or the second shadows the first
1672        use crate::query::plan::{JoinOp, JoinType};
1673
1674        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1675            items: vec![ReturnItem {
1676                expression: LogicalExpression::Variable("n".to_string()),
1677                alias: None,
1678            }],
1679            distinct: false,
1680            input: Box::new(LogicalOperator::Join(JoinOp {
1681                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1682                    variable: "n".to_string(),
1683                    label: Some("A".to_string()),
1684                    input: None,
1685                })),
1686                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1687                    variable: "m".to_string(), // different variable is fine
1688                    label: Some("B".to_string()),
1689                    input: None,
1690                })),
1691                join_type: JoinType::Inner,
1692                conditions: vec![],
1693            })),
1694        }));
1695
1696        let mut binder = Binder::new();
1697        let result = binder.bind(&plan);
1698
1699        // Join with different variables should work
1700        assert!(result.is_ok());
1701        let ctx = result.unwrap();
1702        assert!(ctx.contains("n"));
1703        assert!(ctx.contains("m"));
1704    }
1705
1706    #[test]
1707    fn test_bind_function_with_wrong_arity() {
1708        // Tests that functions with wrong number of arguments are handled
1709        // (behavior depends on whether binder validates arity)
1710        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1711            items: vec![ReturnItem {
1712                expression: LogicalExpression::FunctionCall {
1713                    name: "count".to_string(),
1714                    args: vec![], // count() needs an argument
1715                    distinct: false,
1716                },
1717                alias: None,
1718            }],
1719            distinct: false,
1720            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1721                variable: "n".to_string(),
1722                label: None,
1723                input: None,
1724            })),
1725        }));
1726
1727        let mut binder = Binder::new();
1728        let result = binder.bind(&plan);
1729
1730        // The binder may or may not catch this - if it passes, execution will fail
1731        // This test documents current behavior
1732        // If binding fails, that's fine; if it passes, execution will handle it
1733        let _ = result; // We're just testing it doesn't panic
1734    }
1735
1736    // --- Mutation operator validation ---
1737
1738    #[test]
1739    fn test_create_edge_rejects_undefined_source() {
1740        use crate::query::plan::CreateEdgeOp;
1741
1742        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1743            variable: Some("e".to_string()),
1744            from_variable: "ghost".to_string(), // not defined!
1745            to_variable: "b".to_string(),
1746            edge_type: "KNOWS".to_string(),
1747            properties: vec![],
1748            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1749                variable: "b".to_string(),
1750                label: None,
1751                input: None,
1752            })),
1753        }));
1754
1755        let mut binder = Binder::new();
1756        let err = binder.bind(&plan).unwrap_err();
1757        assert!(
1758            err.to_string().contains("Undefined variable 'ghost'"),
1759            "Should reject undefined source variable, got: {err}"
1760        );
1761    }
1762
1763    #[test]
1764    fn test_create_edge_rejects_undefined_target() {
1765        use crate::query::plan::CreateEdgeOp;
1766
1767        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1768            variable: None,
1769            from_variable: "a".to_string(),
1770            to_variable: "missing".to_string(), // not defined!
1771            edge_type: "KNOWS".to_string(),
1772            properties: vec![],
1773            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1774                variable: "a".to_string(),
1775                label: None,
1776                input: None,
1777            })),
1778        }));
1779
1780        let mut binder = Binder::new();
1781        let err = binder.bind(&plan).unwrap_err();
1782        assert!(
1783            err.to_string().contains("Undefined variable 'missing'"),
1784            "Should reject undefined target variable, got: {err}"
1785        );
1786    }
1787
1788    #[test]
1789    fn test_create_edge_validates_property_expressions() {
1790        use crate::query::plan::CreateEdgeOp;
1791
1792        // Source and target defined, but property references undefined variable
1793        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1794            variable: Some("e".to_string()),
1795            from_variable: "a".to_string(),
1796            to_variable: "b".to_string(),
1797            edge_type: "KNOWS".to_string(),
1798            properties: vec![(
1799                "since".to_string(),
1800                LogicalExpression::Property {
1801                    variable: "x".to_string(), // undefined!
1802                    property: "year".to_string(),
1803                },
1804            )],
1805            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1806                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1807                    variable: "a".to_string(),
1808                    label: None,
1809                    input: None,
1810                })),
1811                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1812                    variable: "b".to_string(),
1813                    label: None,
1814                    input: None,
1815                })),
1816                join_type: crate::query::plan::JoinType::Inner,
1817                conditions: vec![],
1818            })),
1819        }));
1820
1821        let mut binder = Binder::new();
1822        let err = binder.bind(&plan).unwrap_err();
1823        assert!(err.to_string().contains("Undefined variable 'x'"));
1824    }
1825
1826    #[test]
1827    fn test_set_property_rejects_undefined_variable() {
1828        use crate::query::plan::SetPropertyOp;
1829
1830        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1831            variable: "ghost".to_string(),
1832            properties: vec![(
1833                "name".to_string(),
1834                LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1835            )],
1836            replace: false,
1837            is_edge: false,
1838            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1839                variable: "n".to_string(),
1840                label: None,
1841                input: None,
1842            })),
1843        }));
1844
1845        let mut binder = Binder::new();
1846        let err = binder.bind(&plan).unwrap_err();
1847        assert!(
1848            err.to_string().contains("in SET"),
1849            "Error should indicate SET context, got: {err}"
1850        );
1851    }
1852
1853    #[test]
1854    fn test_delete_node_rejects_undefined_variable() {
1855        use crate::query::plan::DeleteNodeOp;
1856
1857        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1858            variable: "phantom".to_string(),
1859            detach: false,
1860            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1861                variable: "n".to_string(),
1862                label: None,
1863                input: None,
1864            })),
1865        }));
1866
1867        let mut binder = Binder::new();
1868        let err = binder.bind(&plan).unwrap_err();
1869        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1870    }
1871
1872    #[test]
1873    fn test_delete_edge_rejects_undefined_variable() {
1874        use crate::query::plan::DeleteEdgeOp;
1875
1876        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1877            variable: "gone".to_string(),
1878            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1879                variable: "n".to_string(),
1880                label: None,
1881                input: None,
1882            })),
1883        }));
1884
1885        let mut binder = Binder::new();
1886        let err = binder.bind(&plan).unwrap_err();
1887        assert!(err.to_string().contains("Undefined variable 'gone'"));
1888    }
1889
1890    // --- WITH/Project clause ---
1891
1892    #[test]
1893    fn test_project_alias_becomes_available_downstream() {
1894        use crate::query::plan::{ProjectOp, Projection};
1895
1896        // WITH n.name AS person_name RETURN person_name
1897        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1898            items: vec![ReturnItem {
1899                expression: LogicalExpression::Variable("person_name".to_string()),
1900                alias: None,
1901            }],
1902            distinct: false,
1903            input: Box::new(LogicalOperator::Project(ProjectOp {
1904                projections: vec![Projection {
1905                    expression: LogicalExpression::Property {
1906                        variable: "n".to_string(),
1907                        property: "name".to_string(),
1908                    },
1909                    alias: Some("person_name".to_string()),
1910                }],
1911                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1912                    variable: "n".to_string(),
1913                    label: None,
1914                    input: None,
1915                })),
1916                pass_through_input: false,
1917            })),
1918        }));
1919
1920        let mut binder = Binder::new();
1921        let ctx = binder.bind(&plan).unwrap();
1922        assert!(
1923            ctx.contains("person_name"),
1924            "WITH alias should be available to RETURN"
1925        );
1926    }
1927
1928    #[test]
1929    fn test_project_rejects_undefined_expression() {
1930        use crate::query::plan::{ProjectOp, Projection};
1931
1932        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1933            projections: vec![Projection {
1934                expression: LogicalExpression::Variable("nope".to_string()),
1935                alias: Some("x".to_string()),
1936            }],
1937            input: Box::new(LogicalOperator::Empty),
1938            pass_through_input: false,
1939        }));
1940
1941        let mut binder = Binder::new();
1942        let result = binder.bind(&plan);
1943        assert!(result.is_err(), "WITH on undefined variable should fail");
1944    }
1945
1946    // --- UNWIND ---
1947
1948    #[test]
1949    fn test_unwind_adds_element_variable() {
1950        use crate::query::plan::UnwindOp;
1951
1952        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1953            items: vec![ReturnItem {
1954                expression: LogicalExpression::Variable("item".to_string()),
1955                alias: None,
1956            }],
1957            distinct: false,
1958            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1959                expression: LogicalExpression::List(vec![
1960                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1961                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1962                ]),
1963                variable: "item".to_string(),
1964                ordinality_var: None,
1965                offset_var: None,
1966                input: Box::new(LogicalOperator::Empty),
1967            })),
1968        }));
1969
1970        let mut binder = Binder::new();
1971        let ctx = binder.bind(&plan).unwrap();
1972        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1973        let info = ctx.get("item").unwrap();
1974        assert!(
1975            !info.is_node && !info.is_edge,
1976            "UNWIND variable is not a graph element"
1977        );
1978    }
1979
1980    // --- MERGE ---
1981
1982    #[test]
1983    fn test_merge_adds_variable_and_validates_properties() {
1984        use crate::query::plan::MergeOp;
1985
1986        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1987            items: vec![ReturnItem {
1988                expression: LogicalExpression::Variable("m".to_string()),
1989                alias: None,
1990            }],
1991            distinct: false,
1992            input: Box::new(LogicalOperator::Merge(MergeOp {
1993                variable: "m".to_string(),
1994                labels: vec!["Person".to_string()],
1995                match_properties: vec![(
1996                    "name".to_string(),
1997                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1998                )],
1999                on_create: vec![(
2000                    "created".to_string(),
2001                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2002                )],
2003                on_match: vec![(
2004                    "updated".to_string(),
2005                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2006                )],
2007                input: Box::new(LogicalOperator::Empty),
2008            })),
2009        }));
2010
2011        let mut binder = Binder::new();
2012        let ctx = binder.bind(&plan).unwrap();
2013        assert!(ctx.contains("m"));
2014        assert!(
2015            ctx.get("m").unwrap().is_node,
2016            "MERGE variable should be a node"
2017        );
2018    }
2019
2020    #[test]
2021    fn test_merge_rejects_undefined_in_on_create() {
2022        use crate::query::plan::MergeOp;
2023
2024        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
2025            variable: "m".to_string(),
2026            labels: vec![],
2027            match_properties: vec![],
2028            on_create: vec![(
2029                "name".to_string(),
2030                LogicalExpression::Property {
2031                    variable: "other".to_string(), // undefined!
2032                    property: "name".to_string(),
2033                },
2034            )],
2035            on_match: vec![],
2036            input: Box::new(LogicalOperator::Empty),
2037        }));
2038
2039        let mut binder = Binder::new();
2040        let result = binder.bind(&plan);
2041        assert!(
2042            result.is_err(),
2043            "ON CREATE referencing undefined variable should fail"
2044        );
2045    }
2046
2047    // --- ShortestPath ---
2048
2049    #[test]
2050    fn test_shortest_path_rejects_undefined_source() {
2051        use crate::query::plan::{ExpandDirection, ShortestPathOp};
2052
2053        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2054            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2055                variable: "b".to_string(),
2056                label: None,
2057                input: None,
2058            })),
2059            source_var: "missing".to_string(), // not defined
2060            target_var: "b".to_string(),
2061            edge_types: vec![],
2062            direction: ExpandDirection::Both,
2063            path_alias: "p".to_string(),
2064            all_paths: false,
2065        }));
2066
2067        let mut binder = Binder::new();
2068        let err = binder.bind(&plan).unwrap_err();
2069        assert!(
2070            err.to_string().contains("source in shortestPath"),
2071            "Error should mention shortestPath source context, got: {err}"
2072        );
2073    }
2074
2075    #[test]
2076    fn test_shortest_path_adds_path_and_length_variables() {
2077        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
2078
2079        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2080            input: Box::new(LogicalOperator::Join(JoinOp {
2081                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2082                    variable: "a".to_string(),
2083                    label: None,
2084                    input: None,
2085                })),
2086                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2087                    variable: "b".to_string(),
2088                    label: None,
2089                    input: None,
2090                })),
2091                join_type: JoinType::Cross,
2092                conditions: vec![],
2093            })),
2094            source_var: "a".to_string(),
2095            target_var: "b".to_string(),
2096            edge_types: vec!["ROAD".to_string()],
2097            direction: ExpandDirection::Outgoing,
2098            path_alias: "p".to_string(),
2099            all_paths: false,
2100        }));
2101
2102        let mut binder = Binder::new();
2103        let ctx = binder.bind(&plan).unwrap();
2104        assert!(ctx.contains("p"), "Path alias should be bound");
2105        assert!(
2106            ctx.contains("_path_length_p"),
2107            "Path length variable should be auto-created"
2108        );
2109    }
2110
2111    // --- Expression validation edge cases ---
2112
2113    #[test]
2114    fn test_case_expression_validates_all_branches() {
2115        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2116            items: vec![ReturnItem {
2117                expression: LogicalExpression::Case {
2118                    operand: None,
2119                    when_clauses: vec![
2120                        (
2121                            LogicalExpression::Binary {
2122                                left: Box::new(LogicalExpression::Property {
2123                                    variable: "n".to_string(),
2124                                    property: "age".to_string(),
2125                                }),
2126                                op: BinaryOp::Gt,
2127                                right: Box::new(LogicalExpression::Literal(
2128                                    grafeo_common::types::Value::Int64(18),
2129                                )),
2130                            },
2131                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2132                                "adult".into(),
2133                            )),
2134                        ),
2135                        (
2136                            // This branch references undefined variable
2137                            LogicalExpression::Property {
2138                                variable: "ghost".to_string(),
2139                                property: "flag".to_string(),
2140                            },
2141                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2142                                "flagged".into(),
2143                            )),
2144                        ),
2145                    ],
2146                    else_clause: Some(Box::new(LogicalExpression::Literal(
2147                        grafeo_common::types::Value::String("other".into()),
2148                    ))),
2149                },
2150                alias: None,
2151            }],
2152            distinct: false,
2153            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2154                variable: "n".to_string(),
2155                label: None,
2156                input: None,
2157            })),
2158        }));
2159
2160        let mut binder = Binder::new();
2161        let err = binder.bind(&plan).unwrap_err();
2162        assert!(
2163            err.to_string().contains("ghost"),
2164            "CASE should validate all when-clause conditions"
2165        );
2166    }
2167
2168    #[test]
2169    fn test_case_expression_validates_else_clause() {
2170        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2171            items: vec![ReturnItem {
2172                expression: LogicalExpression::Case {
2173                    operand: None,
2174                    when_clauses: vec![(
2175                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2176                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
2177                    )],
2178                    else_clause: Some(Box::new(LogicalExpression::Property {
2179                        variable: "missing".to_string(),
2180                        property: "x".to_string(),
2181                    })),
2182                },
2183                alias: None,
2184            }],
2185            distinct: false,
2186            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2187                variable: "n".to_string(),
2188                label: None,
2189                input: None,
2190            })),
2191        }));
2192
2193        let mut binder = Binder::new();
2194        let err = binder.bind(&plan).unwrap_err();
2195        assert!(
2196            err.to_string().contains("missing"),
2197            "CASE ELSE should validate its expression too"
2198        );
2199    }
2200
2201    #[test]
2202    fn test_slice_access_validates_expressions() {
2203        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2204            items: vec![ReturnItem {
2205                expression: LogicalExpression::SliceAccess {
2206                    base: Box::new(LogicalExpression::Variable("n".to_string())),
2207                    start: Some(Box::new(LogicalExpression::Variable(
2208                        "undefined_start".to_string(),
2209                    ))),
2210                    end: None,
2211                },
2212                alias: None,
2213            }],
2214            distinct: false,
2215            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2216                variable: "n".to_string(),
2217                label: None,
2218                input: None,
2219            })),
2220        }));
2221
2222        let mut binder = Binder::new();
2223        let err = binder.bind(&plan).unwrap_err();
2224        assert!(err.to_string().contains("undefined_start"));
2225    }
2226
2227    #[test]
2228    fn test_list_comprehension_validates_list_source() {
2229        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2230            items: vec![ReturnItem {
2231                expression: LogicalExpression::ListComprehension {
2232                    variable: "x".to_string(),
2233                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
2234                    filter_expr: None,
2235                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
2236                },
2237                alias: None,
2238            }],
2239            distinct: false,
2240            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2241                variable: "n".to_string(),
2242                label: None,
2243                input: None,
2244            })),
2245        }));
2246
2247        let mut binder = Binder::new();
2248        let err = binder.bind(&plan).unwrap_err();
2249        assert!(
2250            err.to_string().contains("not_defined"),
2251            "List comprehension should validate source list expression"
2252        );
2253    }
2254
2255    #[test]
2256    fn test_labels_type_id_reject_undefined() {
2257        // labels(x) where x is not defined
2258        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2259            items: vec![ReturnItem {
2260                expression: LogicalExpression::Labels("x".to_string()),
2261                alias: None,
2262            }],
2263            distinct: false,
2264            input: Box::new(LogicalOperator::Empty),
2265        }));
2266
2267        let mut binder = Binder::new();
2268        assert!(
2269            binder.bind(&plan).is_err(),
2270            "labels(x) on undefined x should fail"
2271        );
2272
2273        // type(e) where e is not defined
2274        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2275            items: vec![ReturnItem {
2276                expression: LogicalExpression::Type("e".to_string()),
2277                alias: None,
2278            }],
2279            distinct: false,
2280            input: Box::new(LogicalOperator::Empty),
2281        }));
2282
2283        let mut binder2 = Binder::new();
2284        assert!(
2285            binder2.bind(&plan2).is_err(),
2286            "type(e) on undefined e should fail"
2287        );
2288
2289        // id(n) where n is not defined
2290        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2291            items: vec![ReturnItem {
2292                expression: LogicalExpression::Id("n".to_string()),
2293                alias: None,
2294            }],
2295            distinct: false,
2296            input: Box::new(LogicalOperator::Empty),
2297        }));
2298
2299        let mut binder3 = Binder::new();
2300        assert!(
2301            binder3.bind(&plan3).is_err(),
2302            "id(n) on undefined n should fail"
2303        );
2304    }
2305
2306    #[test]
2307    fn test_expand_rejects_non_node_source() {
2308        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode, UnwindOp};
2309
2310        // UNWIND [1,2] AS x  -- x is not a node
2311        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
2312        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2313            items: vec![ReturnItem {
2314                expression: LogicalExpression::Variable("b".to_string()),
2315                alias: None,
2316            }],
2317            distinct: false,
2318            input: Box::new(LogicalOperator::Expand(ExpandOp {
2319                from_variable: "x".to_string(),
2320                to_variable: "b".to_string(),
2321                edge_variable: None,
2322                direction: ExpandDirection::Outgoing,
2323                edge_types: vec![],
2324                min_hops: 1,
2325                max_hops: Some(1),
2326                input: Box::new(LogicalOperator::Unwind(UnwindOp {
2327                    expression: LogicalExpression::List(vec![]),
2328                    variable: "x".to_string(),
2329                    ordinality_var: None,
2330                    offset_var: None,
2331                    input: Box::new(LogicalOperator::Empty),
2332                })),
2333                path_alias: None,
2334                path_mode: PathMode::Walk,
2335            })),
2336        }));
2337
2338        let mut binder = Binder::new();
2339        let err = binder.bind(&plan).unwrap_err();
2340        assert!(
2341            err.to_string().contains("not a node"),
2342            "Expanding from non-node should fail, got: {err}"
2343        );
2344    }
2345
2346    #[test]
2347    fn test_add_label_rejects_undefined_variable() {
2348        use crate::query::plan::AddLabelOp;
2349
2350        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
2351            variable: "missing".to_string(),
2352            labels: vec!["Admin".to_string()],
2353            input: Box::new(LogicalOperator::Empty),
2354        }));
2355
2356        let mut binder = Binder::new();
2357        let err = binder.bind(&plan).unwrap_err();
2358        assert!(err.to_string().contains("SET labels"));
2359    }
2360
2361    #[test]
2362    fn test_remove_label_rejects_undefined_variable() {
2363        use crate::query::plan::RemoveLabelOp;
2364
2365        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
2366            variable: "missing".to_string(),
2367            labels: vec!["Admin".to_string()],
2368            input: Box::new(LogicalOperator::Empty),
2369        }));
2370
2371        let mut binder = Binder::new();
2372        let err = binder.bind(&plan).unwrap_err();
2373        assert!(err.to_string().contains("REMOVE labels"));
2374    }
2375
2376    #[test]
2377    fn test_sort_validates_key_expressions() {
2378        use crate::query::plan::{SortKey, SortOp, SortOrder};
2379
2380        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
2381            keys: vec![SortKey {
2382                expression: LogicalExpression::Property {
2383                    variable: "missing".to_string(),
2384                    property: "name".to_string(),
2385                },
2386                order: SortOrder::Ascending,
2387                nulls: None,
2388            }],
2389            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2390                variable: "n".to_string(),
2391                label: None,
2392                input: None,
2393            })),
2394        }));
2395
2396        let mut binder = Binder::new();
2397        assert!(
2398            binder.bind(&plan).is_err(),
2399            "ORDER BY on undefined variable should fail"
2400        );
2401    }
2402
2403    #[test]
2404    fn test_create_node_adds_variable_before_property_validation() {
2405        use crate::query::plan::CreateNodeOp;
2406
2407        // CREATE (n:Person {friend: n.name}) - referencing the node being created
2408        // The variable should be available for property expressions (self-reference)
2409        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
2410            variable: "n".to_string(),
2411            labels: vec!["Person".to_string()],
2412            properties: vec![(
2413                "self_ref".to_string(),
2414                LogicalExpression::Property {
2415                    variable: "n".to_string(),
2416                    property: "name".to_string(),
2417                },
2418            )],
2419            input: None,
2420        }));
2421
2422        let mut binder = Binder::new();
2423        // This should succeed because CreateNode adds the variable before validating properties
2424        let ctx = binder.bind(&plan).unwrap();
2425        assert!(ctx.get("n").unwrap().is_node);
2426    }
2427
2428    #[test]
2429    fn test_undefined_variable_suggests_similar() {
2430        // 'person' is defined, user types 'persn' - should get a suggestion
2431        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2432            items: vec![ReturnItem {
2433                expression: LogicalExpression::Variable("persn".to_string()),
2434                alias: None,
2435            }],
2436            distinct: false,
2437            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2438                variable: "person".to_string(),
2439                label: None,
2440                input: None,
2441            })),
2442        }));
2443
2444        let mut binder = Binder::new();
2445        let err = binder.bind(&plan).unwrap_err();
2446        let msg = err.to_string();
2447        // The error should contain the variable name at minimum
2448        assert!(
2449            msg.contains("persn"),
2450            "Error should mention the undefined variable"
2451        );
2452    }
2453
2454    #[test]
2455    fn test_anon_variables_skip_validation() {
2456        // Variables starting with _anon_ are anonymous and should be silently accepted
2457        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2458            items: vec![ReturnItem {
2459                expression: LogicalExpression::Variable("_anon_42".to_string()),
2460                alias: None,
2461            }],
2462            distinct: false,
2463            input: Box::new(LogicalOperator::Empty),
2464        }));
2465
2466        let mut binder = Binder::new();
2467        let result = binder.bind(&plan);
2468        assert!(
2469            result.is_ok(),
2470            "Anonymous variables should bypass validation"
2471        );
2472    }
2473
2474    #[test]
2475    fn test_map_expression_validates_values() {
2476        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2477            items: vec![ReturnItem {
2478                expression: LogicalExpression::Map(vec![(
2479                    "key".to_string(),
2480                    LogicalExpression::Variable("undefined".to_string()),
2481                )]),
2482                alias: None,
2483            }],
2484            distinct: false,
2485            input: Box::new(LogicalOperator::Empty),
2486        }));
2487
2488        let mut binder = Binder::new();
2489        assert!(
2490            binder.bind(&plan).is_err(),
2491            "Map values should be validated"
2492        );
2493    }
2494
2495    #[test]
2496    fn test_vector_scan_validates_query_vector() {
2497        use crate::query::plan::VectorScanOp;
2498
2499        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2500            variable: "result".to_string(),
2501            index_name: None,
2502            property: "embedding".to_string(),
2503            label: Some("Doc".to_string()),
2504            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2505            k: 10,
2506            metric: None,
2507            min_similarity: None,
2508            max_distance: None,
2509            input: None,
2510        }));
2511
2512        let mut binder = Binder::new();
2513        let err = binder.bind(&plan).unwrap_err();
2514        assert!(err.to_string().contains("undefined_vec"));
2515    }
2516}