Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use indexmap::IndexMap;
18use std::collections::HashSet;
19
20/// Creates a semantic binding error.
21fn binding_error(message: impl Into<String>) -> Error {
22    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
23}
24
25/// Creates a semantic binding error with a hint.
26fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
27    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
28}
29
30/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
31fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
32    let candidates: Vec<String> = context.variable_names();
33    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
34
35    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
36        binding_error_with_hint(
37            format!("Undefined variable '{variable}'{suffix}"),
38            format_suggestion(suggestion),
39        )
40    } else {
41        binding_error(format!("Undefined variable '{variable}'{suffix}"))
42    }
43}
44
45/// Information about a bound variable.
46#[derive(Debug, Clone)]
47pub struct VariableInfo {
48    /// The name of the variable.
49    pub name: String,
50    /// The inferred type of the variable.
51    pub data_type: LogicalType,
52    /// Whether this variable is a node.
53    pub is_node: bool,
54    /// Whether this variable is an edge.
55    pub is_edge: bool,
56}
57
58/// Context containing all bound variables and their information.
59///
60/// Uses `IndexMap` to maintain insertion order without a separate `Vec`,
61/// removing redundant storage and making `remove_variable` O(n) instead of
62/// two separate O(n) operations.
63#[derive(Debug, Clone, Default)]
64pub struct BindingContext {
65    /// Map from variable name to its info, in definition order.
66    variables: IndexMap<String, VariableInfo>,
67}
68
69impl BindingContext {
70    /// Creates a new empty binding context.
71    #[must_use]
72    pub fn new() -> Self {
73        Self {
74            variables: IndexMap::new(),
75        }
76    }
77
78    /// Adds a variable to the context.
79    ///
80    /// If the variable is already defined, replaces its info but preserves its
81    /// position in definition order.
82    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
83        self.variables.insert(name, info);
84    }
85
86    /// Looks up a variable by name.
87    #[must_use]
88    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
89        self.variables.get(name)
90    }
91
92    /// Checks if a variable is defined.
93    #[must_use]
94    pub fn contains(&self, name: &str) -> bool {
95        self.variables.contains_key(name)
96    }
97
98    /// Returns all variable names in definition order.
99    #[must_use]
100    pub fn variable_names(&self) -> Vec<String> {
101        self.variables.keys().cloned().collect()
102    }
103
104    /// Returns the number of bound variables.
105    #[must_use]
106    pub fn len(&self) -> usize {
107        self.variables.len()
108    }
109
110    /// Returns true if no variables are bound.
111    #[must_use]
112    pub fn is_empty(&self) -> bool {
113        self.variables.is_empty()
114    }
115
116    /// Removes a variable from the context (used for temporary scoping).
117    pub fn remove_variable(&mut self, name: &str) {
118        self.variables.shift_remove(name);
119    }
120}
121
122/// Semantic binder for query plans.
123///
124/// The binder walks the logical plan and:
125/// 1. Collects all variable definitions
126/// 2. Validates that all variable references are valid
127/// 3. Infers types where possible
128/// 4. Reports semantic errors
129pub struct Binder {
130    /// The current binding context.
131    context: BindingContext,
132}
133
134impl Binder {
135    /// Creates a new binder.
136    #[must_use]
137    pub fn new() -> Self {
138        Self {
139            context: BindingContext::new(),
140        }
141    }
142
143    /// Binds a logical plan, returning the binding context.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if semantic validation fails.
148    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
149        self.bind_operator(&plan.root)?;
150        Ok(self.context.clone())
151    }
152
153    /// Binds a single logical operator.
154    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
155        match op {
156            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
157            LogicalOperator::Expand(expand) => self.bind_expand(expand),
158            LogicalOperator::Filter(filter) => self.bind_filter(filter),
159            LogicalOperator::Return(ret) => self.bind_return(ret),
160            LogicalOperator::Project(project) => {
161                self.bind_operator(&project.input)?;
162                for projection in &project.projections {
163                    self.validate_expression(&projection.expression)?;
164                    // Add the projection alias to the context (for WITH clause support)
165                    if let Some(ref alias) = projection.alias {
166                        // Determine the type from the expression
167                        let data_type = self.infer_expression_type(&projection.expression);
168                        // Propagate node/edge status when projecting a variable
169                        // or a Case that selects between node variables (used
170                        // by optional() and union() translations).
171                        let (is_node, is_edge) = self.infer_entity_status(&projection.expression);
172                        self.context.add_variable(
173                            alias.clone(),
174                            VariableInfo {
175                                name: alias.clone(),
176                                data_type,
177                                is_node,
178                                is_edge,
179                            },
180                        );
181                    }
182                }
183                Ok(())
184            }
185            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
186            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
187            LogicalOperator::Sort(sort) => {
188                self.bind_operator(&sort.input)?;
189                for key in &sort.keys {
190                    self.validate_expression(&key.expression)?;
191                }
192                Ok(())
193            }
194            LogicalOperator::CreateNode(create) => {
195                // CreateNode introduces a new variable
196                if let Some(ref input) = create.input {
197                    self.bind_operator(input)?;
198                }
199                self.context.add_variable(
200                    create.variable.clone(),
201                    VariableInfo {
202                        name: create.variable.clone(),
203                        data_type: LogicalType::Node,
204                        is_node: true,
205                        is_edge: false,
206                    },
207                );
208                // Validate property expressions
209                for (_, expr) in &create.properties {
210                    self.validate_expression(expr)?;
211                }
212                Ok(())
213            }
214            LogicalOperator::EdgeScan(scan) => {
215                if let Some(ref input) = scan.input {
216                    self.bind_operator(input)?;
217                }
218                self.context.add_variable(
219                    scan.variable.clone(),
220                    VariableInfo {
221                        name: scan.variable.clone(),
222                        data_type: LogicalType::Edge,
223                        is_node: false,
224                        is_edge: true,
225                    },
226                );
227                Ok(())
228            }
229            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
230            LogicalOperator::Join(join) => self.bind_join(join),
231            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
232            LogicalOperator::CreateEdge(create) => {
233                self.bind_operator(&create.input)?;
234                // Validate that source and target variables are defined
235                if !self.context.contains(&create.from_variable) {
236                    return Err(undefined_variable_error(
237                        &create.from_variable,
238                        &self.context,
239                        " (source in CREATE EDGE)",
240                    ));
241                }
242                if !self.context.contains(&create.to_variable) {
243                    return Err(undefined_variable_error(
244                        &create.to_variable,
245                        &self.context,
246                        " (target in CREATE EDGE)",
247                    ));
248                }
249                // Add edge variable if present
250                if let Some(ref var) = create.variable {
251                    self.context.add_variable(
252                        var.clone(),
253                        VariableInfo {
254                            name: var.clone(),
255                            data_type: LogicalType::Edge,
256                            is_node: false,
257                            is_edge: true,
258                        },
259                    );
260                }
261                // Validate property expressions
262                for (_, expr) in &create.properties {
263                    self.validate_expression(expr)?;
264                }
265                Ok(())
266            }
267            LogicalOperator::DeleteNode(delete) => {
268                self.bind_operator(&delete.input)?;
269                // Validate that the variable to delete is defined
270                if !self.context.contains(&delete.variable) {
271                    return Err(undefined_variable_error(
272                        &delete.variable,
273                        &self.context,
274                        " in DELETE",
275                    ));
276                }
277                Ok(())
278            }
279            LogicalOperator::DeleteEdge(delete) => {
280                self.bind_operator(&delete.input)?;
281                // Validate that the variable to delete is defined
282                if !self.context.contains(&delete.variable) {
283                    return Err(undefined_variable_error(
284                        &delete.variable,
285                        &self.context,
286                        " in DELETE",
287                    ));
288                }
289                Ok(())
290            }
291            LogicalOperator::SetProperty(set) => {
292                self.bind_operator(&set.input)?;
293                // Validate that the variable to update is defined
294                if !self.context.contains(&set.variable) {
295                    return Err(undefined_variable_error(
296                        &set.variable,
297                        &self.context,
298                        " in SET",
299                    ));
300                }
301                // Validate property value expressions
302                for (_, expr) in &set.properties {
303                    self.validate_expression(expr)?;
304                }
305                Ok(())
306            }
307            LogicalOperator::Empty => Ok(()),
308
309            LogicalOperator::Unwind(unwind) => {
310                // First bind the input
311                self.bind_operator(&unwind.input)?;
312                // Validate the expression being unwound
313                self.validate_expression(&unwind.expression)?;
314                // Add the new variable to the context
315                self.context.add_variable(
316                    unwind.variable.clone(),
317                    VariableInfo {
318                        name: unwind.variable.clone(),
319                        data_type: LogicalType::Any, // Unwound elements can be any type
320                        is_node: false,
321                        is_edge: false,
322                    },
323                );
324                // Add ORDINALITY variable if present (1-based index)
325                if let Some(ref ord_var) = unwind.ordinality_var {
326                    self.context.add_variable(
327                        ord_var.clone(),
328                        VariableInfo {
329                            name: ord_var.clone(),
330                            data_type: LogicalType::Int64,
331                            is_node: false,
332                            is_edge: false,
333                        },
334                    );
335                }
336                // Add OFFSET variable if present (0-based index)
337                if let Some(ref off_var) = unwind.offset_var {
338                    self.context.add_variable(
339                        off_var.clone(),
340                        VariableInfo {
341                            name: off_var.clone(),
342                            data_type: LogicalType::Int64,
343                            is_node: false,
344                            is_edge: false,
345                        },
346                    );
347                }
348                Ok(())
349            }
350
351            // RDF/SPARQL operators
352            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
353            LogicalOperator::Union(union) => {
354                for input in &union.inputs {
355                    self.bind_operator(input)?;
356                }
357                Ok(())
358            }
359            LogicalOperator::LeftJoin(lj) => {
360                self.bind_operator(&lj.left)?;
361                self.bind_operator(&lj.right)?;
362                if let Some(ref cond) = lj.condition {
363                    self.validate_expression(cond)?;
364                }
365                Ok(())
366            }
367            LogicalOperator::AntiJoin(aj) => {
368                self.bind_operator(&aj.left)?;
369                self.bind_operator(&aj.right)?;
370                Ok(())
371            }
372            LogicalOperator::Bind(bind) => {
373                self.bind_operator(&bind.input)?;
374                self.validate_expression(&bind.expression)?;
375                self.context.add_variable(
376                    bind.variable.clone(),
377                    VariableInfo {
378                        name: bind.variable.clone(),
379                        data_type: LogicalType::Any,
380                        is_node: false,
381                        is_edge: false,
382                    },
383                );
384                Ok(())
385            }
386            LogicalOperator::Merge(merge) => {
387                // First bind the input
388                self.bind_operator(&merge.input)?;
389                // Validate the match property expressions
390                for (_, expr) in &merge.match_properties {
391                    self.validate_expression(expr)?;
392                }
393                // Validate the ON CREATE property expressions
394                for (_, expr) in &merge.on_create {
395                    self.validate_expression(expr)?;
396                }
397                // Validate the ON MATCH property expressions
398                for (_, expr) in &merge.on_match {
399                    self.validate_expression(expr)?;
400                }
401                // MERGE introduces a new variable
402                self.context.add_variable(
403                    merge.variable.clone(),
404                    VariableInfo {
405                        name: merge.variable.clone(),
406                        data_type: LogicalType::Node,
407                        is_node: true,
408                        is_edge: false,
409                    },
410                );
411                Ok(())
412            }
413            LogicalOperator::MergeRelationship(merge_rel) => {
414                self.bind_operator(&merge_rel.input)?;
415                // Validate source and target variables exist
416                if !self.context.contains(&merge_rel.source_variable) {
417                    return Err(undefined_variable_error(
418                        &merge_rel.source_variable,
419                        &self.context,
420                        " in MERGE relationship source",
421                    ));
422                }
423                if !self.context.contains(&merge_rel.target_variable) {
424                    return Err(undefined_variable_error(
425                        &merge_rel.target_variable,
426                        &self.context,
427                        " in MERGE relationship target",
428                    ));
429                }
430                for (_, expr) in &merge_rel.match_properties {
431                    self.validate_expression(expr)?;
432                }
433                for (_, expr) in &merge_rel.on_create {
434                    self.validate_expression(expr)?;
435                }
436                for (_, expr) in &merge_rel.on_match {
437                    self.validate_expression(expr)?;
438                }
439                // MERGE relationship introduces the edge variable
440                self.context.add_variable(
441                    merge_rel.variable.clone(),
442                    VariableInfo {
443                        name: merge_rel.variable.clone(),
444                        data_type: LogicalType::Edge,
445                        is_node: false,
446                        is_edge: true,
447                    },
448                );
449                Ok(())
450            }
451            LogicalOperator::AddLabel(add_label) => {
452                self.bind_operator(&add_label.input)?;
453                // Validate that the variable exists
454                if !self.context.contains(&add_label.variable) {
455                    return Err(undefined_variable_error(
456                        &add_label.variable,
457                        &self.context,
458                        " in SET labels",
459                    ));
460                }
461                Ok(())
462            }
463            LogicalOperator::RemoveLabel(remove_label) => {
464                self.bind_operator(&remove_label.input)?;
465                // Validate that the variable exists
466                if !self.context.contains(&remove_label.variable) {
467                    return Err(undefined_variable_error(
468                        &remove_label.variable,
469                        &self.context,
470                        " in REMOVE labels",
471                    ));
472                }
473                Ok(())
474            }
475            LogicalOperator::ShortestPath(sp) => {
476                // First bind the input
477                self.bind_operator(&sp.input)?;
478                // Validate that source and target variables are defined
479                if !self.context.contains(&sp.source_var) {
480                    return Err(undefined_variable_error(
481                        &sp.source_var,
482                        &self.context,
483                        " (source in shortestPath)",
484                    ));
485                }
486                if !self.context.contains(&sp.target_var) {
487                    return Err(undefined_variable_error(
488                        &sp.target_var,
489                        &self.context,
490                        " (target in shortestPath)",
491                    ));
492                }
493                // Add the path alias variable to the context
494                self.context.add_variable(
495                    sp.path_alias.clone(),
496                    VariableInfo {
497                        name: sp.path_alias.clone(),
498                        data_type: LogicalType::Any, // Path is a complex type
499                        is_node: false,
500                        is_edge: false,
501                    },
502                );
503                // Also add the path length variable for length(p) calls
504                let path_length_var = format!("_path_length_{}", sp.path_alias);
505                self.context.add_variable(
506                    path_length_var.clone(),
507                    VariableInfo {
508                        name: path_length_var,
509                        data_type: LogicalType::Int64,
510                        is_node: false,
511                        is_edge: false,
512                    },
513                );
514                Ok(())
515            }
516            // SPARQL Update operators - these don't require variable binding
517            LogicalOperator::InsertTriple(insert) => {
518                if let Some(ref input) = insert.input {
519                    self.bind_operator(input)?;
520                }
521                Ok(())
522            }
523            LogicalOperator::DeleteTriple(delete) => {
524                if let Some(ref input) = delete.input {
525                    self.bind_operator(input)?;
526                }
527                Ok(())
528            }
529            LogicalOperator::Modify(modify) => {
530                self.bind_operator(&modify.where_clause)?;
531                Ok(())
532            }
533            LogicalOperator::ClearGraph(_)
534            | LogicalOperator::CreateGraph(_)
535            | LogicalOperator::DropGraph(_)
536            | LogicalOperator::LoadGraph(_)
537            | LogicalOperator::CopyGraph(_)
538            | LogicalOperator::MoveGraph(_)
539            | LogicalOperator::AddGraph(_)
540            | LogicalOperator::HorizontalAggregate(_) => Ok(()),
541            LogicalOperator::VectorScan(scan) => {
542                // VectorScan introduces a variable for matched nodes
543                if let Some(ref input) = scan.input {
544                    self.bind_operator(input)?;
545                }
546                self.context.add_variable(
547                    scan.variable.clone(),
548                    VariableInfo {
549                        name: scan.variable.clone(),
550                        data_type: LogicalType::Node,
551                        is_node: true,
552                        is_edge: false,
553                    },
554                );
555                // Validate the query vector expression
556                self.validate_expression(&scan.query_vector)?;
557                Ok(())
558            }
559            LogicalOperator::VectorJoin(join) => {
560                // VectorJoin takes input from left side and produces right-side matches
561                self.bind_operator(&join.input)?;
562                // Add right variable for matched nodes
563                self.context.add_variable(
564                    join.right_variable.clone(),
565                    VariableInfo {
566                        name: join.right_variable.clone(),
567                        data_type: LogicalType::Node,
568                        is_node: true,
569                        is_edge: false,
570                    },
571                );
572                // Optionally add score variable
573                if let Some(ref score_var) = join.score_variable {
574                    self.context.add_variable(
575                        score_var.clone(),
576                        VariableInfo {
577                            name: score_var.clone(),
578                            data_type: LogicalType::Float64,
579                            is_node: false,
580                            is_edge: false,
581                        },
582                    );
583                }
584                // Validate the query vector expression
585                self.validate_expression(&join.query_vector)?;
586                Ok(())
587            }
588            LogicalOperator::MapCollect(mc) => {
589                self.bind_operator(&mc.input)?;
590                self.context.add_variable(
591                    mc.alias.clone(),
592                    VariableInfo {
593                        name: mc.alias.clone(),
594                        data_type: LogicalType::Any,
595                        is_node: false,
596                        is_edge: false,
597                    },
598                );
599                Ok(())
600            }
601            LogicalOperator::Except(except) => {
602                self.bind_operator(&except.left)?;
603                self.bind_operator(&except.right)?;
604                Ok(())
605            }
606            LogicalOperator::Intersect(intersect) => {
607                self.bind_operator(&intersect.left)?;
608                self.bind_operator(&intersect.right)?;
609                Ok(())
610            }
611            LogicalOperator::Otherwise(otherwise) => {
612                self.bind_operator(&otherwise.left)?;
613                self.bind_operator(&otherwise.right)?;
614                Ok(())
615            }
616            LogicalOperator::Apply(apply) => {
617                // Snapshot context BEFORE binding the input, so we can detect
618                // which variables were added by the input plan.
619                let pre_apply_names: HashSet<String> =
620                    self.context.variable_names().iter().cloned().collect();
621
622                self.bind_operator(&apply.input)?;
623
624                // Scope down: when the input plan exposes a Return/Aggregate
625                // projection (not a raw scan/expand), remove its internal-only
626                // variables. Only the projected output columns should be visible
627                // to the subplan — this prevents variables internal to a sibling
628                // CALL block from leaking into the next CALL block.
629                let mut input_output_ctx = BindingContext::new();
630                Self::register_subplan_columns(&apply.input, &mut input_output_ctx);
631                let input_output_names: HashSet<String> =
632                    input_output_ctx.variable_names().iter().cloned().collect();
633
634                if !input_output_names.is_empty() {
635                    // Input has an explicit projection: remove its internals.
636                    let input_internals: Vec<String> = self
637                        .context
638                        .variable_names()
639                        .iter()
640                        .filter(|n| {
641                            !pre_apply_names.contains(*n) && !input_output_names.contains(*n)
642                        })
643                        .cloned()
644                        .collect();
645                    for name in input_internals {
646                        self.context.remove_variable(&name);
647                    }
648                }
649
650                // Snapshot the permitted outer context for the subplan.
651                let outer_names: HashSet<String> =
652                    self.context.variable_names().iter().cloned().collect();
653
654                self.bind_operator(&apply.subplan)?;
655
656                // Remove internal-only variables added by the subplan (those that
657                // are not output columns). Prevents subplan internals from leaking
658                // into the outer query or sibling CALL blocks.
659                let mut subplan_output_ctx = BindingContext::new();
660                Self::register_subplan_columns(&apply.subplan, &mut subplan_output_ctx);
661                let subplan_output_names: HashSet<String> = subplan_output_ctx
662                    .variable_names()
663                    .iter()
664                    .cloned()
665                    .collect();
666
667                let to_remove: Vec<String> = self
668                    .context
669                    .variable_names()
670                    .iter()
671                    .filter(|n| !outer_names.contains(*n) && !subplan_output_names.contains(*n))
672                    .cloned()
673                    .collect();
674                for name in to_remove {
675                    self.context.remove_variable(&name);
676                }
677
678                // Register output columns so downstream operators can reference them.
679                Self::register_subplan_columns(&apply.subplan, &mut self.context);
680                Ok(())
681            }
682            LogicalOperator::MultiWayJoin(mwj) => {
683                for input in &mwj.inputs {
684                    self.bind_operator(input)?;
685                }
686                for cond in &mwj.conditions {
687                    self.validate_expression(&cond.left)?;
688                    self.validate_expression(&cond.right)?;
689                }
690                Ok(())
691            }
692            LogicalOperator::ParameterScan(param_scan) => {
693                // Register parameter columns as variables (injected by outer Apply)
694                for col in &param_scan.columns {
695                    self.context.add_variable(
696                        col.clone(),
697                        VariableInfo {
698                            name: col.clone(),
699                            data_type: LogicalType::Any,
700                            is_node: true,
701                            is_edge: false,
702                        },
703                    );
704                }
705                Ok(())
706            }
707            // DDL operators don't need binding: they're handled before the binder
708            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
709            // Procedure calls: register yielded columns as variables for downstream operators
710            LogicalOperator::CallProcedure(call) => {
711                if let Some(yields) = &call.yield_items {
712                    for item in yields {
713                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
714                        self.context.add_variable(
715                            var_name.to_string(),
716                            VariableInfo {
717                                name: var_name.to_string(),
718                                data_type: LogicalType::Any,
719                                is_node: false,
720                                is_edge: false,
721                            },
722                        );
723                    }
724                }
725                Ok(())
726            }
727            LogicalOperator::LoadData(load) => {
728                // The row variable is bound as Any (Map or List depending on WITH HEADERS)
729                self.context.add_variable(
730                    load.variable.clone(),
731                    VariableInfo {
732                        name: load.variable.clone(),
733                        data_type: LogicalType::Any,
734                        is_node: false,
735                        is_edge: false,
736                    },
737                );
738                Ok(())
739            }
740        }
741    }
742
743    /// Binds a triple scan operator (for RDF/SPARQL).
744    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
745        use crate::query::plan::TripleComponent;
746
747        // First bind the input if present
748        if let Some(ref input) = scan.input {
749            self.bind_operator(input)?;
750        }
751
752        // Add variables for subject, predicate, object
753        if let TripleComponent::Variable(name) = &scan.subject
754            && !self.context.contains(name)
755        {
756            self.context.add_variable(
757                name.clone(),
758                VariableInfo {
759                    name: name.clone(),
760                    data_type: LogicalType::Any, // RDF term
761                    is_node: false,
762                    is_edge: false,
763                },
764            );
765        }
766
767        if let TripleComponent::Variable(name) = &scan.predicate
768            && !self.context.contains(name)
769        {
770            self.context.add_variable(
771                name.clone(),
772                VariableInfo {
773                    name: name.clone(),
774                    data_type: LogicalType::Any, // IRI
775                    is_node: false,
776                    is_edge: false,
777                },
778            );
779        }
780
781        if let TripleComponent::Variable(name) = &scan.object
782            && !self.context.contains(name)
783        {
784            self.context.add_variable(
785                name.clone(),
786                VariableInfo {
787                    name: name.clone(),
788                    data_type: LogicalType::Any, // RDF term
789                    is_node: false,
790                    is_edge: false,
791                },
792            );
793        }
794
795        if let Some(TripleComponent::Variable(name)) = &scan.graph
796            && !self.context.contains(name)
797        {
798            self.context.add_variable(
799                name.clone(),
800                VariableInfo {
801                    name: name.clone(),
802                    data_type: LogicalType::Any, // IRI
803                    is_node: false,
804                    is_edge: false,
805                },
806            );
807        }
808
809        Ok(())
810    }
811
812    /// Binds a node scan operator.
813    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
814        // First bind the input if present
815        if let Some(ref input) = scan.input {
816            self.bind_operator(input)?;
817        }
818
819        // Add the scanned variable to scope
820        self.context.add_variable(
821            scan.variable.clone(),
822            VariableInfo {
823                name: scan.variable.clone(),
824                data_type: LogicalType::Node,
825                is_node: true,
826                is_edge: false,
827            },
828        );
829
830        Ok(())
831    }
832
833    /// Binds an expand operator.
834    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
835        // First bind the input
836        self.bind_operator(&expand.input)?;
837
838        // Validate that the source variable is defined
839        if !self.context.contains(&expand.from_variable) {
840            return Err(undefined_variable_error(
841                &expand.from_variable,
842                &self.context,
843                " in EXPAND",
844            ));
845        }
846
847        // Validate that the source is a node
848        if let Some(info) = self.context.get(&expand.from_variable)
849            && !info.is_node
850        {
851            return Err(binding_error(format!(
852                "Variable '{}' is not a node, cannot expand from it",
853                expand.from_variable
854            )));
855        }
856
857        // Add edge variable if present
858        if let Some(ref edge_var) = expand.edge_variable {
859            self.context.add_variable(
860                edge_var.clone(),
861                VariableInfo {
862                    name: edge_var.clone(),
863                    data_type: LogicalType::Edge,
864                    is_node: false,
865                    is_edge: true,
866                },
867            );
868        }
869
870        // Add target variable
871        self.context.add_variable(
872            expand.to_variable.clone(),
873            VariableInfo {
874                name: expand.to_variable.clone(),
875                data_type: LogicalType::Node,
876                is_node: true,
877                is_edge: false,
878            },
879        );
880
881        // Add path variables for variable-length paths
882        if let Some(ref path_alias) = expand.path_alias {
883            // Register the path variable itself (e.g. p in MATCH p=...)
884            self.context.add_variable(
885                path_alias.clone(),
886                VariableInfo {
887                    name: path_alias.clone(),
888                    data_type: LogicalType::Any,
889                    is_node: false,
890                    is_edge: false,
891                },
892            );
893            // length(p) → _path_length_p
894            let path_length_var = format!("_path_length_{}", path_alias);
895            self.context.add_variable(
896                path_length_var.clone(),
897                VariableInfo {
898                    name: path_length_var,
899                    data_type: LogicalType::Int64,
900                    is_node: false,
901                    is_edge: false,
902                },
903            );
904            // nodes(p) → _path_nodes_p
905            let path_nodes_var = format!("_path_nodes_{}", path_alias);
906            self.context.add_variable(
907                path_nodes_var.clone(),
908                VariableInfo {
909                    name: path_nodes_var,
910                    data_type: LogicalType::Any,
911                    is_node: false,
912                    is_edge: false,
913                },
914            );
915            // edges(p) → _path_edges_p
916            let path_edges_var = format!("_path_edges_{}", path_alias);
917            self.context.add_variable(
918                path_edges_var.clone(),
919                VariableInfo {
920                    name: path_edges_var,
921                    data_type: LogicalType::Any,
922                    is_node: false,
923                    is_edge: false,
924                },
925            );
926        }
927
928        Ok(())
929    }
930
931    /// Binds a filter operator.
932    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
933        // First bind the input
934        self.bind_operator(&filter.input)?;
935
936        // Validate the predicate expression
937        self.validate_expression(&filter.predicate)?;
938
939        Ok(())
940    }
941
942    /// Registers output columns from a subplan into the binding context.
943    /// Walks through wrapping operators to find a Return and extracts column names.
944    fn register_subplan_columns(plan: &LogicalOperator, ctx: &mut BindingContext) {
945        match plan {
946            LogicalOperator::Return(ret) => {
947                for item in &ret.items {
948                    let col_name = if let Some(alias) = &item.alias {
949                        alias.clone()
950                    } else {
951                        match &item.expression {
952                            LogicalExpression::Variable(name) => name.clone(),
953                            LogicalExpression::Property { variable, property } => {
954                                format!("{variable}.{property}")
955                            }
956                            _ => continue,
957                        }
958                    };
959                    ctx.add_variable(
960                        col_name.clone(),
961                        VariableInfo {
962                            name: col_name,
963                            data_type: LogicalType::Any,
964                            is_node: false,
965                            is_edge: false,
966                        },
967                    );
968                }
969            }
970            LogicalOperator::Sort(s) => Self::register_subplan_columns(&s.input, ctx),
971            LogicalOperator::Limit(l) => Self::register_subplan_columns(&l.input, ctx),
972            LogicalOperator::Distinct(d) => Self::register_subplan_columns(&d.input, ctx),
973            LogicalOperator::Aggregate(agg) => {
974                // Aggregate produces named output columns
975                for expr in &agg.aggregates {
976                    if let Some(alias) = &expr.alias {
977                        ctx.add_variable(
978                            alias.clone(),
979                            VariableInfo {
980                                name: alias.clone(),
981                                data_type: LogicalType::Any,
982                                is_node: false,
983                                is_edge: false,
984                            },
985                        );
986                    }
987                }
988            }
989            _ => {}
990        }
991    }
992
993    /// Binds a return operator.
994    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
995        // First bind the input
996        self.bind_operator(&ret.input)?;
997
998        // Validate all return expressions and register aliases
999        // (aliases must be visible to parent Sort for ORDER BY resolution)
1000        for item in &ret.items {
1001            self.validate_return_item(item)?;
1002            if let Some(ref alias) = item.alias {
1003                let data_type = self.infer_expression_type(&item.expression);
1004                self.context.add_variable(
1005                    alias.clone(),
1006                    VariableInfo {
1007                        name: alias.clone(),
1008                        data_type,
1009                        is_node: false,
1010                        is_edge: false,
1011                    },
1012                );
1013            }
1014        }
1015
1016        Ok(())
1017    }
1018
1019    /// Validates a return item.
1020    fn validate_return_item(&mut self, item: &ReturnItem) -> Result<()> {
1021        self.validate_expression(&item.expression)
1022    }
1023
1024    /// Validates that an expression only references defined variables.
1025    fn validate_expression(&mut self, expr: &LogicalExpression) -> Result<()> {
1026        match expr {
1027            LogicalExpression::Variable(name) => {
1028                // "*" is a wildcard marker for RETURN *, expanded by the planner
1029                if name == "*" {
1030                    return Ok(());
1031                }
1032                if !self.context.contains(name) && !name.starts_with("_anon_") {
1033                    return Err(undefined_variable_error(name, &self.context, ""));
1034                }
1035                Ok(())
1036            }
1037            LogicalExpression::Property { variable, .. } => {
1038                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
1039                    return Err(undefined_variable_error(
1040                        variable,
1041                        &self.context,
1042                        " in property access",
1043                    ));
1044                }
1045                Ok(())
1046            }
1047            LogicalExpression::Literal(_) => Ok(()),
1048            LogicalExpression::Binary { left, right, .. } => {
1049                self.validate_expression(left)?;
1050                self.validate_expression(right)
1051            }
1052            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
1053            LogicalExpression::FunctionCall { args, .. } => {
1054                for arg in args {
1055                    self.validate_expression(arg)?;
1056                }
1057                Ok(())
1058            }
1059            LogicalExpression::List(items) => {
1060                for item in items {
1061                    self.validate_expression(item)?;
1062                }
1063                Ok(())
1064            }
1065            LogicalExpression::Map(pairs) => {
1066                for (_, value) in pairs {
1067                    self.validate_expression(value)?;
1068                }
1069                Ok(())
1070            }
1071            LogicalExpression::IndexAccess { base, index } => {
1072                self.validate_expression(base)?;
1073                self.validate_expression(index)
1074            }
1075            LogicalExpression::SliceAccess { base, start, end } => {
1076                self.validate_expression(base)?;
1077                if let Some(s) = start {
1078                    self.validate_expression(s)?;
1079                }
1080                if let Some(e) = end {
1081                    self.validate_expression(e)?;
1082                }
1083                Ok(())
1084            }
1085            LogicalExpression::Case {
1086                operand,
1087                when_clauses,
1088                else_clause,
1089            } => {
1090                if let Some(op) = operand {
1091                    self.validate_expression(op)?;
1092                }
1093                for (cond, result) in when_clauses {
1094                    self.validate_expression(cond)?;
1095                    self.validate_expression(result)?;
1096                }
1097                if let Some(else_expr) = else_clause {
1098                    self.validate_expression(else_expr)?;
1099                }
1100                Ok(())
1101            }
1102            // Parameter references are validated externally
1103            LogicalExpression::Parameter(_) => Ok(()),
1104            // labels(n), type(e), id(n) need the variable to be defined
1105            LogicalExpression::Labels(var)
1106            | LogicalExpression::Type(var)
1107            | LogicalExpression::Id(var) => {
1108                if !self.context.contains(var) && !var.starts_with("_anon_") {
1109                    return Err(undefined_variable_error(var, &self.context, " in function"));
1110                }
1111                Ok(())
1112            }
1113            LogicalExpression::ListComprehension { list_expr, .. } => {
1114                // Validate the list expression against the outer context.
1115                // The filter and map expressions use the iteration variable
1116                // which is locally scoped, so we skip validating them here.
1117                self.validate_expression(list_expr)?;
1118                Ok(())
1119            }
1120            LogicalExpression::ListPredicate { list_expr, .. } => {
1121                // Validate the list expression against the outer context.
1122                // The predicate uses the iteration variable which is locally
1123                // scoped, so we skip validating it against the outer context.
1124                self.validate_expression(list_expr)?;
1125                Ok(())
1126            }
1127            LogicalExpression::ExistsSubquery(subquery)
1128            | LogicalExpression::CountSubquery(subquery)
1129            | LogicalExpression::ValueSubquery(subquery) => {
1130                // Subqueries have their own binding context
1131                // For now, just validate the structure exists
1132                let _ = subquery; // Would need recursive binding
1133                Ok(())
1134            }
1135            LogicalExpression::PatternComprehension {
1136                subplan,
1137                projection,
1138            } => {
1139                // Bind the subplan to register pattern variables (e.g., `f` in `(p)-[:KNOWS]->(f)`)
1140                self.bind_operator(subplan)?;
1141                // Now validate the projection expression (e.g., `f.name`)
1142                self.validate_expression(projection)
1143            }
1144            LogicalExpression::MapProjection { base, entries } => {
1145                if !self.context.contains(base) && !base.starts_with("_anon_") {
1146                    return Err(undefined_variable_error(
1147                        base,
1148                        &self.context,
1149                        " in map projection",
1150                    ));
1151                }
1152                for entry in entries {
1153                    if let crate::query::plan::MapProjectionEntry::LiteralEntry(_, expr) = entry {
1154                        self.validate_expression(expr)?;
1155                    }
1156                }
1157                Ok(())
1158            }
1159            LogicalExpression::Reduce {
1160                accumulator,
1161                initial,
1162                variable,
1163                list,
1164                expression,
1165            } => {
1166                self.validate_expression(initial)?;
1167                self.validate_expression(list)?;
1168                // accumulator and variable are locally scoped: inject them
1169                // into context, validate body, then remove
1170                let had_acc = self.context.contains(accumulator);
1171                let had_var = self.context.contains(variable);
1172                if !had_acc {
1173                    self.context.add_variable(
1174                        accumulator.clone(),
1175                        VariableInfo {
1176                            name: accumulator.clone(),
1177                            data_type: LogicalType::Any,
1178                            is_node: false,
1179                            is_edge: false,
1180                        },
1181                    );
1182                }
1183                if !had_var {
1184                    self.context.add_variable(
1185                        variable.clone(),
1186                        VariableInfo {
1187                            name: variable.clone(),
1188                            data_type: LogicalType::Any,
1189                            is_node: false,
1190                            is_edge: false,
1191                        },
1192                    );
1193                }
1194                self.validate_expression(expression)?;
1195                if !had_acc {
1196                    self.context.remove_variable(accumulator);
1197                }
1198                if !had_var {
1199                    self.context.remove_variable(variable);
1200                }
1201                Ok(())
1202            }
1203        }
1204    }
1205
1206    /// Infers the type of an expression for use in WITH clause aliasing.
1207    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
1208        match expr {
1209            LogicalExpression::Variable(name) => {
1210                // Look up the variable type from context
1211                self.context
1212                    .get(name)
1213                    .map_or(LogicalType::Any, |info| info.data_type.clone())
1214            }
1215            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
1216            LogicalExpression::Literal(value) => {
1217                // Infer type from literal value
1218                use grafeo_common::types::Value;
1219                match value {
1220                    Value::Bool(_) => LogicalType::Bool,
1221                    Value::Int64(_) => LogicalType::Int64,
1222                    Value::Float64(_) => LogicalType::Float64,
1223                    Value::String(_) => LogicalType::String,
1224                    Value::List(_) => LogicalType::Any, // Complex type
1225                    Value::Map(_) => LogicalType::Any,  // Complex type
1226                    Value::Null => LogicalType::Any,
1227                    _ => LogicalType::Any,
1228                }
1229            }
1230            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
1231            LogicalExpression::Unary { .. } => LogicalType::Any,
1232            LogicalExpression::FunctionCall { name, .. } => {
1233                // Infer based on function name
1234                match name.to_lowercase().as_str() {
1235                    "count" | "sum" | "id" => LogicalType::Int64,
1236                    "avg" => LogicalType::Float64,
1237                    "type" => LogicalType::String,
1238                    // List-returning functions use Any since we don't track element type
1239                    "labels" | "collect" => LogicalType::Any,
1240                    _ => LogicalType::Any,
1241                }
1242            }
1243            LogicalExpression::List(_) => LogicalType::Any, // Complex type
1244            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
1245            _ => LogicalType::Any,
1246        }
1247    }
1248
1249    /// Infers whether an expression resolves to a node or edge entity.
1250    ///
1251    /// Returns `(is_node, is_edge)`. This propagates entity status through
1252    /// simple Variable references and Case expressions whose branches all
1253    /// agree on entity kind (used by optional() translation).
1254    fn infer_entity_status(&self, expr: &LogicalExpression) -> (bool, bool) {
1255        match expr {
1256            LogicalExpression::Variable(src) => self
1257                .context
1258                .get(src)
1259                .map_or((false, false), |info| (info.is_node, info.is_edge)),
1260            LogicalExpression::Case {
1261                when_clauses,
1262                else_clause,
1263                ..
1264            } => {
1265                // Collect entity status from all THEN and ELSE branches
1266                let mut all_node = true;
1267                let mut all_edge = true;
1268                let mut any_branch = false;
1269                for (_, then_expr) in when_clauses {
1270                    let (n, e) = self.infer_entity_status(then_expr);
1271                    all_node &= n;
1272                    all_edge &= e;
1273                    any_branch = true;
1274                }
1275                if let Some(else_expr) = else_clause {
1276                    let (n, e) = self.infer_entity_status(else_expr);
1277                    all_node &= n;
1278                    all_edge &= e;
1279                    any_branch = true;
1280                }
1281                if any_branch {
1282                    (all_node, all_edge)
1283                } else {
1284                    (false, false)
1285                }
1286            }
1287            _ => (false, false),
1288        }
1289    }
1290
1291    /// Binds a join operator.
1292    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
1293        // Bind both sides of the join
1294        self.bind_operator(&join.left)?;
1295        self.bind_operator(&join.right)?;
1296
1297        // Validate join conditions
1298        for condition in &join.conditions {
1299            self.validate_expression(&condition.left)?;
1300            self.validate_expression(&condition.right)?;
1301        }
1302
1303        Ok(())
1304    }
1305
1306    /// Binds an aggregate operator.
1307    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
1308        // Bind the input first
1309        self.bind_operator(&agg.input)?;
1310
1311        // Validate group by expressions
1312        for expr in &agg.group_by {
1313            self.validate_expression(expr)?;
1314        }
1315
1316        // Validate aggregate expressions
1317        for agg_expr in &agg.aggregates {
1318            if let Some(ref expr) = agg_expr.expression {
1319                self.validate_expression(expr)?;
1320            }
1321            // Add the alias as a new variable if present
1322            if let Some(ref alias) = agg_expr.alias {
1323                self.context.add_variable(
1324                    alias.clone(),
1325                    VariableInfo {
1326                        name: alias.clone(),
1327                        data_type: LogicalType::Any,
1328                        is_node: false,
1329                        is_edge: false,
1330                    },
1331                );
1332            }
1333        }
1334
1335        // Register group-by output column names so ORDER BY / HAVING
1336        // can reference them (e.g. "n.city" from Property(n, city)).
1337        for expr in &agg.group_by {
1338            let col_name = crate::query::planner::common::expression_to_string(expr);
1339            if !self.context.contains(&col_name) {
1340                self.context.add_variable(
1341                    col_name.clone(),
1342                    VariableInfo {
1343                        name: col_name,
1344                        data_type: LogicalType::Any,
1345                        is_node: false,
1346                        is_edge: false,
1347                    },
1348                );
1349            }
1350        }
1351
1352        Ok(())
1353    }
1354}
1355
1356impl Default for Binder {
1357    fn default() -> Self {
1358        Self::new()
1359    }
1360}
1361
1362#[cfg(test)]
1363mod tests {
1364    use super::*;
1365    use crate::query::plan::{BinaryOp, FilterOp};
1366
1367    #[test]
1368    fn test_bind_simple_scan() {
1369        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1370            items: vec![ReturnItem {
1371                expression: LogicalExpression::Variable("n".to_string()),
1372                alias: None,
1373            }],
1374            distinct: false,
1375            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1376                variable: "n".to_string(),
1377                label: Some("Person".to_string()),
1378                input: None,
1379            })),
1380        }));
1381
1382        let mut binder = Binder::new();
1383        let result = binder.bind(&plan);
1384
1385        assert!(result.is_ok());
1386        let ctx = result.unwrap();
1387        assert!(ctx.contains("n"));
1388        assert!(ctx.get("n").unwrap().is_node);
1389    }
1390
1391    #[test]
1392    fn test_bind_undefined_variable() {
1393        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1394            items: vec![ReturnItem {
1395                expression: LogicalExpression::Variable("undefined".to_string()),
1396                alias: None,
1397            }],
1398            distinct: false,
1399            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1400                variable: "n".to_string(),
1401                label: None,
1402                input: None,
1403            })),
1404        }));
1405
1406        let mut binder = Binder::new();
1407        let result = binder.bind(&plan);
1408
1409        assert!(result.is_err());
1410        let err = result.unwrap_err();
1411        assert!(err.to_string().contains("Undefined variable"));
1412    }
1413
1414    #[test]
1415    fn test_bind_property_access() {
1416        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1417            items: vec![ReturnItem {
1418                expression: LogicalExpression::Property {
1419                    variable: "n".to_string(),
1420                    property: "name".to_string(),
1421                },
1422                alias: None,
1423            }],
1424            distinct: false,
1425            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1426                variable: "n".to_string(),
1427                label: Some("Person".to_string()),
1428                input: None,
1429            })),
1430        }));
1431
1432        let mut binder = Binder::new();
1433        let result = binder.bind(&plan);
1434
1435        assert!(result.is_ok());
1436    }
1437
1438    #[test]
1439    fn test_bind_filter_with_undefined_variable() {
1440        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1441            items: vec![ReturnItem {
1442                expression: LogicalExpression::Variable("n".to_string()),
1443                alias: None,
1444            }],
1445            distinct: false,
1446            input: Box::new(LogicalOperator::Filter(FilterOp {
1447                predicate: LogicalExpression::Binary {
1448                    left: Box::new(LogicalExpression::Property {
1449                        variable: "m".to_string(), // undefined!
1450                        property: "age".to_string(),
1451                    }),
1452                    op: BinaryOp::Gt,
1453                    right: Box::new(LogicalExpression::Literal(
1454                        grafeo_common::types::Value::Int64(30),
1455                    )),
1456                },
1457                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1458                    variable: "n".to_string(),
1459                    label: None,
1460                    input: None,
1461                })),
1462                pushdown_hint: None,
1463            })),
1464        }));
1465
1466        let mut binder = Binder::new();
1467        let result = binder.bind(&plan);
1468
1469        assert!(result.is_err());
1470        let err = result.unwrap_err();
1471        assert!(err.to_string().contains("Undefined variable 'm'"));
1472    }
1473
1474    #[test]
1475    fn test_bind_expand() {
1476        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1477
1478        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1479            items: vec![
1480                ReturnItem {
1481                    expression: LogicalExpression::Variable("a".to_string()),
1482                    alias: None,
1483                },
1484                ReturnItem {
1485                    expression: LogicalExpression::Variable("b".to_string()),
1486                    alias: None,
1487                },
1488            ],
1489            distinct: false,
1490            input: Box::new(LogicalOperator::Expand(ExpandOp {
1491                from_variable: "a".to_string(),
1492                to_variable: "b".to_string(),
1493                edge_variable: Some("e".to_string()),
1494                direction: ExpandDirection::Outgoing,
1495                edge_types: vec!["KNOWS".to_string()],
1496                min_hops: 1,
1497                max_hops: Some(1),
1498                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1499                    variable: "a".to_string(),
1500                    label: Some("Person".to_string()),
1501                    input: None,
1502                })),
1503                path_alias: None,
1504                path_mode: PathMode::Walk,
1505            })),
1506        }));
1507
1508        let mut binder = Binder::new();
1509        let result = binder.bind(&plan);
1510
1511        assert!(result.is_ok());
1512        let ctx = result.unwrap();
1513        assert!(ctx.contains("a"));
1514        assert!(ctx.contains("b"));
1515        assert!(ctx.contains("e"));
1516        assert!(ctx.get("a").unwrap().is_node);
1517        assert!(ctx.get("b").unwrap().is_node);
1518        assert!(ctx.get("e").unwrap().is_edge);
1519    }
1520
1521    #[test]
1522    fn test_bind_expand_from_undefined_variable() {
1523        // Tests that expanding from an undefined variable produces a clear error
1524        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1525
1526        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1527            items: vec![ReturnItem {
1528                expression: LogicalExpression::Variable("b".to_string()),
1529                alias: None,
1530            }],
1531            distinct: false,
1532            input: Box::new(LogicalOperator::Expand(ExpandOp {
1533                from_variable: "undefined".to_string(), // not defined!
1534                to_variable: "b".to_string(),
1535                edge_variable: None,
1536                direction: ExpandDirection::Outgoing,
1537                edge_types: vec![],
1538                min_hops: 1,
1539                max_hops: Some(1),
1540                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1541                    variable: "a".to_string(),
1542                    label: None,
1543                    input: None,
1544                })),
1545                path_alias: None,
1546                path_mode: PathMode::Walk,
1547            })),
1548        }));
1549
1550        let mut binder = Binder::new();
1551        let result = binder.bind(&plan);
1552
1553        assert!(result.is_err());
1554        let err = result.unwrap_err();
1555        assert!(
1556            err.to_string().contains("Undefined variable 'undefined'"),
1557            "Expected error about undefined variable, got: {}",
1558            err
1559        );
1560    }
1561
1562    #[test]
1563    fn test_bind_return_with_aggregate_and_non_aggregate() {
1564        // Tests binding of aggregate functions alongside regular expressions
1565        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1566            items: vec![
1567                ReturnItem {
1568                    expression: LogicalExpression::FunctionCall {
1569                        name: "count".to_string(),
1570                        args: vec![LogicalExpression::Variable("n".to_string())],
1571                        distinct: false,
1572                    },
1573                    alias: Some("cnt".to_string()),
1574                },
1575                ReturnItem {
1576                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1577                    alias: Some("one".to_string()),
1578                },
1579            ],
1580            distinct: false,
1581            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1582                variable: "n".to_string(),
1583                label: Some("Person".to_string()),
1584                input: None,
1585            })),
1586        }));
1587
1588        let mut binder = Binder::new();
1589        let result = binder.bind(&plan);
1590
1591        // This should succeed - count(n) with literal is valid
1592        assert!(result.is_ok());
1593    }
1594
1595    #[test]
1596    fn test_bind_nested_property_access() {
1597        // Tests that nested property access on the same variable works
1598        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1599            items: vec![
1600                ReturnItem {
1601                    expression: LogicalExpression::Property {
1602                        variable: "n".to_string(),
1603                        property: "name".to_string(),
1604                    },
1605                    alias: None,
1606                },
1607                ReturnItem {
1608                    expression: LogicalExpression::Property {
1609                        variable: "n".to_string(),
1610                        property: "age".to_string(),
1611                    },
1612                    alias: None,
1613                },
1614            ],
1615            distinct: false,
1616            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1617                variable: "n".to_string(),
1618                label: Some("Person".to_string()),
1619                input: None,
1620            })),
1621        }));
1622
1623        let mut binder = Binder::new();
1624        let result = binder.bind(&plan);
1625
1626        assert!(result.is_ok());
1627    }
1628
1629    #[test]
1630    fn test_bind_binary_expression_with_undefined() {
1631        // Tests that binary expressions with undefined variables produce errors
1632        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1633            items: vec![ReturnItem {
1634                expression: LogicalExpression::Binary {
1635                    left: Box::new(LogicalExpression::Property {
1636                        variable: "n".to_string(),
1637                        property: "age".to_string(),
1638                    }),
1639                    op: BinaryOp::Add,
1640                    right: Box::new(LogicalExpression::Property {
1641                        variable: "m".to_string(), // undefined!
1642                        property: "age".to_string(),
1643                    }),
1644                },
1645                alias: Some("total".to_string()),
1646            }],
1647            distinct: false,
1648            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1649                variable: "n".to_string(),
1650                label: None,
1651                input: None,
1652            })),
1653        }));
1654
1655        let mut binder = Binder::new();
1656        let result = binder.bind(&plan);
1657
1658        assert!(result.is_err());
1659        assert!(
1660            result
1661                .unwrap_err()
1662                .to_string()
1663                .contains("Undefined variable 'm'")
1664        );
1665    }
1666
1667    #[test]
1668    fn test_bind_duplicate_variable_definition() {
1669        // Tests behavior when the same variable is defined twice (via two NodeScans)
1670        // This is typically not allowed or the second shadows the first
1671        use crate::query::plan::{JoinOp, JoinType};
1672
1673        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1674            items: vec![ReturnItem {
1675                expression: LogicalExpression::Variable("n".to_string()),
1676                alias: None,
1677            }],
1678            distinct: false,
1679            input: Box::new(LogicalOperator::Join(JoinOp {
1680                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1681                    variable: "n".to_string(),
1682                    label: Some("A".to_string()),
1683                    input: None,
1684                })),
1685                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1686                    variable: "m".to_string(), // different variable is fine
1687                    label: Some("B".to_string()),
1688                    input: None,
1689                })),
1690                join_type: JoinType::Inner,
1691                conditions: vec![],
1692            })),
1693        }));
1694
1695        let mut binder = Binder::new();
1696        let result = binder.bind(&plan);
1697
1698        // Join with different variables should work
1699        assert!(result.is_ok());
1700        let ctx = result.unwrap();
1701        assert!(ctx.contains("n"));
1702        assert!(ctx.contains("m"));
1703    }
1704
1705    #[test]
1706    fn test_bind_function_with_wrong_arity() {
1707        // Tests that functions with wrong number of arguments are handled
1708        // (behavior depends on whether binder validates arity)
1709        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1710            items: vec![ReturnItem {
1711                expression: LogicalExpression::FunctionCall {
1712                    name: "count".to_string(),
1713                    args: vec![], // count() needs an argument
1714                    distinct: false,
1715                },
1716                alias: None,
1717            }],
1718            distinct: false,
1719            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1720                variable: "n".to_string(),
1721                label: None,
1722                input: None,
1723            })),
1724        }));
1725
1726        let mut binder = Binder::new();
1727        let result = binder.bind(&plan);
1728
1729        // The binder may or may not catch this - if it passes, execution will fail
1730        // This test documents current behavior
1731        // If binding fails, that's fine; if it passes, execution will handle it
1732        let _ = result; // We're just testing it doesn't panic
1733    }
1734
1735    // --- Mutation operator validation ---
1736
1737    #[test]
1738    fn test_create_edge_rejects_undefined_source() {
1739        use crate::query::plan::CreateEdgeOp;
1740
1741        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1742            variable: Some("e".to_string()),
1743            from_variable: "ghost".to_string(), // not defined!
1744            to_variable: "b".to_string(),
1745            edge_type: "KNOWS".to_string(),
1746            properties: vec![],
1747            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1748                variable: "b".to_string(),
1749                label: None,
1750                input: None,
1751            })),
1752        }));
1753
1754        let mut binder = Binder::new();
1755        let err = binder.bind(&plan).unwrap_err();
1756        assert!(
1757            err.to_string().contains("Undefined variable 'ghost'"),
1758            "Should reject undefined source variable, got: {err}"
1759        );
1760    }
1761
1762    #[test]
1763    fn test_create_edge_rejects_undefined_target() {
1764        use crate::query::plan::CreateEdgeOp;
1765
1766        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1767            variable: None,
1768            from_variable: "a".to_string(),
1769            to_variable: "missing".to_string(), // not defined!
1770            edge_type: "KNOWS".to_string(),
1771            properties: vec![],
1772            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1773                variable: "a".to_string(),
1774                label: None,
1775                input: None,
1776            })),
1777        }));
1778
1779        let mut binder = Binder::new();
1780        let err = binder.bind(&plan).unwrap_err();
1781        assert!(
1782            err.to_string().contains("Undefined variable 'missing'"),
1783            "Should reject undefined target variable, got: {err}"
1784        );
1785    }
1786
1787    #[test]
1788    fn test_create_edge_validates_property_expressions() {
1789        use crate::query::plan::CreateEdgeOp;
1790
1791        // Source and target defined, but property references undefined variable
1792        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1793            variable: Some("e".to_string()),
1794            from_variable: "a".to_string(),
1795            to_variable: "b".to_string(),
1796            edge_type: "KNOWS".to_string(),
1797            properties: vec![(
1798                "since".to_string(),
1799                LogicalExpression::Property {
1800                    variable: "x".to_string(), // undefined!
1801                    property: "year".to_string(),
1802                },
1803            )],
1804            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1805                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1806                    variable: "a".to_string(),
1807                    label: None,
1808                    input: None,
1809                })),
1810                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1811                    variable: "b".to_string(),
1812                    label: None,
1813                    input: None,
1814                })),
1815                join_type: crate::query::plan::JoinType::Inner,
1816                conditions: vec![],
1817            })),
1818        }));
1819
1820        let mut binder = Binder::new();
1821        let err = binder.bind(&plan).unwrap_err();
1822        assert!(err.to_string().contains("Undefined variable 'x'"));
1823    }
1824
1825    #[test]
1826    fn test_set_property_rejects_undefined_variable() {
1827        use crate::query::plan::SetPropertyOp;
1828
1829        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1830            variable: "ghost".to_string(),
1831            properties: vec![(
1832                "name".to_string(),
1833                LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1834            )],
1835            replace: false,
1836            is_edge: false,
1837            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1838                variable: "n".to_string(),
1839                label: None,
1840                input: None,
1841            })),
1842        }));
1843
1844        let mut binder = Binder::new();
1845        let err = binder.bind(&plan).unwrap_err();
1846        assert!(
1847            err.to_string().contains("in SET"),
1848            "Error should indicate SET context, got: {err}"
1849        );
1850    }
1851
1852    #[test]
1853    fn test_delete_node_rejects_undefined_variable() {
1854        use crate::query::plan::DeleteNodeOp;
1855
1856        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1857            variable: "phantom".to_string(),
1858            detach: false,
1859            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1860                variable: "n".to_string(),
1861                label: None,
1862                input: None,
1863            })),
1864        }));
1865
1866        let mut binder = Binder::new();
1867        let err = binder.bind(&plan).unwrap_err();
1868        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1869    }
1870
1871    #[test]
1872    fn test_delete_edge_rejects_undefined_variable() {
1873        use crate::query::plan::DeleteEdgeOp;
1874
1875        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1876            variable: "gone".to_string(),
1877            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1878                variable: "n".to_string(),
1879                label: None,
1880                input: None,
1881            })),
1882        }));
1883
1884        let mut binder = Binder::new();
1885        let err = binder.bind(&plan).unwrap_err();
1886        assert!(err.to_string().contains("Undefined variable 'gone'"));
1887    }
1888
1889    // --- WITH/Project clause ---
1890
1891    #[test]
1892    fn test_project_alias_becomes_available_downstream() {
1893        use crate::query::plan::{ProjectOp, Projection};
1894
1895        // WITH n.name AS person_name RETURN person_name
1896        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1897            items: vec![ReturnItem {
1898                expression: LogicalExpression::Variable("person_name".to_string()),
1899                alias: None,
1900            }],
1901            distinct: false,
1902            input: Box::new(LogicalOperator::Project(ProjectOp {
1903                projections: vec![Projection {
1904                    expression: LogicalExpression::Property {
1905                        variable: "n".to_string(),
1906                        property: "name".to_string(),
1907                    },
1908                    alias: Some("person_name".to_string()),
1909                }],
1910                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1911                    variable: "n".to_string(),
1912                    label: None,
1913                    input: None,
1914                })),
1915                pass_through_input: false,
1916            })),
1917        }));
1918
1919        let mut binder = Binder::new();
1920        let ctx = binder.bind(&plan).unwrap();
1921        assert!(
1922            ctx.contains("person_name"),
1923            "WITH alias should be available to RETURN"
1924        );
1925    }
1926
1927    #[test]
1928    fn test_project_rejects_undefined_expression() {
1929        use crate::query::plan::{ProjectOp, Projection};
1930
1931        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1932            projections: vec![Projection {
1933                expression: LogicalExpression::Variable("nope".to_string()),
1934                alias: Some("x".to_string()),
1935            }],
1936            input: Box::new(LogicalOperator::Empty),
1937            pass_through_input: false,
1938        }));
1939
1940        let mut binder = Binder::new();
1941        let result = binder.bind(&plan);
1942        assert!(result.is_err(), "WITH on undefined variable should fail");
1943    }
1944
1945    // --- UNWIND ---
1946
1947    #[test]
1948    fn test_unwind_adds_element_variable() {
1949        use crate::query::plan::UnwindOp;
1950
1951        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1952            items: vec![ReturnItem {
1953                expression: LogicalExpression::Variable("item".to_string()),
1954                alias: None,
1955            }],
1956            distinct: false,
1957            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1958                expression: LogicalExpression::List(vec![
1959                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1960                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1961                ]),
1962                variable: "item".to_string(),
1963                ordinality_var: None,
1964                offset_var: None,
1965                input: Box::new(LogicalOperator::Empty),
1966            })),
1967        }));
1968
1969        let mut binder = Binder::new();
1970        let ctx = binder.bind(&plan).unwrap();
1971        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1972        let info = ctx.get("item").unwrap();
1973        assert!(
1974            !info.is_node && !info.is_edge,
1975            "UNWIND variable is not a graph element"
1976        );
1977    }
1978
1979    // --- MERGE ---
1980
1981    #[test]
1982    fn test_merge_adds_variable_and_validates_properties() {
1983        use crate::query::plan::MergeOp;
1984
1985        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1986            items: vec![ReturnItem {
1987                expression: LogicalExpression::Variable("m".to_string()),
1988                alias: None,
1989            }],
1990            distinct: false,
1991            input: Box::new(LogicalOperator::Merge(MergeOp {
1992                variable: "m".to_string(),
1993                labels: vec!["Person".to_string()],
1994                match_properties: vec![(
1995                    "name".to_string(),
1996                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1997                )],
1998                on_create: vec![(
1999                    "created".to_string(),
2000                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2001                )],
2002                on_match: vec![(
2003                    "updated".to_string(),
2004                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2005                )],
2006                input: Box::new(LogicalOperator::Empty),
2007            })),
2008        }));
2009
2010        let mut binder = Binder::new();
2011        let ctx = binder.bind(&plan).unwrap();
2012        assert!(ctx.contains("m"));
2013        assert!(
2014            ctx.get("m").unwrap().is_node,
2015            "MERGE variable should be a node"
2016        );
2017    }
2018
2019    #[test]
2020    fn test_merge_rejects_undefined_in_on_create() {
2021        use crate::query::plan::MergeOp;
2022
2023        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
2024            variable: "m".to_string(),
2025            labels: vec![],
2026            match_properties: vec![],
2027            on_create: vec![(
2028                "name".to_string(),
2029                LogicalExpression::Property {
2030                    variable: "other".to_string(), // undefined!
2031                    property: "name".to_string(),
2032                },
2033            )],
2034            on_match: vec![],
2035            input: Box::new(LogicalOperator::Empty),
2036        }));
2037
2038        let mut binder = Binder::new();
2039        let result = binder.bind(&plan);
2040        assert!(
2041            result.is_err(),
2042            "ON CREATE referencing undefined variable should fail"
2043        );
2044    }
2045
2046    // --- ShortestPath ---
2047
2048    #[test]
2049    fn test_shortest_path_rejects_undefined_source() {
2050        use crate::query::plan::{ExpandDirection, ShortestPathOp};
2051
2052        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2053            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2054                variable: "b".to_string(),
2055                label: None,
2056                input: None,
2057            })),
2058            source_var: "missing".to_string(), // not defined
2059            target_var: "b".to_string(),
2060            edge_types: vec![],
2061            direction: ExpandDirection::Both,
2062            path_alias: "p".to_string(),
2063            all_paths: false,
2064        }));
2065
2066        let mut binder = Binder::new();
2067        let err = binder.bind(&plan).unwrap_err();
2068        assert!(
2069            err.to_string().contains("source in shortestPath"),
2070            "Error should mention shortestPath source context, got: {err}"
2071        );
2072    }
2073
2074    #[test]
2075    fn test_shortest_path_adds_path_and_length_variables() {
2076        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
2077
2078        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2079            input: Box::new(LogicalOperator::Join(JoinOp {
2080                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2081                    variable: "a".to_string(),
2082                    label: None,
2083                    input: None,
2084                })),
2085                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2086                    variable: "b".to_string(),
2087                    label: None,
2088                    input: None,
2089                })),
2090                join_type: JoinType::Cross,
2091                conditions: vec![],
2092            })),
2093            source_var: "a".to_string(),
2094            target_var: "b".to_string(),
2095            edge_types: vec!["ROAD".to_string()],
2096            direction: ExpandDirection::Outgoing,
2097            path_alias: "p".to_string(),
2098            all_paths: false,
2099        }));
2100
2101        let mut binder = Binder::new();
2102        let ctx = binder.bind(&plan).unwrap();
2103        assert!(ctx.contains("p"), "Path alias should be bound");
2104        assert!(
2105            ctx.contains("_path_length_p"),
2106            "Path length variable should be auto-created"
2107        );
2108    }
2109
2110    // --- Expression validation edge cases ---
2111
2112    #[test]
2113    fn test_case_expression_validates_all_branches() {
2114        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2115            items: vec![ReturnItem {
2116                expression: LogicalExpression::Case {
2117                    operand: None,
2118                    when_clauses: vec![
2119                        (
2120                            LogicalExpression::Binary {
2121                                left: Box::new(LogicalExpression::Property {
2122                                    variable: "n".to_string(),
2123                                    property: "age".to_string(),
2124                                }),
2125                                op: BinaryOp::Gt,
2126                                right: Box::new(LogicalExpression::Literal(
2127                                    grafeo_common::types::Value::Int64(18),
2128                                )),
2129                            },
2130                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2131                                "adult".into(),
2132                            )),
2133                        ),
2134                        (
2135                            // This branch references undefined variable
2136                            LogicalExpression::Property {
2137                                variable: "ghost".to_string(),
2138                                property: "flag".to_string(),
2139                            },
2140                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2141                                "flagged".into(),
2142                            )),
2143                        ),
2144                    ],
2145                    else_clause: Some(Box::new(LogicalExpression::Literal(
2146                        grafeo_common::types::Value::String("other".into()),
2147                    ))),
2148                },
2149                alias: None,
2150            }],
2151            distinct: false,
2152            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2153                variable: "n".to_string(),
2154                label: None,
2155                input: None,
2156            })),
2157        }));
2158
2159        let mut binder = Binder::new();
2160        let err = binder.bind(&plan).unwrap_err();
2161        assert!(
2162            err.to_string().contains("ghost"),
2163            "CASE should validate all when-clause conditions"
2164        );
2165    }
2166
2167    #[test]
2168    fn test_case_expression_validates_else_clause() {
2169        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2170            items: vec![ReturnItem {
2171                expression: LogicalExpression::Case {
2172                    operand: None,
2173                    when_clauses: vec![(
2174                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2175                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
2176                    )],
2177                    else_clause: Some(Box::new(LogicalExpression::Property {
2178                        variable: "missing".to_string(),
2179                        property: "x".to_string(),
2180                    })),
2181                },
2182                alias: None,
2183            }],
2184            distinct: false,
2185            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2186                variable: "n".to_string(),
2187                label: None,
2188                input: None,
2189            })),
2190        }));
2191
2192        let mut binder = Binder::new();
2193        let err = binder.bind(&plan).unwrap_err();
2194        assert!(
2195            err.to_string().contains("missing"),
2196            "CASE ELSE should validate its expression too"
2197        );
2198    }
2199
2200    #[test]
2201    fn test_slice_access_validates_expressions() {
2202        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2203            items: vec![ReturnItem {
2204                expression: LogicalExpression::SliceAccess {
2205                    base: Box::new(LogicalExpression::Variable("n".to_string())),
2206                    start: Some(Box::new(LogicalExpression::Variable(
2207                        "undefined_start".to_string(),
2208                    ))),
2209                    end: None,
2210                },
2211                alias: None,
2212            }],
2213            distinct: false,
2214            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2215                variable: "n".to_string(),
2216                label: None,
2217                input: None,
2218            })),
2219        }));
2220
2221        let mut binder = Binder::new();
2222        let err = binder.bind(&plan).unwrap_err();
2223        assert!(err.to_string().contains("undefined_start"));
2224    }
2225
2226    #[test]
2227    fn test_list_comprehension_validates_list_source() {
2228        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2229            items: vec![ReturnItem {
2230                expression: LogicalExpression::ListComprehension {
2231                    variable: "x".to_string(),
2232                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
2233                    filter_expr: None,
2234                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
2235                },
2236                alias: None,
2237            }],
2238            distinct: false,
2239            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2240                variable: "n".to_string(),
2241                label: None,
2242                input: None,
2243            })),
2244        }));
2245
2246        let mut binder = Binder::new();
2247        let err = binder.bind(&plan).unwrap_err();
2248        assert!(
2249            err.to_string().contains("not_defined"),
2250            "List comprehension should validate source list expression"
2251        );
2252    }
2253
2254    #[test]
2255    fn test_labels_type_id_reject_undefined() {
2256        // labels(x) where x is not defined
2257        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2258            items: vec![ReturnItem {
2259                expression: LogicalExpression::Labels("x".to_string()),
2260                alias: None,
2261            }],
2262            distinct: false,
2263            input: Box::new(LogicalOperator::Empty),
2264        }));
2265
2266        let mut binder = Binder::new();
2267        assert!(
2268            binder.bind(&plan).is_err(),
2269            "labels(x) on undefined x should fail"
2270        );
2271
2272        // type(e) where e is not defined
2273        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2274            items: vec![ReturnItem {
2275                expression: LogicalExpression::Type("e".to_string()),
2276                alias: None,
2277            }],
2278            distinct: false,
2279            input: Box::new(LogicalOperator::Empty),
2280        }));
2281
2282        let mut binder2 = Binder::new();
2283        assert!(
2284            binder2.bind(&plan2).is_err(),
2285            "type(e) on undefined e should fail"
2286        );
2287
2288        // id(n) where n is not defined
2289        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2290            items: vec![ReturnItem {
2291                expression: LogicalExpression::Id("n".to_string()),
2292                alias: None,
2293            }],
2294            distinct: false,
2295            input: Box::new(LogicalOperator::Empty),
2296        }));
2297
2298        let mut binder3 = Binder::new();
2299        assert!(
2300            binder3.bind(&plan3).is_err(),
2301            "id(n) on undefined n should fail"
2302        );
2303    }
2304
2305    #[test]
2306    fn test_expand_rejects_non_node_source() {
2307        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode, UnwindOp};
2308
2309        // UNWIND [1,2] AS x  -- x is not a node
2310        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
2311        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2312            items: vec![ReturnItem {
2313                expression: LogicalExpression::Variable("b".to_string()),
2314                alias: None,
2315            }],
2316            distinct: false,
2317            input: Box::new(LogicalOperator::Expand(ExpandOp {
2318                from_variable: "x".to_string(),
2319                to_variable: "b".to_string(),
2320                edge_variable: None,
2321                direction: ExpandDirection::Outgoing,
2322                edge_types: vec![],
2323                min_hops: 1,
2324                max_hops: Some(1),
2325                input: Box::new(LogicalOperator::Unwind(UnwindOp {
2326                    expression: LogicalExpression::List(vec![]),
2327                    variable: "x".to_string(),
2328                    ordinality_var: None,
2329                    offset_var: None,
2330                    input: Box::new(LogicalOperator::Empty),
2331                })),
2332                path_alias: None,
2333                path_mode: PathMode::Walk,
2334            })),
2335        }));
2336
2337        let mut binder = Binder::new();
2338        let err = binder.bind(&plan).unwrap_err();
2339        assert!(
2340            err.to_string().contains("not a node"),
2341            "Expanding from non-node should fail, got: {err}"
2342        );
2343    }
2344
2345    #[test]
2346    fn test_add_label_rejects_undefined_variable() {
2347        use crate::query::plan::AddLabelOp;
2348
2349        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
2350            variable: "missing".to_string(),
2351            labels: vec!["Admin".to_string()],
2352            input: Box::new(LogicalOperator::Empty),
2353        }));
2354
2355        let mut binder = Binder::new();
2356        let err = binder.bind(&plan).unwrap_err();
2357        assert!(err.to_string().contains("SET labels"));
2358    }
2359
2360    #[test]
2361    fn test_remove_label_rejects_undefined_variable() {
2362        use crate::query::plan::RemoveLabelOp;
2363
2364        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
2365            variable: "missing".to_string(),
2366            labels: vec!["Admin".to_string()],
2367            input: Box::new(LogicalOperator::Empty),
2368        }));
2369
2370        let mut binder = Binder::new();
2371        let err = binder.bind(&plan).unwrap_err();
2372        assert!(err.to_string().contains("REMOVE labels"));
2373    }
2374
2375    #[test]
2376    fn test_sort_validates_key_expressions() {
2377        use crate::query::plan::{SortKey, SortOp, SortOrder};
2378
2379        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
2380            keys: vec![SortKey {
2381                expression: LogicalExpression::Property {
2382                    variable: "missing".to_string(),
2383                    property: "name".to_string(),
2384                },
2385                order: SortOrder::Ascending,
2386                nulls: None,
2387            }],
2388            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2389                variable: "n".to_string(),
2390                label: None,
2391                input: None,
2392            })),
2393        }));
2394
2395        let mut binder = Binder::new();
2396        assert!(
2397            binder.bind(&plan).is_err(),
2398            "ORDER BY on undefined variable should fail"
2399        );
2400    }
2401
2402    #[test]
2403    fn test_create_node_adds_variable_before_property_validation() {
2404        use crate::query::plan::CreateNodeOp;
2405
2406        // CREATE (n:Person {friend: n.name}) - referencing the node being created
2407        // The variable should be available for property expressions (self-reference)
2408        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
2409            variable: "n".to_string(),
2410            labels: vec!["Person".to_string()],
2411            properties: vec![(
2412                "self_ref".to_string(),
2413                LogicalExpression::Property {
2414                    variable: "n".to_string(),
2415                    property: "name".to_string(),
2416                },
2417            )],
2418            input: None,
2419        }));
2420
2421        let mut binder = Binder::new();
2422        // This should succeed because CreateNode adds the variable before validating properties
2423        let ctx = binder.bind(&plan).unwrap();
2424        assert!(ctx.get("n").unwrap().is_node);
2425    }
2426
2427    #[test]
2428    fn test_undefined_variable_suggests_similar() {
2429        // 'person' is defined, user types 'persn' - should get a suggestion
2430        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2431            items: vec![ReturnItem {
2432                expression: LogicalExpression::Variable("persn".to_string()),
2433                alias: None,
2434            }],
2435            distinct: false,
2436            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2437                variable: "person".to_string(),
2438                label: None,
2439                input: None,
2440            })),
2441        }));
2442
2443        let mut binder = Binder::new();
2444        let err = binder.bind(&plan).unwrap_err();
2445        let msg = err.to_string();
2446        // The error should contain the variable name at minimum
2447        assert!(
2448            msg.contains("persn"),
2449            "Error should mention the undefined variable"
2450        );
2451    }
2452
2453    #[test]
2454    fn test_anon_variables_skip_validation() {
2455        // Variables starting with _anon_ are anonymous and should be silently accepted
2456        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2457            items: vec![ReturnItem {
2458                expression: LogicalExpression::Variable("_anon_42".to_string()),
2459                alias: None,
2460            }],
2461            distinct: false,
2462            input: Box::new(LogicalOperator::Empty),
2463        }));
2464
2465        let mut binder = Binder::new();
2466        let result = binder.bind(&plan);
2467        assert!(
2468            result.is_ok(),
2469            "Anonymous variables should bypass validation"
2470        );
2471    }
2472
2473    #[test]
2474    fn test_map_expression_validates_values() {
2475        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2476            items: vec![ReturnItem {
2477                expression: LogicalExpression::Map(vec![(
2478                    "key".to_string(),
2479                    LogicalExpression::Variable("undefined".to_string()),
2480                )]),
2481                alias: None,
2482            }],
2483            distinct: false,
2484            input: Box::new(LogicalOperator::Empty),
2485        }));
2486
2487        let mut binder = Binder::new();
2488        assert!(
2489            binder.bind(&plan).is_err(),
2490            "Map values should be validated"
2491        );
2492    }
2493
2494    #[test]
2495    fn test_vector_scan_validates_query_vector() {
2496        use crate::query::plan::VectorScanOp;
2497
2498        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2499            variable: "result".to_string(),
2500            index_name: None,
2501            property: "embedding".to_string(),
2502            label: Some("Doc".to_string()),
2503            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2504            k: 10,
2505            metric: None,
2506            min_similarity: None,
2507            max_distance: None,
2508            input: None,
2509        }));
2510
2511        let mut binder = Binder::new();
2512        let err = binder.bind(&plan).unwrap_err();
2513        assert!(err.to_string().contains("undefined_vec"));
2514    }
2515}