Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use indexmap::IndexMap;
18use std::collections::HashSet;
19
20/// Creates a semantic binding error.
21fn binding_error(message: impl Into<String>) -> Error {
22    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
23}
24
25/// Creates a semantic binding error with a hint.
26fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
27    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
28}
29
30/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
31fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
32    let candidates: Vec<String> = context.variable_names();
33    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
34
35    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
36        binding_error_with_hint(
37            format!("Undefined variable '{variable}'{suffix}"),
38            format_suggestion(suggestion),
39        )
40    } else {
41        binding_error(format!("Undefined variable '{variable}'{suffix}"))
42    }
43}
44
/// Information about a bound variable.
///
/// One `VariableInfo` is stored per variable name in [`BindingContext`].
/// The `is_node` / `is_edge` flags are tracked separately from `data_type`:
/// the binder sets them explicitly at each definition site, and a variable may
/// be flagged as a node while its `data_type` is `LogicalType::Any` (this is
/// what the `ParameterScan` arm of the binder does).
#[derive(Debug, Clone)]
pub struct VariableInfo {
    /// The name of the variable (the same string used as its key in the context).
    pub name: String,
    /// The inferred type of the variable.
    pub data_type: LogicalType,
    /// Whether this variable is a node.
    pub is_node: bool,
    /// Whether this variable is an edge.
    pub is_edge: bool,
}
57
/// Context containing all bound variables and their information.
///
/// Uses `IndexMap` to maintain insertion order without a separate `Vec`,
/// removing redundant storage and making `remove_variable` O(n) instead of
/// two separate O(n) operations. Because the map itself is ordered,
/// `variable_names` returns names in the order they were first defined.
#[derive(Debug, Clone, Default)]
pub struct BindingContext {
    /// Map from variable name to its info, in definition order.
    variables: IndexMap<String, VariableInfo>,
}
68
69impl BindingContext {
70    /// Creates a new empty binding context.
71    #[must_use]
72    pub fn new() -> Self {
73        Self {
74            variables: IndexMap::new(),
75        }
76    }
77
78    /// Adds a variable to the context.
79    ///
80    /// If the variable is already defined, replaces its info but preserves its
81    /// position in definition order.
82    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
83        self.variables.insert(name, info);
84    }
85
86    /// Looks up a variable by name.
87    #[must_use]
88    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
89        self.variables.get(name)
90    }
91
92    /// Checks if a variable is defined.
93    #[must_use]
94    pub fn contains(&self, name: &str) -> bool {
95        self.variables.contains_key(name)
96    }
97
98    /// Returns all variable names in definition order.
99    #[must_use]
100    pub fn variable_names(&self) -> Vec<String> {
101        self.variables.keys().cloned().collect()
102    }
103
104    /// Returns the number of bound variables.
105    #[must_use]
106    pub fn len(&self) -> usize {
107        self.variables.len()
108    }
109
110    /// Returns true if no variables are bound.
111    #[must_use]
112    pub fn is_empty(&self) -> bool {
113        self.variables.is_empty()
114    }
115
116    /// Removes a variable from the context (used for temporary scoping).
117    pub fn remove_variable(&mut self, name: &str) {
118        self.variables.shift_remove(name);
119    }
120}
121
/// Semantic binder for query plans.
///
/// The binder walks the logical plan and:
/// 1. Collects all variable definitions
/// 2. Validates that all variable references are valid
/// 3. Infers types where possible
/// 4. Reports semantic errors
///
/// All state lives in a single [`BindingContext`] that is filled in as
/// operators are bound; `bind` hands back a clone of it.
pub struct Binder {
    /// The current binding context: the variables in scope at the point the
    /// walk has reached.
    context: BindingContext,
}
133
134impl Binder {
135    /// Creates a new binder.
136    #[must_use]
137    pub fn new() -> Self {
138        Self {
139            context: BindingContext::new(),
140        }
141    }
142
143    /// Binds a logical plan, returning the binding context.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if semantic validation fails.
148    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
149        self.bind_operator(&plan.root)?;
150        Ok(self.context.clone())
151    }
152
153    /// Binds a single logical operator.
154    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
155        match op {
156            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
157            LogicalOperator::Expand(expand) => self.bind_expand(expand),
158            LogicalOperator::Filter(filter) => self.bind_filter(filter),
159            LogicalOperator::Return(ret) => self.bind_return(ret),
160            LogicalOperator::Project(project) => {
161                self.bind_operator(&project.input)?;
162                for projection in &project.projections {
163                    self.validate_expression(&projection.expression)?;
164                    // Add the projection alias to the context (for WITH clause support)
165                    if let Some(ref alias) = projection.alias {
166                        // Determine the type from the expression
167                        let data_type = self.infer_expression_type(&projection.expression);
168                        self.context.add_variable(
169                            alias.clone(),
170                            VariableInfo {
171                                name: alias.clone(),
172                                data_type,
173                                is_node: false,
174                                is_edge: false,
175                            },
176                        );
177                    }
178                }
179                Ok(())
180            }
181            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
182            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
183            LogicalOperator::Sort(sort) => {
184                self.bind_operator(&sort.input)?;
185                for key in &sort.keys {
186                    self.validate_expression(&key.expression)?;
187                }
188                Ok(())
189            }
190            LogicalOperator::CreateNode(create) => {
191                // CreateNode introduces a new variable
192                if let Some(ref input) = create.input {
193                    self.bind_operator(input)?;
194                }
195                self.context.add_variable(
196                    create.variable.clone(),
197                    VariableInfo {
198                        name: create.variable.clone(),
199                        data_type: LogicalType::Node,
200                        is_node: true,
201                        is_edge: false,
202                    },
203                );
204                // Validate property expressions
205                for (_, expr) in &create.properties {
206                    self.validate_expression(expr)?;
207                }
208                Ok(())
209            }
210            LogicalOperator::EdgeScan(scan) => {
211                if let Some(ref input) = scan.input {
212                    self.bind_operator(input)?;
213                }
214                self.context.add_variable(
215                    scan.variable.clone(),
216                    VariableInfo {
217                        name: scan.variable.clone(),
218                        data_type: LogicalType::Edge,
219                        is_node: false,
220                        is_edge: true,
221                    },
222                );
223                Ok(())
224            }
225            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
226            LogicalOperator::Join(join) => self.bind_join(join),
227            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
228            LogicalOperator::CreateEdge(create) => {
229                self.bind_operator(&create.input)?;
230                // Validate that source and target variables are defined
231                if !self.context.contains(&create.from_variable) {
232                    return Err(undefined_variable_error(
233                        &create.from_variable,
234                        &self.context,
235                        " (source in CREATE EDGE)",
236                    ));
237                }
238                if !self.context.contains(&create.to_variable) {
239                    return Err(undefined_variable_error(
240                        &create.to_variable,
241                        &self.context,
242                        " (target in CREATE EDGE)",
243                    ));
244                }
245                // Add edge variable if present
246                if let Some(ref var) = create.variable {
247                    self.context.add_variable(
248                        var.clone(),
249                        VariableInfo {
250                            name: var.clone(),
251                            data_type: LogicalType::Edge,
252                            is_node: false,
253                            is_edge: true,
254                        },
255                    );
256                }
257                // Validate property expressions
258                for (_, expr) in &create.properties {
259                    self.validate_expression(expr)?;
260                }
261                Ok(())
262            }
263            LogicalOperator::DeleteNode(delete) => {
264                self.bind_operator(&delete.input)?;
265                // Validate that the variable to delete is defined
266                if !self.context.contains(&delete.variable) {
267                    return Err(undefined_variable_error(
268                        &delete.variable,
269                        &self.context,
270                        " in DELETE",
271                    ));
272                }
273                Ok(())
274            }
275            LogicalOperator::DeleteEdge(delete) => {
276                self.bind_operator(&delete.input)?;
277                // Validate that the variable to delete is defined
278                if !self.context.contains(&delete.variable) {
279                    return Err(undefined_variable_error(
280                        &delete.variable,
281                        &self.context,
282                        " in DELETE",
283                    ));
284                }
285                Ok(())
286            }
287            LogicalOperator::SetProperty(set) => {
288                self.bind_operator(&set.input)?;
289                // Validate that the variable to update is defined
290                if !self.context.contains(&set.variable) {
291                    return Err(undefined_variable_error(
292                        &set.variable,
293                        &self.context,
294                        " in SET",
295                    ));
296                }
297                // Validate property value expressions
298                for (_, expr) in &set.properties {
299                    self.validate_expression(expr)?;
300                }
301                Ok(())
302            }
303            LogicalOperator::Empty => Ok(()),
304
305            LogicalOperator::Unwind(unwind) => {
306                // First bind the input
307                self.bind_operator(&unwind.input)?;
308                // Validate the expression being unwound
309                self.validate_expression(&unwind.expression)?;
310                // Add the new variable to the context
311                self.context.add_variable(
312                    unwind.variable.clone(),
313                    VariableInfo {
314                        name: unwind.variable.clone(),
315                        data_type: LogicalType::Any, // Unwound elements can be any type
316                        is_node: false,
317                        is_edge: false,
318                    },
319                );
320                // Add ORDINALITY variable if present (1-based index)
321                if let Some(ref ord_var) = unwind.ordinality_var {
322                    self.context.add_variable(
323                        ord_var.clone(),
324                        VariableInfo {
325                            name: ord_var.clone(),
326                            data_type: LogicalType::Int64,
327                            is_node: false,
328                            is_edge: false,
329                        },
330                    );
331                }
332                // Add OFFSET variable if present (0-based index)
333                if let Some(ref off_var) = unwind.offset_var {
334                    self.context.add_variable(
335                        off_var.clone(),
336                        VariableInfo {
337                            name: off_var.clone(),
338                            data_type: LogicalType::Int64,
339                            is_node: false,
340                            is_edge: false,
341                        },
342                    );
343                }
344                Ok(())
345            }
346
347            // RDF/SPARQL operators
348            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
349            LogicalOperator::Union(union) => {
350                for input in &union.inputs {
351                    self.bind_operator(input)?;
352                }
353                Ok(())
354            }
355            LogicalOperator::LeftJoin(lj) => {
356                self.bind_operator(&lj.left)?;
357                self.bind_operator(&lj.right)?;
358                if let Some(ref cond) = lj.condition {
359                    self.validate_expression(cond)?;
360                }
361                Ok(())
362            }
363            LogicalOperator::AntiJoin(aj) => {
364                self.bind_operator(&aj.left)?;
365                self.bind_operator(&aj.right)?;
366                Ok(())
367            }
368            LogicalOperator::Bind(bind) => {
369                self.bind_operator(&bind.input)?;
370                self.validate_expression(&bind.expression)?;
371                self.context.add_variable(
372                    bind.variable.clone(),
373                    VariableInfo {
374                        name: bind.variable.clone(),
375                        data_type: LogicalType::Any,
376                        is_node: false,
377                        is_edge: false,
378                    },
379                );
380                Ok(())
381            }
382            LogicalOperator::Merge(merge) => {
383                // First bind the input
384                self.bind_operator(&merge.input)?;
385                // Validate the match property expressions
386                for (_, expr) in &merge.match_properties {
387                    self.validate_expression(expr)?;
388                }
389                // Validate the ON CREATE property expressions
390                for (_, expr) in &merge.on_create {
391                    self.validate_expression(expr)?;
392                }
393                // Validate the ON MATCH property expressions
394                for (_, expr) in &merge.on_match {
395                    self.validate_expression(expr)?;
396                }
397                // MERGE introduces a new variable
398                self.context.add_variable(
399                    merge.variable.clone(),
400                    VariableInfo {
401                        name: merge.variable.clone(),
402                        data_type: LogicalType::Node,
403                        is_node: true,
404                        is_edge: false,
405                    },
406                );
407                Ok(())
408            }
409            LogicalOperator::MergeRelationship(merge_rel) => {
410                self.bind_operator(&merge_rel.input)?;
411                // Validate source and target variables exist
412                if !self.context.contains(&merge_rel.source_variable) {
413                    return Err(undefined_variable_error(
414                        &merge_rel.source_variable,
415                        &self.context,
416                        " in MERGE relationship source",
417                    ));
418                }
419                if !self.context.contains(&merge_rel.target_variable) {
420                    return Err(undefined_variable_error(
421                        &merge_rel.target_variable,
422                        &self.context,
423                        " in MERGE relationship target",
424                    ));
425                }
426                for (_, expr) in &merge_rel.match_properties {
427                    self.validate_expression(expr)?;
428                }
429                for (_, expr) in &merge_rel.on_create {
430                    self.validate_expression(expr)?;
431                }
432                for (_, expr) in &merge_rel.on_match {
433                    self.validate_expression(expr)?;
434                }
435                // MERGE relationship introduces the edge variable
436                self.context.add_variable(
437                    merge_rel.variable.clone(),
438                    VariableInfo {
439                        name: merge_rel.variable.clone(),
440                        data_type: LogicalType::Edge,
441                        is_node: false,
442                        is_edge: true,
443                    },
444                );
445                Ok(())
446            }
447            LogicalOperator::AddLabel(add_label) => {
448                self.bind_operator(&add_label.input)?;
449                // Validate that the variable exists
450                if !self.context.contains(&add_label.variable) {
451                    return Err(undefined_variable_error(
452                        &add_label.variable,
453                        &self.context,
454                        " in SET labels",
455                    ));
456                }
457                Ok(())
458            }
459            LogicalOperator::RemoveLabel(remove_label) => {
460                self.bind_operator(&remove_label.input)?;
461                // Validate that the variable exists
462                if !self.context.contains(&remove_label.variable) {
463                    return Err(undefined_variable_error(
464                        &remove_label.variable,
465                        &self.context,
466                        " in REMOVE labels",
467                    ));
468                }
469                Ok(())
470            }
471            LogicalOperator::ShortestPath(sp) => {
472                // First bind the input
473                self.bind_operator(&sp.input)?;
474                // Validate that source and target variables are defined
475                if !self.context.contains(&sp.source_var) {
476                    return Err(undefined_variable_error(
477                        &sp.source_var,
478                        &self.context,
479                        " (source in shortestPath)",
480                    ));
481                }
482                if !self.context.contains(&sp.target_var) {
483                    return Err(undefined_variable_error(
484                        &sp.target_var,
485                        &self.context,
486                        " (target in shortestPath)",
487                    ));
488                }
489                // Add the path alias variable to the context
490                self.context.add_variable(
491                    sp.path_alias.clone(),
492                    VariableInfo {
493                        name: sp.path_alias.clone(),
494                        data_type: LogicalType::Any, // Path is a complex type
495                        is_node: false,
496                        is_edge: false,
497                    },
498                );
499                // Also add the path length variable for length(p) calls
500                let path_length_var = format!("_path_length_{}", sp.path_alias);
501                self.context.add_variable(
502                    path_length_var.clone(),
503                    VariableInfo {
504                        name: path_length_var,
505                        data_type: LogicalType::Int64,
506                        is_node: false,
507                        is_edge: false,
508                    },
509                );
510                Ok(())
511            }
512            // SPARQL Update operators - these don't require variable binding
513            LogicalOperator::InsertTriple(insert) => {
514                if let Some(ref input) = insert.input {
515                    self.bind_operator(input)?;
516                }
517                Ok(())
518            }
519            LogicalOperator::DeleteTriple(delete) => {
520                if let Some(ref input) = delete.input {
521                    self.bind_operator(input)?;
522                }
523                Ok(())
524            }
525            LogicalOperator::Modify(modify) => {
526                self.bind_operator(&modify.where_clause)?;
527                Ok(())
528            }
529            LogicalOperator::ClearGraph(_)
530            | LogicalOperator::CreateGraph(_)
531            | LogicalOperator::DropGraph(_)
532            | LogicalOperator::LoadGraph(_)
533            | LogicalOperator::CopyGraph(_)
534            | LogicalOperator::MoveGraph(_)
535            | LogicalOperator::AddGraph(_)
536            | LogicalOperator::HorizontalAggregate(_) => Ok(()),
537            LogicalOperator::VectorScan(scan) => {
538                // VectorScan introduces a variable for matched nodes
539                if let Some(ref input) = scan.input {
540                    self.bind_operator(input)?;
541                }
542                self.context.add_variable(
543                    scan.variable.clone(),
544                    VariableInfo {
545                        name: scan.variable.clone(),
546                        data_type: LogicalType::Node,
547                        is_node: true,
548                        is_edge: false,
549                    },
550                );
551                // Validate the query vector expression
552                self.validate_expression(&scan.query_vector)?;
553                Ok(())
554            }
555            LogicalOperator::VectorJoin(join) => {
556                // VectorJoin takes input from left side and produces right-side matches
557                self.bind_operator(&join.input)?;
558                // Add right variable for matched nodes
559                self.context.add_variable(
560                    join.right_variable.clone(),
561                    VariableInfo {
562                        name: join.right_variable.clone(),
563                        data_type: LogicalType::Node,
564                        is_node: true,
565                        is_edge: false,
566                    },
567                );
568                // Optionally add score variable
569                if let Some(ref score_var) = join.score_variable {
570                    self.context.add_variable(
571                        score_var.clone(),
572                        VariableInfo {
573                            name: score_var.clone(),
574                            data_type: LogicalType::Float64,
575                            is_node: false,
576                            is_edge: false,
577                        },
578                    );
579                }
580                // Validate the query vector expression
581                self.validate_expression(&join.query_vector)?;
582                Ok(())
583            }
584            LogicalOperator::MapCollect(mc) => {
585                self.bind_operator(&mc.input)?;
586                self.context.add_variable(
587                    mc.alias.clone(),
588                    VariableInfo {
589                        name: mc.alias.clone(),
590                        data_type: LogicalType::Any,
591                        is_node: false,
592                        is_edge: false,
593                    },
594                );
595                Ok(())
596            }
597            LogicalOperator::Except(except) => {
598                self.bind_operator(&except.left)?;
599                self.bind_operator(&except.right)?;
600                Ok(())
601            }
602            LogicalOperator::Intersect(intersect) => {
603                self.bind_operator(&intersect.left)?;
604                self.bind_operator(&intersect.right)?;
605                Ok(())
606            }
607            LogicalOperator::Otherwise(otherwise) => {
608                self.bind_operator(&otherwise.left)?;
609                self.bind_operator(&otherwise.right)?;
610                Ok(())
611            }
612            LogicalOperator::Apply(apply) => {
613                // Snapshot context BEFORE binding the input, so we can detect
614                // which variables were added by the input plan.
615                let pre_apply_names: HashSet<String> =
616                    self.context.variable_names().iter().cloned().collect();
617
618                self.bind_operator(&apply.input)?;
619
620                // Scope down: when the input plan exposes a Return/Aggregate
621                // projection (not a raw scan/expand), remove its internal-only
622                // variables. Only the projected output columns should be visible
623                // to the subplan — this prevents variables internal to a sibling
624                // CALL block from leaking into the next CALL block.
625                let mut input_output_ctx = BindingContext::new();
626                Self::register_subplan_columns(&apply.input, &mut input_output_ctx);
627                let input_output_names: HashSet<String> =
628                    input_output_ctx.variable_names().iter().cloned().collect();
629
630                if !input_output_names.is_empty() {
631                    // Input has an explicit projection: remove its internals.
632                    let input_internals: Vec<String> = self
633                        .context
634                        .variable_names()
635                        .iter()
636                        .filter(|n| {
637                            !pre_apply_names.contains(*n) && !input_output_names.contains(*n)
638                        })
639                        .cloned()
640                        .collect();
641                    for name in input_internals {
642                        self.context.remove_variable(&name);
643                    }
644                }
645
646                // Snapshot the permitted outer context for the subplan.
647                let outer_names: HashSet<String> =
648                    self.context.variable_names().iter().cloned().collect();
649
650                self.bind_operator(&apply.subplan)?;
651
652                // Remove internal-only variables added by the subplan (those that
653                // are not output columns). Prevents subplan internals from leaking
654                // into the outer query or sibling CALL blocks.
655                let mut subplan_output_ctx = BindingContext::new();
656                Self::register_subplan_columns(&apply.subplan, &mut subplan_output_ctx);
657                let subplan_output_names: HashSet<String> = subplan_output_ctx
658                    .variable_names()
659                    .iter()
660                    .cloned()
661                    .collect();
662
663                let to_remove: Vec<String> = self
664                    .context
665                    .variable_names()
666                    .iter()
667                    .filter(|n| !outer_names.contains(*n) && !subplan_output_names.contains(*n))
668                    .cloned()
669                    .collect();
670                for name in to_remove {
671                    self.context.remove_variable(&name);
672                }
673
674                // Register output columns so downstream operators can reference them.
675                Self::register_subplan_columns(&apply.subplan, &mut self.context);
676                Ok(())
677            }
678            LogicalOperator::MultiWayJoin(mwj) => {
679                for input in &mwj.inputs {
680                    self.bind_operator(input)?;
681                }
682                for cond in &mwj.conditions {
683                    self.validate_expression(&cond.left)?;
684                    self.validate_expression(&cond.right)?;
685                }
686                Ok(())
687            }
688            LogicalOperator::ParameterScan(param_scan) => {
689                // Register parameter columns as variables (injected by outer Apply)
690                for col in &param_scan.columns {
691                    self.context.add_variable(
692                        col.clone(),
693                        VariableInfo {
694                            name: col.clone(),
695                            data_type: LogicalType::Any,
696                            is_node: true,
697                            is_edge: false,
698                        },
699                    );
700                }
701                Ok(())
702            }
703            // DDL operators don't need binding: they're handled before the binder
704            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
705            // Procedure calls: register yielded columns as variables for downstream operators
706            LogicalOperator::CallProcedure(call) => {
707                if let Some(yields) = &call.yield_items {
708                    for item in yields {
709                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
710                        self.context.add_variable(
711                            var_name.to_string(),
712                            VariableInfo {
713                                name: var_name.to_string(),
714                                data_type: LogicalType::Any,
715                                is_node: false,
716                                is_edge: false,
717                            },
718                        );
719                    }
720                }
721                Ok(())
722            }
723            LogicalOperator::LoadData(load) => {
724                // The row variable is bound as Any (Map or List depending on WITH HEADERS)
725                self.context.add_variable(
726                    load.variable.clone(),
727                    VariableInfo {
728                        name: load.variable.clone(),
729                        data_type: LogicalType::Any,
730                        is_node: false,
731                        is_edge: false,
732                    },
733                );
734                Ok(())
735            }
736        }
737    }
738
739    /// Binds a triple scan operator (for RDF/SPARQL).
740    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
741        use crate::query::plan::TripleComponent;
742
743        // First bind the input if present
744        if let Some(ref input) = scan.input {
745            self.bind_operator(input)?;
746        }
747
748        // Add variables for subject, predicate, object
749        if let TripleComponent::Variable(name) = &scan.subject
750            && !self.context.contains(name)
751        {
752            self.context.add_variable(
753                name.clone(),
754                VariableInfo {
755                    name: name.clone(),
756                    data_type: LogicalType::Any, // RDF term
757                    is_node: false,
758                    is_edge: false,
759                },
760            );
761        }
762
763        if let TripleComponent::Variable(name) = &scan.predicate
764            && !self.context.contains(name)
765        {
766            self.context.add_variable(
767                name.clone(),
768                VariableInfo {
769                    name: name.clone(),
770                    data_type: LogicalType::Any, // IRI
771                    is_node: false,
772                    is_edge: false,
773                },
774            );
775        }
776
777        if let TripleComponent::Variable(name) = &scan.object
778            && !self.context.contains(name)
779        {
780            self.context.add_variable(
781                name.clone(),
782                VariableInfo {
783                    name: name.clone(),
784                    data_type: LogicalType::Any, // RDF term
785                    is_node: false,
786                    is_edge: false,
787                },
788            );
789        }
790
791        if let Some(TripleComponent::Variable(name)) = &scan.graph
792            && !self.context.contains(name)
793        {
794            self.context.add_variable(
795                name.clone(),
796                VariableInfo {
797                    name: name.clone(),
798                    data_type: LogicalType::Any, // IRI
799                    is_node: false,
800                    is_edge: false,
801                },
802            );
803        }
804
805        Ok(())
806    }
807
808    /// Binds a node scan operator.
809    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
810        // First bind the input if present
811        if let Some(ref input) = scan.input {
812            self.bind_operator(input)?;
813        }
814
815        // Add the scanned variable to scope
816        self.context.add_variable(
817            scan.variable.clone(),
818            VariableInfo {
819                name: scan.variable.clone(),
820                data_type: LogicalType::Node,
821                is_node: true,
822                is_edge: false,
823            },
824        );
825
826        Ok(())
827    }
828
829    /// Binds an expand operator.
830    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
831        // First bind the input
832        self.bind_operator(&expand.input)?;
833
834        // Validate that the source variable is defined
835        if !self.context.contains(&expand.from_variable) {
836            return Err(undefined_variable_error(
837                &expand.from_variable,
838                &self.context,
839                " in EXPAND",
840            ));
841        }
842
843        // Validate that the source is a node
844        if let Some(info) = self.context.get(&expand.from_variable)
845            && !info.is_node
846        {
847            return Err(binding_error(format!(
848                "Variable '{}' is not a node, cannot expand from it",
849                expand.from_variable
850            )));
851        }
852
853        // Add edge variable if present
854        if let Some(ref edge_var) = expand.edge_variable {
855            self.context.add_variable(
856                edge_var.clone(),
857                VariableInfo {
858                    name: edge_var.clone(),
859                    data_type: LogicalType::Edge,
860                    is_node: false,
861                    is_edge: true,
862                },
863            );
864        }
865
866        // Add target variable
867        self.context.add_variable(
868            expand.to_variable.clone(),
869            VariableInfo {
870                name: expand.to_variable.clone(),
871                data_type: LogicalType::Node,
872                is_node: true,
873                is_edge: false,
874            },
875        );
876
877        // Add path variables for variable-length paths
878        if let Some(ref path_alias) = expand.path_alias {
879            // Register the path variable itself (e.g. p in MATCH p=...)
880            self.context.add_variable(
881                path_alias.clone(),
882                VariableInfo {
883                    name: path_alias.clone(),
884                    data_type: LogicalType::Any,
885                    is_node: false,
886                    is_edge: false,
887                },
888            );
889            // length(p) → _path_length_p
890            let path_length_var = format!("_path_length_{}", path_alias);
891            self.context.add_variable(
892                path_length_var.clone(),
893                VariableInfo {
894                    name: path_length_var,
895                    data_type: LogicalType::Int64,
896                    is_node: false,
897                    is_edge: false,
898                },
899            );
900            // nodes(p) → _path_nodes_p
901            let path_nodes_var = format!("_path_nodes_{}", path_alias);
902            self.context.add_variable(
903                path_nodes_var.clone(),
904                VariableInfo {
905                    name: path_nodes_var,
906                    data_type: LogicalType::Any,
907                    is_node: false,
908                    is_edge: false,
909                },
910            );
911            // edges(p) → _path_edges_p
912            let path_edges_var = format!("_path_edges_{}", path_alias);
913            self.context.add_variable(
914                path_edges_var.clone(),
915                VariableInfo {
916                    name: path_edges_var,
917                    data_type: LogicalType::Any,
918                    is_node: false,
919                    is_edge: false,
920                },
921            );
922        }
923
924        Ok(())
925    }
926
927    /// Binds a filter operator.
928    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
929        // First bind the input
930        self.bind_operator(&filter.input)?;
931
932        // Validate the predicate expression
933        self.validate_expression(&filter.predicate)?;
934
935        Ok(())
936    }
937
938    /// Registers output columns from a subplan into the binding context.
939    /// Walks through wrapping operators to find a Return and extracts column names.
940    fn register_subplan_columns(plan: &LogicalOperator, ctx: &mut BindingContext) {
941        match plan {
942            LogicalOperator::Return(ret) => {
943                for item in &ret.items {
944                    let col_name = if let Some(alias) = &item.alias {
945                        alias.clone()
946                    } else {
947                        match &item.expression {
948                            LogicalExpression::Variable(name) => name.clone(),
949                            LogicalExpression::Property { variable, property } => {
950                                format!("{variable}.{property}")
951                            }
952                            _ => continue,
953                        }
954                    };
955                    ctx.add_variable(
956                        col_name.clone(),
957                        VariableInfo {
958                            name: col_name,
959                            data_type: LogicalType::Any,
960                            is_node: false,
961                            is_edge: false,
962                        },
963                    );
964                }
965            }
966            LogicalOperator::Sort(s) => Self::register_subplan_columns(&s.input, ctx),
967            LogicalOperator::Limit(l) => Self::register_subplan_columns(&l.input, ctx),
968            LogicalOperator::Distinct(d) => Self::register_subplan_columns(&d.input, ctx),
969            LogicalOperator::Aggregate(agg) => {
970                // Aggregate produces named output columns
971                for expr in &agg.aggregates {
972                    if let Some(alias) = &expr.alias {
973                        ctx.add_variable(
974                            alias.clone(),
975                            VariableInfo {
976                                name: alias.clone(),
977                                data_type: LogicalType::Any,
978                                is_node: false,
979                                is_edge: false,
980                            },
981                        );
982                    }
983                }
984            }
985            _ => {}
986        }
987    }
988
989    /// Binds a return operator.
990    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
991        // First bind the input
992        self.bind_operator(&ret.input)?;
993
994        // Validate all return expressions and register aliases
995        // (aliases must be visible to parent Sort for ORDER BY resolution)
996        for item in &ret.items {
997            self.validate_return_item(item)?;
998            if let Some(ref alias) = item.alias {
999                let data_type = self.infer_expression_type(&item.expression);
1000                self.context.add_variable(
1001                    alias.clone(),
1002                    VariableInfo {
1003                        name: alias.clone(),
1004                        data_type,
1005                        is_node: false,
1006                        is_edge: false,
1007                    },
1008                );
1009            }
1010        }
1011
1012        Ok(())
1013    }
1014
1015    /// Validates a return item.
1016    fn validate_return_item(&mut self, item: &ReturnItem) -> Result<()> {
1017        self.validate_expression(&item.expression)
1018    }
1019
1020    /// Validates that an expression only references defined variables.
1021    fn validate_expression(&mut self, expr: &LogicalExpression) -> Result<()> {
1022        match expr {
1023            LogicalExpression::Variable(name) => {
1024                // "*" is a wildcard marker for RETURN *, expanded by the planner
1025                if name == "*" {
1026                    return Ok(());
1027                }
1028                if !self.context.contains(name) && !name.starts_with("_anon_") {
1029                    return Err(undefined_variable_error(name, &self.context, ""));
1030                }
1031                Ok(())
1032            }
1033            LogicalExpression::Property { variable, .. } => {
1034                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
1035                    return Err(undefined_variable_error(
1036                        variable,
1037                        &self.context,
1038                        " in property access",
1039                    ));
1040                }
1041                Ok(())
1042            }
1043            LogicalExpression::Literal(_) => Ok(()),
1044            LogicalExpression::Binary { left, right, .. } => {
1045                self.validate_expression(left)?;
1046                self.validate_expression(right)
1047            }
1048            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
1049            LogicalExpression::FunctionCall { args, .. } => {
1050                for arg in args {
1051                    self.validate_expression(arg)?;
1052                }
1053                Ok(())
1054            }
1055            LogicalExpression::List(items) => {
1056                for item in items {
1057                    self.validate_expression(item)?;
1058                }
1059                Ok(())
1060            }
1061            LogicalExpression::Map(pairs) => {
1062                for (_, value) in pairs {
1063                    self.validate_expression(value)?;
1064                }
1065                Ok(())
1066            }
1067            LogicalExpression::IndexAccess { base, index } => {
1068                self.validate_expression(base)?;
1069                self.validate_expression(index)
1070            }
1071            LogicalExpression::SliceAccess { base, start, end } => {
1072                self.validate_expression(base)?;
1073                if let Some(s) = start {
1074                    self.validate_expression(s)?;
1075                }
1076                if let Some(e) = end {
1077                    self.validate_expression(e)?;
1078                }
1079                Ok(())
1080            }
1081            LogicalExpression::Case {
1082                operand,
1083                when_clauses,
1084                else_clause,
1085            } => {
1086                if let Some(op) = operand {
1087                    self.validate_expression(op)?;
1088                }
1089                for (cond, result) in when_clauses {
1090                    self.validate_expression(cond)?;
1091                    self.validate_expression(result)?;
1092                }
1093                if let Some(else_expr) = else_clause {
1094                    self.validate_expression(else_expr)?;
1095                }
1096                Ok(())
1097            }
1098            // Parameter references are validated externally
1099            LogicalExpression::Parameter(_) => Ok(()),
1100            // labels(n), type(e), id(n) need the variable to be defined
1101            LogicalExpression::Labels(var)
1102            | LogicalExpression::Type(var)
1103            | LogicalExpression::Id(var) => {
1104                if !self.context.contains(var) && !var.starts_with("_anon_") {
1105                    return Err(undefined_variable_error(var, &self.context, " in function"));
1106                }
1107                Ok(())
1108            }
1109            LogicalExpression::ListComprehension { list_expr, .. } => {
1110                // Validate the list expression against the outer context.
1111                // The filter and map expressions use the iteration variable
1112                // which is locally scoped, so we skip validating them here.
1113                self.validate_expression(list_expr)?;
1114                Ok(())
1115            }
1116            LogicalExpression::ListPredicate { list_expr, .. } => {
1117                // Validate the list expression against the outer context.
1118                // The predicate uses the iteration variable which is locally
1119                // scoped, so we skip validating it against the outer context.
1120                self.validate_expression(list_expr)?;
1121                Ok(())
1122            }
1123            LogicalExpression::ExistsSubquery(subquery)
1124            | LogicalExpression::CountSubquery(subquery)
1125            | LogicalExpression::ValueSubquery(subquery) => {
1126                // Subqueries have their own binding context
1127                // For now, just validate the structure exists
1128                let _ = subquery; // Would need recursive binding
1129                Ok(())
1130            }
1131            LogicalExpression::PatternComprehension {
1132                subplan,
1133                projection,
1134            } => {
1135                // Bind the subplan to register pattern variables (e.g., `f` in `(p)-[:KNOWS]->(f)`)
1136                self.bind_operator(subplan)?;
1137                // Now validate the projection expression (e.g., `f.name`)
1138                self.validate_expression(projection)
1139            }
1140            LogicalExpression::MapProjection { base, entries } => {
1141                if !self.context.contains(base) && !base.starts_with("_anon_") {
1142                    return Err(undefined_variable_error(
1143                        base,
1144                        &self.context,
1145                        " in map projection",
1146                    ));
1147                }
1148                for entry in entries {
1149                    if let crate::query::plan::MapProjectionEntry::LiteralEntry(_, expr) = entry {
1150                        self.validate_expression(expr)?;
1151                    }
1152                }
1153                Ok(())
1154            }
1155            LogicalExpression::Reduce {
1156                accumulator,
1157                initial,
1158                variable,
1159                list,
1160                expression,
1161            } => {
1162                self.validate_expression(initial)?;
1163                self.validate_expression(list)?;
1164                // accumulator and variable are locally scoped: inject them
1165                // into context, validate body, then remove
1166                let had_acc = self.context.contains(accumulator);
1167                let had_var = self.context.contains(variable);
1168                if !had_acc {
1169                    self.context.add_variable(
1170                        accumulator.clone(),
1171                        VariableInfo {
1172                            name: accumulator.clone(),
1173                            data_type: LogicalType::Any,
1174                            is_node: false,
1175                            is_edge: false,
1176                        },
1177                    );
1178                }
1179                if !had_var {
1180                    self.context.add_variable(
1181                        variable.clone(),
1182                        VariableInfo {
1183                            name: variable.clone(),
1184                            data_type: LogicalType::Any,
1185                            is_node: false,
1186                            is_edge: false,
1187                        },
1188                    );
1189                }
1190                self.validate_expression(expression)?;
1191                if !had_acc {
1192                    self.context.remove_variable(accumulator);
1193                }
1194                if !had_var {
1195                    self.context.remove_variable(variable);
1196                }
1197                Ok(())
1198            }
1199        }
1200    }
1201
1202    /// Infers the type of an expression for use in WITH clause aliasing.
1203    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
1204        match expr {
1205            LogicalExpression::Variable(name) => {
1206                // Look up the variable type from context
1207                self.context
1208                    .get(name)
1209                    .map_or(LogicalType::Any, |info| info.data_type.clone())
1210            }
1211            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
1212            LogicalExpression::Literal(value) => {
1213                // Infer type from literal value
1214                use grafeo_common::types::Value;
1215                match value {
1216                    Value::Bool(_) => LogicalType::Bool,
1217                    Value::Int64(_) => LogicalType::Int64,
1218                    Value::Float64(_) => LogicalType::Float64,
1219                    Value::String(_) => LogicalType::String,
1220                    Value::List(_) => LogicalType::Any, // Complex type
1221                    Value::Map(_) => LogicalType::Any,  // Complex type
1222                    Value::Null => LogicalType::Any,
1223                    _ => LogicalType::Any,
1224                }
1225            }
1226            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
1227            LogicalExpression::Unary { .. } => LogicalType::Any,
1228            LogicalExpression::FunctionCall { name, .. } => {
1229                // Infer based on function name
1230                match name.to_lowercase().as_str() {
1231                    "count" | "sum" | "id" => LogicalType::Int64,
1232                    "avg" => LogicalType::Float64,
1233                    "type" => LogicalType::String,
1234                    // List-returning functions use Any since we don't track element type
1235                    "labels" | "collect" => LogicalType::Any,
1236                    _ => LogicalType::Any,
1237                }
1238            }
1239            LogicalExpression::List(_) => LogicalType::Any, // Complex type
1240            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
1241            _ => LogicalType::Any,
1242        }
1243    }
1244
1245    /// Binds a join operator.
1246    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
1247        // Bind both sides of the join
1248        self.bind_operator(&join.left)?;
1249        self.bind_operator(&join.right)?;
1250
1251        // Validate join conditions
1252        for condition in &join.conditions {
1253            self.validate_expression(&condition.left)?;
1254            self.validate_expression(&condition.right)?;
1255        }
1256
1257        Ok(())
1258    }
1259
1260    /// Binds an aggregate operator.
1261    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
1262        // Bind the input first
1263        self.bind_operator(&agg.input)?;
1264
1265        // Validate group by expressions
1266        for expr in &agg.group_by {
1267            self.validate_expression(expr)?;
1268        }
1269
1270        // Validate aggregate expressions
1271        for agg_expr in &agg.aggregates {
1272            if let Some(ref expr) = agg_expr.expression {
1273                self.validate_expression(expr)?;
1274            }
1275            // Add the alias as a new variable if present
1276            if let Some(ref alias) = agg_expr.alias {
1277                self.context.add_variable(
1278                    alias.clone(),
1279                    VariableInfo {
1280                        name: alias.clone(),
1281                        data_type: LogicalType::Any,
1282                        is_node: false,
1283                        is_edge: false,
1284                    },
1285                );
1286            }
1287        }
1288
1289        // Register group-by output column names so ORDER BY / HAVING
1290        // can reference them (e.g. "n.city" from Property(n, city)).
1291        for expr in &agg.group_by {
1292            let col_name = crate::query::planner::common::expression_to_string(expr);
1293            if !self.context.contains(&col_name) {
1294                self.context.add_variable(
1295                    col_name.clone(),
1296                    VariableInfo {
1297                        name: col_name,
1298                        data_type: LogicalType::Any,
1299                        is_node: false,
1300                        is_edge: false,
1301                    },
1302                );
1303            }
1304        }
1305
1306        Ok(())
1307    }
1308}
1309
1310impl Default for Binder {
1311    fn default() -> Self {
1312        Self::new()
1313    }
1314}
1315
1316#[cfg(test)]
1317mod tests {
1318    use super::*;
1319    use crate::query::plan::{BinaryOp, FilterOp};
1320
1321    #[test]
1322    fn test_bind_simple_scan() {
1323        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1324            items: vec![ReturnItem {
1325                expression: LogicalExpression::Variable("n".to_string()),
1326                alias: None,
1327            }],
1328            distinct: false,
1329            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1330                variable: "n".to_string(),
1331                label: Some("Person".to_string()),
1332                input: None,
1333            })),
1334        }));
1335
1336        let mut binder = Binder::new();
1337        let result = binder.bind(&plan);
1338
1339        assert!(result.is_ok());
1340        let ctx = result.unwrap();
1341        assert!(ctx.contains("n"));
1342        assert!(ctx.get("n").unwrap().is_node);
1343    }
1344
1345    #[test]
1346    fn test_bind_undefined_variable() {
1347        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1348            items: vec![ReturnItem {
1349                expression: LogicalExpression::Variable("undefined".to_string()),
1350                alias: None,
1351            }],
1352            distinct: false,
1353            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1354                variable: "n".to_string(),
1355                label: None,
1356                input: None,
1357            })),
1358        }));
1359
1360        let mut binder = Binder::new();
1361        let result = binder.bind(&plan);
1362
1363        assert!(result.is_err());
1364        let err = result.unwrap_err();
1365        assert!(err.to_string().contains("Undefined variable"));
1366    }
1367
1368    #[test]
1369    fn test_bind_property_access() {
1370        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1371            items: vec![ReturnItem {
1372                expression: LogicalExpression::Property {
1373                    variable: "n".to_string(),
1374                    property: "name".to_string(),
1375                },
1376                alias: None,
1377            }],
1378            distinct: false,
1379            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1380                variable: "n".to_string(),
1381                label: Some("Person".to_string()),
1382                input: None,
1383            })),
1384        }));
1385
1386        let mut binder = Binder::new();
1387        let result = binder.bind(&plan);
1388
1389        assert!(result.is_ok());
1390    }
1391
1392    #[test]
1393    fn test_bind_filter_with_undefined_variable() {
1394        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1395            items: vec![ReturnItem {
1396                expression: LogicalExpression::Variable("n".to_string()),
1397                alias: None,
1398            }],
1399            distinct: false,
1400            input: Box::new(LogicalOperator::Filter(FilterOp {
1401                predicate: LogicalExpression::Binary {
1402                    left: Box::new(LogicalExpression::Property {
1403                        variable: "m".to_string(), // undefined!
1404                        property: "age".to_string(),
1405                    }),
1406                    op: BinaryOp::Gt,
1407                    right: Box::new(LogicalExpression::Literal(
1408                        grafeo_common::types::Value::Int64(30),
1409                    )),
1410                },
1411                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1412                    variable: "n".to_string(),
1413                    label: None,
1414                    input: None,
1415                })),
1416                pushdown_hint: None,
1417            })),
1418        }));
1419
1420        let mut binder = Binder::new();
1421        let result = binder.bind(&plan);
1422
1423        assert!(result.is_err());
1424        let err = result.unwrap_err();
1425        assert!(err.to_string().contains("Undefined variable 'm'"));
1426    }
1427
1428    #[test]
1429    fn test_bind_expand() {
1430        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1431
1432        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1433            items: vec![
1434                ReturnItem {
1435                    expression: LogicalExpression::Variable("a".to_string()),
1436                    alias: None,
1437                },
1438                ReturnItem {
1439                    expression: LogicalExpression::Variable("b".to_string()),
1440                    alias: None,
1441                },
1442            ],
1443            distinct: false,
1444            input: Box::new(LogicalOperator::Expand(ExpandOp {
1445                from_variable: "a".to_string(),
1446                to_variable: "b".to_string(),
1447                edge_variable: Some("e".to_string()),
1448                direction: ExpandDirection::Outgoing,
1449                edge_types: vec!["KNOWS".to_string()],
1450                min_hops: 1,
1451                max_hops: Some(1),
1452                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1453                    variable: "a".to_string(),
1454                    label: Some("Person".to_string()),
1455                    input: None,
1456                })),
1457                path_alias: None,
1458                path_mode: PathMode::Walk,
1459            })),
1460        }));
1461
1462        let mut binder = Binder::new();
1463        let result = binder.bind(&plan);
1464
1465        assert!(result.is_ok());
1466        let ctx = result.unwrap();
1467        assert!(ctx.contains("a"));
1468        assert!(ctx.contains("b"));
1469        assert!(ctx.contains("e"));
1470        assert!(ctx.get("a").unwrap().is_node);
1471        assert!(ctx.get("b").unwrap().is_node);
1472        assert!(ctx.get("e").unwrap().is_edge);
1473    }
1474
1475    #[test]
1476    fn test_bind_expand_from_undefined_variable() {
1477        // Tests that expanding from an undefined variable produces a clear error
1478        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode};
1479
1480        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1481            items: vec![ReturnItem {
1482                expression: LogicalExpression::Variable("b".to_string()),
1483                alias: None,
1484            }],
1485            distinct: false,
1486            input: Box::new(LogicalOperator::Expand(ExpandOp {
1487                from_variable: "undefined".to_string(), // not defined!
1488                to_variable: "b".to_string(),
1489                edge_variable: None,
1490                direction: ExpandDirection::Outgoing,
1491                edge_types: vec![],
1492                min_hops: 1,
1493                max_hops: Some(1),
1494                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1495                    variable: "a".to_string(),
1496                    label: None,
1497                    input: None,
1498                })),
1499                path_alias: None,
1500                path_mode: PathMode::Walk,
1501            })),
1502        }));
1503
1504        let mut binder = Binder::new();
1505        let result = binder.bind(&plan);
1506
1507        assert!(result.is_err());
1508        let err = result.unwrap_err();
1509        assert!(
1510            err.to_string().contains("Undefined variable 'undefined'"),
1511            "Expected error about undefined variable, got: {}",
1512            err
1513        );
1514    }
1515
1516    #[test]
1517    fn test_bind_return_with_aggregate_and_non_aggregate() {
1518        // Tests binding of aggregate functions alongside regular expressions
1519        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1520            items: vec![
1521                ReturnItem {
1522                    expression: LogicalExpression::FunctionCall {
1523                        name: "count".to_string(),
1524                        args: vec![LogicalExpression::Variable("n".to_string())],
1525                        distinct: false,
1526                    },
1527                    alias: Some("cnt".to_string()),
1528                },
1529                ReturnItem {
1530                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1531                    alias: Some("one".to_string()),
1532                },
1533            ],
1534            distinct: false,
1535            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1536                variable: "n".to_string(),
1537                label: Some("Person".to_string()),
1538                input: None,
1539            })),
1540        }));
1541
1542        let mut binder = Binder::new();
1543        let result = binder.bind(&plan);
1544
1545        // This should succeed - count(n) with literal is valid
1546        assert!(result.is_ok());
1547    }
1548
1549    #[test]
1550    fn test_bind_nested_property_access() {
1551        // Tests that nested property access on the same variable works
1552        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1553            items: vec![
1554                ReturnItem {
1555                    expression: LogicalExpression::Property {
1556                        variable: "n".to_string(),
1557                        property: "name".to_string(),
1558                    },
1559                    alias: None,
1560                },
1561                ReturnItem {
1562                    expression: LogicalExpression::Property {
1563                        variable: "n".to_string(),
1564                        property: "age".to_string(),
1565                    },
1566                    alias: None,
1567                },
1568            ],
1569            distinct: false,
1570            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1571                variable: "n".to_string(),
1572                label: Some("Person".to_string()),
1573                input: None,
1574            })),
1575        }));
1576
1577        let mut binder = Binder::new();
1578        let result = binder.bind(&plan);
1579
1580        assert!(result.is_ok());
1581    }
1582
1583    #[test]
1584    fn test_bind_binary_expression_with_undefined() {
1585        // Tests that binary expressions with undefined variables produce errors
1586        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1587            items: vec![ReturnItem {
1588                expression: LogicalExpression::Binary {
1589                    left: Box::new(LogicalExpression::Property {
1590                        variable: "n".to_string(),
1591                        property: "age".to_string(),
1592                    }),
1593                    op: BinaryOp::Add,
1594                    right: Box::new(LogicalExpression::Property {
1595                        variable: "m".to_string(), // undefined!
1596                        property: "age".to_string(),
1597                    }),
1598                },
1599                alias: Some("total".to_string()),
1600            }],
1601            distinct: false,
1602            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1603                variable: "n".to_string(),
1604                label: None,
1605                input: None,
1606            })),
1607        }));
1608
1609        let mut binder = Binder::new();
1610        let result = binder.bind(&plan);
1611
1612        assert!(result.is_err());
1613        assert!(
1614            result
1615                .unwrap_err()
1616                .to_string()
1617                .contains("Undefined variable 'm'")
1618        );
1619    }
1620
1621    #[test]
1622    fn test_bind_duplicate_variable_definition() {
1623        // Tests behavior when the same variable is defined twice (via two NodeScans)
1624        // This is typically not allowed or the second shadows the first
1625        use crate::query::plan::{JoinOp, JoinType};
1626
1627        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1628            items: vec![ReturnItem {
1629                expression: LogicalExpression::Variable("n".to_string()),
1630                alias: None,
1631            }],
1632            distinct: false,
1633            input: Box::new(LogicalOperator::Join(JoinOp {
1634                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1635                    variable: "n".to_string(),
1636                    label: Some("A".to_string()),
1637                    input: None,
1638                })),
1639                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1640                    variable: "m".to_string(), // different variable is fine
1641                    label: Some("B".to_string()),
1642                    input: None,
1643                })),
1644                join_type: JoinType::Inner,
1645                conditions: vec![],
1646            })),
1647        }));
1648
1649        let mut binder = Binder::new();
1650        let result = binder.bind(&plan);
1651
1652        // Join with different variables should work
1653        assert!(result.is_ok());
1654        let ctx = result.unwrap();
1655        assert!(ctx.contains("n"));
1656        assert!(ctx.contains("m"));
1657    }
1658
1659    #[test]
1660    fn test_bind_function_with_wrong_arity() {
1661        // Tests that functions with wrong number of arguments are handled
1662        // (behavior depends on whether binder validates arity)
1663        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1664            items: vec![ReturnItem {
1665                expression: LogicalExpression::FunctionCall {
1666                    name: "count".to_string(),
1667                    args: vec![], // count() needs an argument
1668                    distinct: false,
1669                },
1670                alias: None,
1671            }],
1672            distinct: false,
1673            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1674                variable: "n".to_string(),
1675                label: None,
1676                input: None,
1677            })),
1678        }));
1679
1680        let mut binder = Binder::new();
1681        let result = binder.bind(&plan);
1682
1683        // The binder may or may not catch this - if it passes, execution will fail
1684        // This test documents current behavior
1685        // If binding fails, that's fine; if it passes, execution will handle it
1686        let _ = result; // We're just testing it doesn't panic
1687    }
1688
1689    // --- Mutation operator validation ---
1690
1691    #[test]
1692    fn test_create_edge_rejects_undefined_source() {
1693        use crate::query::plan::CreateEdgeOp;
1694
1695        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1696            variable: Some("e".to_string()),
1697            from_variable: "ghost".to_string(), // not defined!
1698            to_variable: "b".to_string(),
1699            edge_type: "KNOWS".to_string(),
1700            properties: vec![],
1701            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1702                variable: "b".to_string(),
1703                label: None,
1704                input: None,
1705            })),
1706        }));
1707
1708        let mut binder = Binder::new();
1709        let err = binder.bind(&plan).unwrap_err();
1710        assert!(
1711            err.to_string().contains("Undefined variable 'ghost'"),
1712            "Should reject undefined source variable, got: {err}"
1713        );
1714    }
1715
1716    #[test]
1717    fn test_create_edge_rejects_undefined_target() {
1718        use crate::query::plan::CreateEdgeOp;
1719
1720        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1721            variable: None,
1722            from_variable: "a".to_string(),
1723            to_variable: "missing".to_string(), // not defined!
1724            edge_type: "KNOWS".to_string(),
1725            properties: vec![],
1726            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1727                variable: "a".to_string(),
1728                label: None,
1729                input: None,
1730            })),
1731        }));
1732
1733        let mut binder = Binder::new();
1734        let err = binder.bind(&plan).unwrap_err();
1735        assert!(
1736            err.to_string().contains("Undefined variable 'missing'"),
1737            "Should reject undefined target variable, got: {err}"
1738        );
1739    }
1740
1741    #[test]
1742    fn test_create_edge_validates_property_expressions() {
1743        use crate::query::plan::CreateEdgeOp;
1744
1745        // Source and target defined, but property references undefined variable
1746        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1747            variable: Some("e".to_string()),
1748            from_variable: "a".to_string(),
1749            to_variable: "b".to_string(),
1750            edge_type: "KNOWS".to_string(),
1751            properties: vec![(
1752                "since".to_string(),
1753                LogicalExpression::Property {
1754                    variable: "x".to_string(), // undefined!
1755                    property: "year".to_string(),
1756                },
1757            )],
1758            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1759                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1760                    variable: "a".to_string(),
1761                    label: None,
1762                    input: None,
1763                })),
1764                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1765                    variable: "b".to_string(),
1766                    label: None,
1767                    input: None,
1768                })),
1769                join_type: crate::query::plan::JoinType::Inner,
1770                conditions: vec![],
1771            })),
1772        }));
1773
1774        let mut binder = Binder::new();
1775        let err = binder.bind(&plan).unwrap_err();
1776        assert!(err.to_string().contains("Undefined variable 'x'"));
1777    }
1778
1779    #[test]
1780    fn test_set_property_rejects_undefined_variable() {
1781        use crate::query::plan::SetPropertyOp;
1782
1783        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1784            variable: "ghost".to_string(),
1785            properties: vec![(
1786                "name".to_string(),
1787                LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1788            )],
1789            replace: false,
1790            is_edge: false,
1791            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1792                variable: "n".to_string(),
1793                label: None,
1794                input: None,
1795            })),
1796        }));
1797
1798        let mut binder = Binder::new();
1799        let err = binder.bind(&plan).unwrap_err();
1800        assert!(
1801            err.to_string().contains("in SET"),
1802            "Error should indicate SET context, got: {err}"
1803        );
1804    }
1805
1806    #[test]
1807    fn test_delete_node_rejects_undefined_variable() {
1808        use crate::query::plan::DeleteNodeOp;
1809
1810        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1811            variable: "phantom".to_string(),
1812            detach: false,
1813            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1814                variable: "n".to_string(),
1815                label: None,
1816                input: None,
1817            })),
1818        }));
1819
1820        let mut binder = Binder::new();
1821        let err = binder.bind(&plan).unwrap_err();
1822        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1823    }
1824
1825    #[test]
1826    fn test_delete_edge_rejects_undefined_variable() {
1827        use crate::query::plan::DeleteEdgeOp;
1828
1829        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1830            variable: "gone".to_string(),
1831            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1832                variable: "n".to_string(),
1833                label: None,
1834                input: None,
1835            })),
1836        }));
1837
1838        let mut binder = Binder::new();
1839        let err = binder.bind(&plan).unwrap_err();
1840        assert!(err.to_string().contains("Undefined variable 'gone'"));
1841    }
1842
1843    // --- WITH/Project clause ---
1844
1845    #[test]
1846    fn test_project_alias_becomes_available_downstream() {
1847        use crate::query::plan::{ProjectOp, Projection};
1848
1849        // WITH n.name AS person_name RETURN person_name
1850        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1851            items: vec![ReturnItem {
1852                expression: LogicalExpression::Variable("person_name".to_string()),
1853                alias: None,
1854            }],
1855            distinct: false,
1856            input: Box::new(LogicalOperator::Project(ProjectOp {
1857                projections: vec![Projection {
1858                    expression: LogicalExpression::Property {
1859                        variable: "n".to_string(),
1860                        property: "name".to_string(),
1861                    },
1862                    alias: Some("person_name".to_string()),
1863                }],
1864                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1865                    variable: "n".to_string(),
1866                    label: None,
1867                    input: None,
1868                })),
1869                pass_through_input: false,
1870            })),
1871        }));
1872
1873        let mut binder = Binder::new();
1874        let ctx = binder.bind(&plan).unwrap();
1875        assert!(
1876            ctx.contains("person_name"),
1877            "WITH alias should be available to RETURN"
1878        );
1879    }
1880
1881    #[test]
1882    fn test_project_rejects_undefined_expression() {
1883        use crate::query::plan::{ProjectOp, Projection};
1884
1885        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1886            projections: vec![Projection {
1887                expression: LogicalExpression::Variable("nope".to_string()),
1888                alias: Some("x".to_string()),
1889            }],
1890            input: Box::new(LogicalOperator::Empty),
1891            pass_through_input: false,
1892        }));
1893
1894        let mut binder = Binder::new();
1895        let result = binder.bind(&plan);
1896        assert!(result.is_err(), "WITH on undefined variable should fail");
1897    }
1898
1899    // --- UNWIND ---
1900
1901    #[test]
1902    fn test_unwind_adds_element_variable() {
1903        use crate::query::plan::UnwindOp;
1904
1905        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1906            items: vec![ReturnItem {
1907                expression: LogicalExpression::Variable("item".to_string()),
1908                alias: None,
1909            }],
1910            distinct: false,
1911            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1912                expression: LogicalExpression::List(vec![
1913                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1914                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1915                ]),
1916                variable: "item".to_string(),
1917                ordinality_var: None,
1918                offset_var: None,
1919                input: Box::new(LogicalOperator::Empty),
1920            })),
1921        }));
1922
1923        let mut binder = Binder::new();
1924        let ctx = binder.bind(&plan).unwrap();
1925        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1926        let info = ctx.get("item").unwrap();
1927        assert!(
1928            !info.is_node && !info.is_edge,
1929            "UNWIND variable is not a graph element"
1930        );
1931    }
1932
1933    // --- MERGE ---
1934
1935    #[test]
1936    fn test_merge_adds_variable_and_validates_properties() {
1937        use crate::query::plan::MergeOp;
1938
1939        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1940            items: vec![ReturnItem {
1941                expression: LogicalExpression::Variable("m".to_string()),
1942                alias: None,
1943            }],
1944            distinct: false,
1945            input: Box::new(LogicalOperator::Merge(MergeOp {
1946                variable: "m".to_string(),
1947                labels: vec!["Person".to_string()],
1948                match_properties: vec![(
1949                    "name".to_string(),
1950                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alix".into())),
1951                )],
1952                on_create: vec![(
1953                    "created".to_string(),
1954                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1955                )],
1956                on_match: vec![(
1957                    "updated".to_string(),
1958                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1959                )],
1960                input: Box::new(LogicalOperator::Empty),
1961            })),
1962        }));
1963
1964        let mut binder = Binder::new();
1965        let ctx = binder.bind(&plan).unwrap();
1966        assert!(ctx.contains("m"));
1967        assert!(
1968            ctx.get("m").unwrap().is_node,
1969            "MERGE variable should be a node"
1970        );
1971    }
1972
1973    #[test]
1974    fn test_merge_rejects_undefined_in_on_create() {
1975        use crate::query::plan::MergeOp;
1976
1977        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
1978            variable: "m".to_string(),
1979            labels: vec![],
1980            match_properties: vec![],
1981            on_create: vec![(
1982                "name".to_string(),
1983                LogicalExpression::Property {
1984                    variable: "other".to_string(), // undefined!
1985                    property: "name".to_string(),
1986                },
1987            )],
1988            on_match: vec![],
1989            input: Box::new(LogicalOperator::Empty),
1990        }));
1991
1992        let mut binder = Binder::new();
1993        let result = binder.bind(&plan);
1994        assert!(
1995            result.is_err(),
1996            "ON CREATE referencing undefined variable should fail"
1997        );
1998    }
1999
2000    // --- ShortestPath ---
2001
2002    #[test]
2003    fn test_shortest_path_rejects_undefined_source() {
2004        use crate::query::plan::{ExpandDirection, ShortestPathOp};
2005
2006        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2007            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2008                variable: "b".to_string(),
2009                label: None,
2010                input: None,
2011            })),
2012            source_var: "missing".to_string(), // not defined
2013            target_var: "b".to_string(),
2014            edge_types: vec![],
2015            direction: ExpandDirection::Both,
2016            path_alias: "p".to_string(),
2017            all_paths: false,
2018        }));
2019
2020        let mut binder = Binder::new();
2021        let err = binder.bind(&plan).unwrap_err();
2022        assert!(
2023            err.to_string().contains("source in shortestPath"),
2024            "Error should mention shortestPath source context, got: {err}"
2025        );
2026    }
2027
2028    #[test]
2029    fn test_shortest_path_adds_path_and_length_variables() {
2030        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
2031
2032        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
2033            input: Box::new(LogicalOperator::Join(JoinOp {
2034                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2035                    variable: "a".to_string(),
2036                    label: None,
2037                    input: None,
2038                })),
2039                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2040                    variable: "b".to_string(),
2041                    label: None,
2042                    input: None,
2043                })),
2044                join_type: JoinType::Cross,
2045                conditions: vec![],
2046            })),
2047            source_var: "a".to_string(),
2048            target_var: "b".to_string(),
2049            edge_types: vec!["ROAD".to_string()],
2050            direction: ExpandDirection::Outgoing,
2051            path_alias: "p".to_string(),
2052            all_paths: false,
2053        }));
2054
2055        let mut binder = Binder::new();
2056        let ctx = binder.bind(&plan).unwrap();
2057        assert!(ctx.contains("p"), "Path alias should be bound");
2058        assert!(
2059            ctx.contains("_path_length_p"),
2060            "Path length variable should be auto-created"
2061        );
2062    }
2063
2064    // --- Expression validation edge cases ---
2065
2066    #[test]
2067    fn test_case_expression_validates_all_branches() {
2068        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2069            items: vec![ReturnItem {
2070                expression: LogicalExpression::Case {
2071                    operand: None,
2072                    when_clauses: vec![
2073                        (
2074                            LogicalExpression::Binary {
2075                                left: Box::new(LogicalExpression::Property {
2076                                    variable: "n".to_string(),
2077                                    property: "age".to_string(),
2078                                }),
2079                                op: BinaryOp::Gt,
2080                                right: Box::new(LogicalExpression::Literal(
2081                                    grafeo_common::types::Value::Int64(18),
2082                                )),
2083                            },
2084                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2085                                "adult".into(),
2086                            )),
2087                        ),
2088                        (
2089                            // This branch references undefined variable
2090                            LogicalExpression::Property {
2091                                variable: "ghost".to_string(),
2092                                property: "flag".to_string(),
2093                            },
2094                            LogicalExpression::Literal(grafeo_common::types::Value::String(
2095                                "flagged".into(),
2096                            )),
2097                        ),
2098                    ],
2099                    else_clause: Some(Box::new(LogicalExpression::Literal(
2100                        grafeo_common::types::Value::String("other".into()),
2101                    ))),
2102                },
2103                alias: None,
2104            }],
2105            distinct: false,
2106            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2107                variable: "n".to_string(),
2108                label: None,
2109                input: None,
2110            })),
2111        }));
2112
2113        let mut binder = Binder::new();
2114        let err = binder.bind(&plan).unwrap_err();
2115        assert!(
2116            err.to_string().contains("ghost"),
2117            "CASE should validate all when-clause conditions"
2118        );
2119    }
2120
2121    #[test]
2122    fn test_case_expression_validates_else_clause() {
2123        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2124            items: vec![ReturnItem {
2125                expression: LogicalExpression::Case {
2126                    operand: None,
2127                    when_clauses: vec![(
2128                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
2129                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
2130                    )],
2131                    else_clause: Some(Box::new(LogicalExpression::Property {
2132                        variable: "missing".to_string(),
2133                        property: "x".to_string(),
2134                    })),
2135                },
2136                alias: None,
2137            }],
2138            distinct: false,
2139            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2140                variable: "n".to_string(),
2141                label: None,
2142                input: None,
2143            })),
2144        }));
2145
2146        let mut binder = Binder::new();
2147        let err = binder.bind(&plan).unwrap_err();
2148        assert!(
2149            err.to_string().contains("missing"),
2150            "CASE ELSE should validate its expression too"
2151        );
2152    }
2153
2154    #[test]
2155    fn test_slice_access_validates_expressions() {
2156        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2157            items: vec![ReturnItem {
2158                expression: LogicalExpression::SliceAccess {
2159                    base: Box::new(LogicalExpression::Variable("n".to_string())),
2160                    start: Some(Box::new(LogicalExpression::Variable(
2161                        "undefined_start".to_string(),
2162                    ))),
2163                    end: None,
2164                },
2165                alias: None,
2166            }],
2167            distinct: false,
2168            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2169                variable: "n".to_string(),
2170                label: None,
2171                input: None,
2172            })),
2173        }));
2174
2175        let mut binder = Binder::new();
2176        let err = binder.bind(&plan).unwrap_err();
2177        assert!(err.to_string().contains("undefined_start"));
2178    }
2179
2180    #[test]
2181    fn test_list_comprehension_validates_list_source() {
2182        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2183            items: vec![ReturnItem {
2184                expression: LogicalExpression::ListComprehension {
2185                    variable: "x".to_string(),
2186                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
2187                    filter_expr: None,
2188                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
2189                },
2190                alias: None,
2191            }],
2192            distinct: false,
2193            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2194                variable: "n".to_string(),
2195                label: None,
2196                input: None,
2197            })),
2198        }));
2199
2200        let mut binder = Binder::new();
2201        let err = binder.bind(&plan).unwrap_err();
2202        assert!(
2203            err.to_string().contains("not_defined"),
2204            "List comprehension should validate source list expression"
2205        );
2206    }
2207
2208    #[test]
2209    fn test_labels_type_id_reject_undefined() {
2210        // labels(x) where x is not defined
2211        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2212            items: vec![ReturnItem {
2213                expression: LogicalExpression::Labels("x".to_string()),
2214                alias: None,
2215            }],
2216            distinct: false,
2217            input: Box::new(LogicalOperator::Empty),
2218        }));
2219
2220        let mut binder = Binder::new();
2221        assert!(
2222            binder.bind(&plan).is_err(),
2223            "labels(x) on undefined x should fail"
2224        );
2225
2226        // type(e) where e is not defined
2227        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2228            items: vec![ReturnItem {
2229                expression: LogicalExpression::Type("e".to_string()),
2230                alias: None,
2231            }],
2232            distinct: false,
2233            input: Box::new(LogicalOperator::Empty),
2234        }));
2235
2236        let mut binder2 = Binder::new();
2237        assert!(
2238            binder2.bind(&plan2).is_err(),
2239            "type(e) on undefined e should fail"
2240        );
2241
2242        // id(n) where n is not defined
2243        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2244            items: vec![ReturnItem {
2245                expression: LogicalExpression::Id("n".to_string()),
2246                alias: None,
2247            }],
2248            distinct: false,
2249            input: Box::new(LogicalOperator::Empty),
2250        }));
2251
2252        let mut binder3 = Binder::new();
2253        assert!(
2254            binder3.bind(&plan3).is_err(),
2255            "id(n) on undefined n should fail"
2256        );
2257    }
2258
2259    #[test]
2260    fn test_expand_rejects_non_node_source() {
2261        use crate::query::plan::{ExpandDirection, ExpandOp, PathMode, UnwindOp};
2262
2263        // UNWIND [1,2] AS x  -- x is not a node
2264        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
2265        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2266            items: vec![ReturnItem {
2267                expression: LogicalExpression::Variable("b".to_string()),
2268                alias: None,
2269            }],
2270            distinct: false,
2271            input: Box::new(LogicalOperator::Expand(ExpandOp {
2272                from_variable: "x".to_string(),
2273                to_variable: "b".to_string(),
2274                edge_variable: None,
2275                direction: ExpandDirection::Outgoing,
2276                edge_types: vec![],
2277                min_hops: 1,
2278                max_hops: Some(1),
2279                input: Box::new(LogicalOperator::Unwind(UnwindOp {
2280                    expression: LogicalExpression::List(vec![]),
2281                    variable: "x".to_string(),
2282                    ordinality_var: None,
2283                    offset_var: None,
2284                    input: Box::new(LogicalOperator::Empty),
2285                })),
2286                path_alias: None,
2287                path_mode: PathMode::Walk,
2288            })),
2289        }));
2290
2291        let mut binder = Binder::new();
2292        let err = binder.bind(&plan).unwrap_err();
2293        assert!(
2294            err.to_string().contains("not a node"),
2295            "Expanding from non-node should fail, got: {err}"
2296        );
2297    }
2298
2299    #[test]
2300    fn test_add_label_rejects_undefined_variable() {
2301        use crate::query::plan::AddLabelOp;
2302
2303        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
2304            variable: "missing".to_string(),
2305            labels: vec!["Admin".to_string()],
2306            input: Box::new(LogicalOperator::Empty),
2307        }));
2308
2309        let mut binder = Binder::new();
2310        let err = binder.bind(&plan).unwrap_err();
2311        assert!(err.to_string().contains("SET labels"));
2312    }
2313
2314    #[test]
2315    fn test_remove_label_rejects_undefined_variable() {
2316        use crate::query::plan::RemoveLabelOp;
2317
2318        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
2319            variable: "missing".to_string(),
2320            labels: vec!["Admin".to_string()],
2321            input: Box::new(LogicalOperator::Empty),
2322        }));
2323
2324        let mut binder = Binder::new();
2325        let err = binder.bind(&plan).unwrap_err();
2326        assert!(err.to_string().contains("REMOVE labels"));
2327    }
2328
2329    #[test]
2330    fn test_sort_validates_key_expressions() {
2331        use crate::query::plan::{SortKey, SortOp, SortOrder};
2332
2333        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
2334            keys: vec![SortKey {
2335                expression: LogicalExpression::Property {
2336                    variable: "missing".to_string(),
2337                    property: "name".to_string(),
2338                },
2339                order: SortOrder::Ascending,
2340                nulls: None,
2341            }],
2342            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2343                variable: "n".to_string(),
2344                label: None,
2345                input: None,
2346            })),
2347        }));
2348
2349        let mut binder = Binder::new();
2350        assert!(
2351            binder.bind(&plan).is_err(),
2352            "ORDER BY on undefined variable should fail"
2353        );
2354    }
2355
2356    #[test]
2357    fn test_create_node_adds_variable_before_property_validation() {
2358        use crate::query::plan::CreateNodeOp;
2359
2360        // CREATE (n:Person {friend: n.name}) - referencing the node being created
2361        // The variable should be available for property expressions (self-reference)
2362        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
2363            variable: "n".to_string(),
2364            labels: vec!["Person".to_string()],
2365            properties: vec![(
2366                "self_ref".to_string(),
2367                LogicalExpression::Property {
2368                    variable: "n".to_string(),
2369                    property: "name".to_string(),
2370                },
2371            )],
2372            input: None,
2373        }));
2374
2375        let mut binder = Binder::new();
2376        // This should succeed because CreateNode adds the variable before validating properties
2377        let ctx = binder.bind(&plan).unwrap();
2378        assert!(ctx.get("n").unwrap().is_node);
2379    }
2380
2381    #[test]
2382    fn test_undefined_variable_suggests_similar() {
2383        // 'person' is defined, user types 'persn' - should get a suggestion
2384        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2385            items: vec![ReturnItem {
2386                expression: LogicalExpression::Variable("persn".to_string()),
2387                alias: None,
2388            }],
2389            distinct: false,
2390            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2391                variable: "person".to_string(),
2392                label: None,
2393                input: None,
2394            })),
2395        }));
2396
2397        let mut binder = Binder::new();
2398        let err = binder.bind(&plan).unwrap_err();
2399        let msg = err.to_string();
2400        // The error should contain the variable name at minimum
2401        assert!(
2402            msg.contains("persn"),
2403            "Error should mention the undefined variable"
2404        );
2405    }
2406
2407    #[test]
2408    fn test_anon_variables_skip_validation() {
2409        // Variables starting with _anon_ are anonymous and should be silently accepted
2410        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2411            items: vec![ReturnItem {
2412                expression: LogicalExpression::Variable("_anon_42".to_string()),
2413                alias: None,
2414            }],
2415            distinct: false,
2416            input: Box::new(LogicalOperator::Empty),
2417        }));
2418
2419        let mut binder = Binder::new();
2420        let result = binder.bind(&plan);
2421        assert!(
2422            result.is_ok(),
2423            "Anonymous variables should bypass validation"
2424        );
2425    }
2426
2427    #[test]
2428    fn test_map_expression_validates_values() {
2429        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2430            items: vec![ReturnItem {
2431                expression: LogicalExpression::Map(vec![(
2432                    "key".to_string(),
2433                    LogicalExpression::Variable("undefined".to_string()),
2434                )]),
2435                alias: None,
2436            }],
2437            distinct: false,
2438            input: Box::new(LogicalOperator::Empty),
2439        }));
2440
2441        let mut binder = Binder::new();
2442        assert!(
2443            binder.bind(&plan).is_err(),
2444            "Map values should be validated"
2445        );
2446    }
2447
2448    #[test]
2449    fn test_vector_scan_validates_query_vector() {
2450        use crate::query::plan::VectorScanOp;
2451
2452        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2453            variable: "result".to_string(),
2454            index_name: None,
2455            property: "embedding".to_string(),
2456            label: Some("Doc".to_string()),
2457            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2458            k: 10,
2459            metric: None,
2460            min_similarity: None,
2461            max_distance: None,
2462            input: None,
2463        }));
2464
2465        let mut binder = Binder::new();
2466        let err = binder.bind(&plan).unwrap_err();
2467        assert!(err.to_string().contains("undefined_vec"));
2468    }
2469}