Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                Ok(())
314            }
315
316            // RDF/SPARQL operators
317            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
318            LogicalOperator::Union(union) => {
319                for input in &union.inputs {
320                    self.bind_operator(input)?;
321                }
322                Ok(())
323            }
324            LogicalOperator::LeftJoin(lj) => {
325                self.bind_operator(&lj.left)?;
326                self.bind_operator(&lj.right)?;
327                if let Some(ref cond) = lj.condition {
328                    self.validate_expression(cond)?;
329                }
330                Ok(())
331            }
332            LogicalOperator::AntiJoin(aj) => {
333                self.bind_operator(&aj.left)?;
334                self.bind_operator(&aj.right)?;
335                Ok(())
336            }
337            LogicalOperator::Bind(bind) => {
338                self.bind_operator(&bind.input)?;
339                self.validate_expression(&bind.expression)?;
340                self.context.add_variable(
341                    bind.variable.clone(),
342                    VariableInfo {
343                        name: bind.variable.clone(),
344                        data_type: LogicalType::Any,
345                        is_node: false,
346                        is_edge: false,
347                    },
348                );
349                Ok(())
350            }
351            LogicalOperator::Merge(merge) => {
352                // First bind the input
353                self.bind_operator(&merge.input)?;
354                // Validate the match property expressions
355                for (_, expr) in &merge.match_properties {
356                    self.validate_expression(expr)?;
357                }
358                // Validate the ON CREATE property expressions
359                for (_, expr) in &merge.on_create {
360                    self.validate_expression(expr)?;
361                }
362                // Validate the ON MATCH property expressions
363                for (_, expr) in &merge.on_match {
364                    self.validate_expression(expr)?;
365                }
366                // MERGE introduces a new variable
367                self.context.add_variable(
368                    merge.variable.clone(),
369                    VariableInfo {
370                        name: merge.variable.clone(),
371                        data_type: LogicalType::Node,
372                        is_node: true,
373                        is_edge: false,
374                    },
375                );
376                Ok(())
377            }
378            LogicalOperator::AddLabel(add_label) => {
379                self.bind_operator(&add_label.input)?;
380                // Validate that the variable exists
381                if !self.context.contains(&add_label.variable) {
382                    return Err(undefined_variable_error(
383                        &add_label.variable,
384                        &self.context,
385                        " in SET labels",
386                    ));
387                }
388                Ok(())
389            }
390            LogicalOperator::RemoveLabel(remove_label) => {
391                self.bind_operator(&remove_label.input)?;
392                // Validate that the variable exists
393                if !self.context.contains(&remove_label.variable) {
394                    return Err(undefined_variable_error(
395                        &remove_label.variable,
396                        &self.context,
397                        " in REMOVE labels",
398                    ));
399                }
400                Ok(())
401            }
402            LogicalOperator::ShortestPath(sp) => {
403                // First bind the input
404                self.bind_operator(&sp.input)?;
405                // Validate that source and target variables are defined
406                if !self.context.contains(&sp.source_var) {
407                    return Err(undefined_variable_error(
408                        &sp.source_var,
409                        &self.context,
410                        " (source in shortestPath)",
411                    ));
412                }
413                if !self.context.contains(&sp.target_var) {
414                    return Err(undefined_variable_error(
415                        &sp.target_var,
416                        &self.context,
417                        " (target in shortestPath)",
418                    ));
419                }
420                // Add the path alias variable to the context
421                self.context.add_variable(
422                    sp.path_alias.clone(),
423                    VariableInfo {
424                        name: sp.path_alias.clone(),
425                        data_type: LogicalType::Any, // Path is a complex type
426                        is_node: false,
427                        is_edge: false,
428                    },
429                );
430                // Also add the path length variable for length(p) calls
431                let path_length_var = format!("_path_length_{}", sp.path_alias);
432                self.context.add_variable(
433                    path_length_var.clone(),
434                    VariableInfo {
435                        name: path_length_var,
436                        data_type: LogicalType::Int64,
437                        is_node: false,
438                        is_edge: false,
439                    },
440                );
441                Ok(())
442            }
443            // SPARQL Update operators - these don't require variable binding
444            LogicalOperator::InsertTriple(insert) => {
445                if let Some(ref input) = insert.input {
446                    self.bind_operator(input)?;
447                }
448                Ok(())
449            }
450            LogicalOperator::DeleteTriple(delete) => {
451                if let Some(ref input) = delete.input {
452                    self.bind_operator(input)?;
453                }
454                Ok(())
455            }
456            LogicalOperator::Modify(modify) => {
457                self.bind_operator(&modify.where_clause)?;
458                Ok(())
459            }
460            LogicalOperator::ClearGraph(_)
461            | LogicalOperator::CreateGraph(_)
462            | LogicalOperator::DropGraph(_)
463            | LogicalOperator::LoadGraph(_)
464            | LogicalOperator::CopyGraph(_)
465            | LogicalOperator::MoveGraph(_)
466            | LogicalOperator::AddGraph(_) => Ok(()),
467            LogicalOperator::VectorScan(scan) => {
468                // VectorScan introduces a variable for matched nodes
469                if let Some(ref input) = scan.input {
470                    self.bind_operator(input)?;
471                }
472                self.context.add_variable(
473                    scan.variable.clone(),
474                    VariableInfo {
475                        name: scan.variable.clone(),
476                        data_type: LogicalType::Node,
477                        is_node: true,
478                        is_edge: false,
479                    },
480                );
481                // Validate the query vector expression
482                self.validate_expression(&scan.query_vector)?;
483                Ok(())
484            }
485            LogicalOperator::VectorJoin(join) => {
486                // VectorJoin takes input from left side and produces right-side matches
487                self.bind_operator(&join.input)?;
488                // Add right variable for matched nodes
489                self.context.add_variable(
490                    join.right_variable.clone(),
491                    VariableInfo {
492                        name: join.right_variable.clone(),
493                        data_type: LogicalType::Node,
494                        is_node: true,
495                        is_edge: false,
496                    },
497                );
498                // Optionally add score variable
499                if let Some(ref score_var) = join.score_variable {
500                    self.context.add_variable(
501                        score_var.clone(),
502                        VariableInfo {
503                            name: score_var.clone(),
504                            data_type: LogicalType::Float64,
505                            is_node: false,
506                            is_edge: false,
507                        },
508                    );
509                }
510                // Validate the query vector expression
511                self.validate_expression(&join.query_vector)?;
512                Ok(())
513            }
514            // DDL operators don't need binding — they're handled before the binder
515            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
516        }
517    }
518
519    /// Binds a triple scan operator (for RDF/SPARQL).
520    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
521        use crate::query::plan::TripleComponent;
522
523        // First bind the input if present
524        if let Some(ref input) = scan.input {
525            self.bind_operator(input)?;
526        }
527
528        // Add variables for subject, predicate, object
529        if let TripleComponent::Variable(name) = &scan.subject
530            && !self.context.contains(name)
531        {
532            self.context.add_variable(
533                name.clone(),
534                VariableInfo {
535                    name: name.clone(),
536                    data_type: LogicalType::Any, // RDF term
537                    is_node: false,
538                    is_edge: false,
539                },
540            );
541        }
542
543        if let TripleComponent::Variable(name) = &scan.predicate
544            && !self.context.contains(name)
545        {
546            self.context.add_variable(
547                name.clone(),
548                VariableInfo {
549                    name: name.clone(),
550                    data_type: LogicalType::Any, // IRI
551                    is_node: false,
552                    is_edge: false,
553                },
554            );
555        }
556
557        if let TripleComponent::Variable(name) = &scan.object
558            && !self.context.contains(name)
559        {
560            self.context.add_variable(
561                name.clone(),
562                VariableInfo {
563                    name: name.clone(),
564                    data_type: LogicalType::Any, // RDF term
565                    is_node: false,
566                    is_edge: false,
567                },
568            );
569        }
570
571        if let Some(TripleComponent::Variable(name)) = &scan.graph
572            && !self.context.contains(name)
573        {
574            self.context.add_variable(
575                name.clone(),
576                VariableInfo {
577                    name: name.clone(),
578                    data_type: LogicalType::Any, // IRI
579                    is_node: false,
580                    is_edge: false,
581                },
582            );
583        }
584
585        Ok(())
586    }
587
588    /// Binds a node scan operator.
589    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
590        // First bind the input if present
591        if let Some(ref input) = scan.input {
592            self.bind_operator(input)?;
593        }
594
595        // Add the scanned variable to scope
596        self.context.add_variable(
597            scan.variable.clone(),
598            VariableInfo {
599                name: scan.variable.clone(),
600                data_type: LogicalType::Node,
601                is_node: true,
602                is_edge: false,
603            },
604        );
605
606        Ok(())
607    }
608
609    /// Binds an expand operator.
610    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
611        // First bind the input
612        self.bind_operator(&expand.input)?;
613
614        // Validate that the source variable is defined
615        if !self.context.contains(&expand.from_variable) {
616            return Err(undefined_variable_error(
617                &expand.from_variable,
618                &self.context,
619                " in EXPAND",
620            ));
621        }
622
623        // Validate that the source is a node
624        if let Some(info) = self.context.get(&expand.from_variable)
625            && !info.is_node
626        {
627            return Err(binding_error(format!(
628                "Variable '{}' is not a node, cannot expand from it",
629                expand.from_variable
630            )));
631        }
632
633        // Add edge variable if present
634        if let Some(ref edge_var) = expand.edge_variable {
635            self.context.add_variable(
636                edge_var.clone(),
637                VariableInfo {
638                    name: edge_var.clone(),
639                    data_type: LogicalType::Edge,
640                    is_node: false,
641                    is_edge: true,
642                },
643            );
644        }
645
646        // Add target variable
647        self.context.add_variable(
648            expand.to_variable.clone(),
649            VariableInfo {
650                name: expand.to_variable.clone(),
651                data_type: LogicalType::Node,
652                is_node: true,
653                is_edge: false,
654            },
655        );
656
657        // Add path variables for variable-length paths
658        if let Some(ref path_alias) = expand.path_alias {
659            // length(p) → _path_length_p
660            let path_length_var = format!("_path_length_{}", path_alias);
661            self.context.add_variable(
662                path_length_var.clone(),
663                VariableInfo {
664                    name: path_length_var,
665                    data_type: LogicalType::Int64,
666                    is_node: false,
667                    is_edge: false,
668                },
669            );
670            // nodes(p) → _path_nodes_p
671            let path_nodes_var = format!("_path_nodes_{}", path_alias);
672            self.context.add_variable(
673                path_nodes_var.clone(),
674                VariableInfo {
675                    name: path_nodes_var,
676                    data_type: LogicalType::Any,
677                    is_node: false,
678                    is_edge: false,
679                },
680            );
681            // edges(p) → _path_edges_p
682            let path_edges_var = format!("_path_edges_{}", path_alias);
683            self.context.add_variable(
684                path_edges_var.clone(),
685                VariableInfo {
686                    name: path_edges_var,
687                    data_type: LogicalType::Any,
688                    is_node: false,
689                    is_edge: false,
690                },
691            );
692        }
693
694        Ok(())
695    }
696
697    /// Binds a filter operator.
698    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
699        // First bind the input
700        self.bind_operator(&filter.input)?;
701
702        // Validate the predicate expression
703        self.validate_expression(&filter.predicate)?;
704
705        Ok(())
706    }
707
708    /// Binds a return operator.
709    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
710        // First bind the input
711        self.bind_operator(&ret.input)?;
712
713        // Validate all return expressions
714        for item in &ret.items {
715            self.validate_return_item(item)?;
716        }
717
718        Ok(())
719    }
720
721    /// Validates a return item.
722    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
723        self.validate_expression(&item.expression)
724    }
725
726    /// Validates that an expression only references defined variables.
727    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
728        match expr {
729            LogicalExpression::Variable(name) => {
730                if !self.context.contains(name) && !name.starts_with("_anon_") {
731                    return Err(undefined_variable_error(name, &self.context, ""));
732                }
733                Ok(())
734            }
735            LogicalExpression::Property { variable, .. } => {
736                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
737                    return Err(undefined_variable_error(
738                        variable,
739                        &self.context,
740                        " in property access",
741                    ));
742                }
743                Ok(())
744            }
745            LogicalExpression::Literal(_) => Ok(()),
746            LogicalExpression::Binary { left, right, .. } => {
747                self.validate_expression(left)?;
748                self.validate_expression(right)
749            }
750            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
751            LogicalExpression::FunctionCall { args, .. } => {
752                for arg in args {
753                    self.validate_expression(arg)?;
754                }
755                Ok(())
756            }
757            LogicalExpression::List(items) => {
758                for item in items {
759                    self.validate_expression(item)?;
760                }
761                Ok(())
762            }
763            LogicalExpression::Map(pairs) => {
764                for (_, value) in pairs {
765                    self.validate_expression(value)?;
766                }
767                Ok(())
768            }
769            LogicalExpression::IndexAccess { base, index } => {
770                self.validate_expression(base)?;
771                self.validate_expression(index)
772            }
773            LogicalExpression::SliceAccess { base, start, end } => {
774                self.validate_expression(base)?;
775                if let Some(s) = start {
776                    self.validate_expression(s)?;
777                }
778                if let Some(e) = end {
779                    self.validate_expression(e)?;
780                }
781                Ok(())
782            }
783            LogicalExpression::Case {
784                operand,
785                when_clauses,
786                else_clause,
787            } => {
788                if let Some(op) = operand {
789                    self.validate_expression(op)?;
790                }
791                for (cond, result) in when_clauses {
792                    self.validate_expression(cond)?;
793                    self.validate_expression(result)?;
794                }
795                if let Some(else_expr) = else_clause {
796                    self.validate_expression(else_expr)?;
797                }
798                Ok(())
799            }
800            // Parameter references are validated externally
801            LogicalExpression::Parameter(_) => Ok(()),
802            // labels(n), type(e), id(n) need the variable to be defined
803            LogicalExpression::Labels(var)
804            | LogicalExpression::Type(var)
805            | LogicalExpression::Id(var) => {
806                if !self.context.contains(var) && !var.starts_with("_anon_") {
807                    return Err(undefined_variable_error(var, &self.context, " in function"));
808                }
809                Ok(())
810            }
811            LogicalExpression::ListComprehension {
812                list_expr,
813                filter_expr,
814                map_expr,
815                ..
816            } => {
817                // Validate the list expression
818                self.validate_expression(list_expr)?;
819                // Note: filter_expr and map_expr use the comprehension variable
820                // which is defined within the comprehension scope, so we don't
821                // need to validate it against the outer context
822                if let Some(filter) = filter_expr {
823                    self.validate_expression(filter)?;
824                }
825                self.validate_expression(map_expr)?;
826                Ok(())
827            }
828            LogicalExpression::ExistsSubquery(subquery)
829            | LogicalExpression::CountSubquery(subquery) => {
830                // Subqueries have their own binding context
831                // For now, just validate the structure exists
832                let _ = subquery; // Would need recursive binding
833                Ok(())
834            }
835        }
836    }
837
838    /// Infers the type of an expression for use in WITH clause aliasing.
839    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
840        match expr {
841            LogicalExpression::Variable(name) => {
842                // Look up the variable type from context
843                self.context
844                    .get(name)
845                    .map_or(LogicalType::Any, |info| info.data_type.clone())
846            }
847            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
848            LogicalExpression::Literal(value) => {
849                // Infer type from literal value
850                use grafeo_common::types::Value;
851                match value {
852                    Value::Bool(_) => LogicalType::Bool,
853                    Value::Int64(_) => LogicalType::Int64,
854                    Value::Float64(_) => LogicalType::Float64,
855                    Value::String(_) => LogicalType::String,
856                    Value::List(_) => LogicalType::Any, // Complex type
857                    Value::Map(_) => LogicalType::Any,  // Complex type
858                    Value::Null => LogicalType::Any,
859                    _ => LogicalType::Any,
860                }
861            }
862            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
863            LogicalExpression::Unary { .. } => LogicalType::Any,
864            LogicalExpression::FunctionCall { name, .. } => {
865                // Infer based on function name
866                match name.to_lowercase().as_str() {
867                    "count" | "sum" | "id" => LogicalType::Int64,
868                    "avg" => LogicalType::Float64,
869                    "type" => LogicalType::String,
870                    // List-returning functions use Any since we don't track element type
871                    "labels" | "collect" => LogicalType::Any,
872                    _ => LogicalType::Any,
873                }
874            }
875            LogicalExpression::List(_) => LogicalType::Any, // Complex type
876            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
877            _ => LogicalType::Any,
878        }
879    }
880
881    /// Binds a join operator.
882    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
883        // Bind both sides of the join
884        self.bind_operator(&join.left)?;
885        self.bind_operator(&join.right)?;
886
887        // Validate join conditions
888        for condition in &join.conditions {
889            self.validate_expression(&condition.left)?;
890            self.validate_expression(&condition.right)?;
891        }
892
893        Ok(())
894    }
895
896    /// Binds an aggregate operator.
897    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
898        // Bind the input first
899        self.bind_operator(&agg.input)?;
900
901        // Validate group by expressions
902        for expr in &agg.group_by {
903            self.validate_expression(expr)?;
904        }
905
906        // Validate aggregate expressions
907        for agg_expr in &agg.aggregates {
908            if let Some(ref expr) = agg_expr.expression {
909                self.validate_expression(expr)?;
910            }
911            // Add the alias as a new variable if present
912            if let Some(ref alias) = agg_expr.alias {
913                self.context.add_variable(
914                    alias.clone(),
915                    VariableInfo {
916                        name: alias.clone(),
917                        data_type: LogicalType::Any,
918                        is_node: false,
919                        is_edge: false,
920                    },
921                );
922            }
923        }
924
925        Ok(())
926    }
927}
928
929impl Default for Binder {
930    fn default() -> Self {
931        Self::new()
932    }
933}
934
935#[cfg(test)]
936mod tests {
937    use super::*;
938    use crate::query::plan::{BinaryOp, FilterOp};
939
940    #[test]
941    fn test_bind_simple_scan() {
942        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
943            items: vec![ReturnItem {
944                expression: LogicalExpression::Variable("n".to_string()),
945                alias: None,
946            }],
947            distinct: false,
948            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
949                variable: "n".to_string(),
950                label: Some("Person".to_string()),
951                input: None,
952            })),
953        }));
954
955        let mut binder = Binder::new();
956        let result = binder.bind(&plan);
957
958        assert!(result.is_ok());
959        let ctx = result.unwrap();
960        assert!(ctx.contains("n"));
961        assert!(ctx.get("n").unwrap().is_node);
962    }
963
964    #[test]
965    fn test_bind_undefined_variable() {
966        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
967            items: vec![ReturnItem {
968                expression: LogicalExpression::Variable("undefined".to_string()),
969                alias: None,
970            }],
971            distinct: false,
972            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
973                variable: "n".to_string(),
974                label: None,
975                input: None,
976            })),
977        }));
978
979        let mut binder = Binder::new();
980        let result = binder.bind(&plan);
981
982        assert!(result.is_err());
983        let err = result.unwrap_err();
984        assert!(err.to_string().contains("Undefined variable"));
985    }
986
987    #[test]
988    fn test_bind_property_access() {
989        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
990            items: vec![ReturnItem {
991                expression: LogicalExpression::Property {
992                    variable: "n".to_string(),
993                    property: "name".to_string(),
994                },
995                alias: None,
996            }],
997            distinct: false,
998            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
999                variable: "n".to_string(),
1000                label: Some("Person".to_string()),
1001                input: None,
1002            })),
1003        }));
1004
1005        let mut binder = Binder::new();
1006        let result = binder.bind(&plan);
1007
1008        assert!(result.is_ok());
1009    }
1010
1011    #[test]
1012    fn test_bind_filter_with_undefined_variable() {
1013        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1014            items: vec![ReturnItem {
1015                expression: LogicalExpression::Variable("n".to_string()),
1016                alias: None,
1017            }],
1018            distinct: false,
1019            input: Box::new(LogicalOperator::Filter(FilterOp {
1020                predicate: LogicalExpression::Binary {
1021                    left: Box::new(LogicalExpression::Property {
1022                        variable: "m".to_string(), // undefined!
1023                        property: "age".to_string(),
1024                    }),
1025                    op: BinaryOp::Gt,
1026                    right: Box::new(LogicalExpression::Literal(
1027                        grafeo_common::types::Value::Int64(30),
1028                    )),
1029                },
1030                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1031                    variable: "n".to_string(),
1032                    label: None,
1033                    input: None,
1034                })),
1035            })),
1036        }));
1037
1038        let mut binder = Binder::new();
1039        let result = binder.bind(&plan);
1040
1041        assert!(result.is_err());
1042        let err = result.unwrap_err();
1043        assert!(err.to_string().contains("Undefined variable 'm'"));
1044    }
1045
1046    #[test]
1047    fn test_bind_expand() {
1048        use crate::query::plan::{ExpandDirection, ExpandOp};
1049
1050        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1051            items: vec![
1052                ReturnItem {
1053                    expression: LogicalExpression::Variable("a".to_string()),
1054                    alias: None,
1055                },
1056                ReturnItem {
1057                    expression: LogicalExpression::Variable("b".to_string()),
1058                    alias: None,
1059                },
1060            ],
1061            distinct: false,
1062            input: Box::new(LogicalOperator::Expand(ExpandOp {
1063                from_variable: "a".to_string(),
1064                to_variable: "b".to_string(),
1065                edge_variable: Some("e".to_string()),
1066                direction: ExpandDirection::Outgoing,
1067                edge_type: Some("KNOWS".to_string()),
1068                min_hops: 1,
1069                max_hops: Some(1),
1070                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1071                    variable: "a".to_string(),
1072                    label: Some("Person".to_string()),
1073                    input: None,
1074                })),
1075                path_alias: None,
1076            })),
1077        }));
1078
1079        let mut binder = Binder::new();
1080        let result = binder.bind(&plan);
1081
1082        assert!(result.is_ok());
1083        let ctx = result.unwrap();
1084        assert!(ctx.contains("a"));
1085        assert!(ctx.contains("b"));
1086        assert!(ctx.contains("e"));
1087        assert!(ctx.get("a").unwrap().is_node);
1088        assert!(ctx.get("b").unwrap().is_node);
1089        assert!(ctx.get("e").unwrap().is_edge);
1090    }
1091
1092    #[test]
1093    fn test_bind_expand_from_undefined_variable() {
1094        // Tests that expanding from an undefined variable produces a clear error
1095        use crate::query::plan::{ExpandDirection, ExpandOp};
1096
1097        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1098            items: vec![ReturnItem {
1099                expression: LogicalExpression::Variable("b".to_string()),
1100                alias: None,
1101            }],
1102            distinct: false,
1103            input: Box::new(LogicalOperator::Expand(ExpandOp {
1104                from_variable: "undefined".to_string(), // not defined!
1105                to_variable: "b".to_string(),
1106                edge_variable: None,
1107                direction: ExpandDirection::Outgoing,
1108                edge_type: None,
1109                min_hops: 1,
1110                max_hops: Some(1),
1111                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1112                    variable: "a".to_string(),
1113                    label: None,
1114                    input: None,
1115                })),
1116                path_alias: None,
1117            })),
1118        }));
1119
1120        let mut binder = Binder::new();
1121        let result = binder.bind(&plan);
1122
1123        assert!(result.is_err());
1124        let err = result.unwrap_err();
1125        assert!(
1126            err.to_string().contains("Undefined variable 'undefined'"),
1127            "Expected error about undefined variable, got: {}",
1128            err
1129        );
1130    }
1131
1132    #[test]
1133    fn test_bind_return_with_aggregate_and_non_aggregate() {
1134        // Tests binding of aggregate functions alongside regular expressions
1135        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1136            items: vec![
1137                ReturnItem {
1138                    expression: LogicalExpression::FunctionCall {
1139                        name: "count".to_string(),
1140                        args: vec![LogicalExpression::Variable("n".to_string())],
1141                        distinct: false,
1142                    },
1143                    alias: Some("cnt".to_string()),
1144                },
1145                ReturnItem {
1146                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1147                    alias: Some("one".to_string()),
1148                },
1149            ],
1150            distinct: false,
1151            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1152                variable: "n".to_string(),
1153                label: Some("Person".to_string()),
1154                input: None,
1155            })),
1156        }));
1157
1158        let mut binder = Binder::new();
1159        let result = binder.bind(&plan);
1160
1161        // This should succeed - count(n) with literal is valid
1162        assert!(result.is_ok());
1163    }
1164
1165    #[test]
1166    fn test_bind_nested_property_access() {
1167        // Tests that nested property access on the same variable works
1168        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1169            items: vec![
1170                ReturnItem {
1171                    expression: LogicalExpression::Property {
1172                        variable: "n".to_string(),
1173                        property: "name".to_string(),
1174                    },
1175                    alias: None,
1176                },
1177                ReturnItem {
1178                    expression: LogicalExpression::Property {
1179                        variable: "n".to_string(),
1180                        property: "age".to_string(),
1181                    },
1182                    alias: None,
1183                },
1184            ],
1185            distinct: false,
1186            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1187                variable: "n".to_string(),
1188                label: Some("Person".to_string()),
1189                input: None,
1190            })),
1191        }));
1192
1193        let mut binder = Binder::new();
1194        let result = binder.bind(&plan);
1195
1196        assert!(result.is_ok());
1197    }
1198
1199    #[test]
1200    fn test_bind_binary_expression_with_undefined() {
1201        // Tests that binary expressions with undefined variables produce errors
1202        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1203            items: vec![ReturnItem {
1204                expression: LogicalExpression::Binary {
1205                    left: Box::new(LogicalExpression::Property {
1206                        variable: "n".to_string(),
1207                        property: "age".to_string(),
1208                    }),
1209                    op: BinaryOp::Add,
1210                    right: Box::new(LogicalExpression::Property {
1211                        variable: "m".to_string(), // undefined!
1212                        property: "age".to_string(),
1213                    }),
1214                },
1215                alias: Some("total".to_string()),
1216            }],
1217            distinct: false,
1218            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1219                variable: "n".to_string(),
1220                label: None,
1221                input: None,
1222            })),
1223        }));
1224
1225        let mut binder = Binder::new();
1226        let result = binder.bind(&plan);
1227
1228        assert!(result.is_err());
1229        assert!(
1230            result
1231                .unwrap_err()
1232                .to_string()
1233                .contains("Undefined variable 'm'")
1234        );
1235    }
1236
1237    #[test]
1238    fn test_bind_duplicate_variable_definition() {
1239        // Tests behavior when the same variable is defined twice (via two NodeScans)
1240        // This is typically not allowed or the second shadows the first
1241        use crate::query::plan::{JoinOp, JoinType};
1242
1243        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1244            items: vec![ReturnItem {
1245                expression: LogicalExpression::Variable("n".to_string()),
1246                alias: None,
1247            }],
1248            distinct: false,
1249            input: Box::new(LogicalOperator::Join(JoinOp {
1250                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1251                    variable: "n".to_string(),
1252                    label: Some("A".to_string()),
1253                    input: None,
1254                })),
1255                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1256                    variable: "m".to_string(), // different variable is fine
1257                    label: Some("B".to_string()),
1258                    input: None,
1259                })),
1260                join_type: JoinType::Inner,
1261                conditions: vec![],
1262            })),
1263        }));
1264
1265        let mut binder = Binder::new();
1266        let result = binder.bind(&plan);
1267
1268        // Join with different variables should work
1269        assert!(result.is_ok());
1270        let ctx = result.unwrap();
1271        assert!(ctx.contains("n"));
1272        assert!(ctx.contains("m"));
1273    }
1274
1275    #[test]
1276    fn test_bind_function_with_wrong_arity() {
1277        // Tests that functions with wrong number of arguments are handled
1278        // (behavior depends on whether binder validates arity)
1279        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1280            items: vec![ReturnItem {
1281                expression: LogicalExpression::FunctionCall {
1282                    name: "count".to_string(),
1283                    args: vec![], // count() needs an argument
1284                    distinct: false,
1285                },
1286                alias: None,
1287            }],
1288            distinct: false,
1289            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1290                variable: "n".to_string(),
1291                label: None,
1292                input: None,
1293            })),
1294        }));
1295
1296        let mut binder = Binder::new();
1297        let result = binder.bind(&plan);
1298
1299        // The binder may or may not catch this - if it passes, execution will fail
1300        // This test documents current behavior
1301        // If binding fails, that's fine; if it passes, execution will handle it
1302        let _ = result; // We're just testing it doesn't panic
1303    }
1304
1305    // --- Mutation operator validation ---
1306
1307    #[test]
1308    fn test_create_edge_rejects_undefined_source() {
1309        use crate::query::plan::CreateEdgeOp;
1310
1311        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1312            variable: Some("e".to_string()),
1313            from_variable: "ghost".to_string(), // not defined!
1314            to_variable: "b".to_string(),
1315            edge_type: "KNOWS".to_string(),
1316            properties: vec![],
1317            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1318                variable: "b".to_string(),
1319                label: None,
1320                input: None,
1321            })),
1322        }));
1323
1324        let mut binder = Binder::new();
1325        let err = binder.bind(&plan).unwrap_err();
1326        assert!(
1327            err.to_string().contains("Undefined variable 'ghost'"),
1328            "Should reject undefined source variable, got: {err}"
1329        );
1330    }
1331
1332    #[test]
1333    fn test_create_edge_rejects_undefined_target() {
1334        use crate::query::plan::CreateEdgeOp;
1335
1336        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1337            variable: None,
1338            from_variable: "a".to_string(),
1339            to_variable: "missing".to_string(), // not defined!
1340            edge_type: "KNOWS".to_string(),
1341            properties: vec![],
1342            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1343                variable: "a".to_string(),
1344                label: None,
1345                input: None,
1346            })),
1347        }));
1348
1349        let mut binder = Binder::new();
1350        let err = binder.bind(&plan).unwrap_err();
1351        assert!(
1352            err.to_string().contains("Undefined variable 'missing'"),
1353            "Should reject undefined target variable, got: {err}"
1354        );
1355    }
1356
1357    #[test]
1358    fn test_create_edge_validates_property_expressions() {
1359        use crate::query::plan::CreateEdgeOp;
1360
1361        // Source and target defined, but property references undefined variable
1362        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1363            variable: Some("e".to_string()),
1364            from_variable: "a".to_string(),
1365            to_variable: "b".to_string(),
1366            edge_type: "KNOWS".to_string(),
1367            properties: vec![(
1368                "since".to_string(),
1369                LogicalExpression::Property {
1370                    variable: "x".to_string(), // undefined!
1371                    property: "year".to_string(),
1372                },
1373            )],
1374            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1375                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1376                    variable: "a".to_string(),
1377                    label: None,
1378                    input: None,
1379                })),
1380                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1381                    variable: "b".to_string(),
1382                    label: None,
1383                    input: None,
1384                })),
1385                join_type: crate::query::plan::JoinType::Inner,
1386                conditions: vec![],
1387            })),
1388        }));
1389
1390        let mut binder = Binder::new();
1391        let err = binder.bind(&plan).unwrap_err();
1392        assert!(err.to_string().contains("Undefined variable 'x'"));
1393    }
1394
1395    #[test]
1396    fn test_set_property_rejects_undefined_variable() {
1397        use crate::query::plan::SetPropertyOp;
1398
1399        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1400            variable: "ghost".to_string(),
1401            properties: vec![(
1402                "name".to_string(),
1403                LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1404            )],
1405            replace: false,
1406            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1407                variable: "n".to_string(),
1408                label: None,
1409                input: None,
1410            })),
1411        }));
1412
1413        let mut binder = Binder::new();
1414        let err = binder.bind(&plan).unwrap_err();
1415        assert!(
1416            err.to_string().contains("in SET"),
1417            "Error should indicate SET context, got: {err}"
1418        );
1419    }
1420
1421    #[test]
1422    fn test_delete_node_rejects_undefined_variable() {
1423        use crate::query::plan::DeleteNodeOp;
1424
1425        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1426            variable: "phantom".to_string(),
1427            detach: false,
1428            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1429                variable: "n".to_string(),
1430                label: None,
1431                input: None,
1432            })),
1433        }));
1434
1435        let mut binder = Binder::new();
1436        let err = binder.bind(&plan).unwrap_err();
1437        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1438    }
1439
1440    #[test]
1441    fn test_delete_edge_rejects_undefined_variable() {
1442        use crate::query::plan::DeleteEdgeOp;
1443
1444        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1445            variable: "gone".to_string(),
1446            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1447                variable: "n".to_string(),
1448                label: None,
1449                input: None,
1450            })),
1451        }));
1452
1453        let mut binder = Binder::new();
1454        let err = binder.bind(&plan).unwrap_err();
1455        assert!(err.to_string().contains("Undefined variable 'gone'"));
1456    }
1457
1458    // --- WITH/Project clause ---
1459
1460    #[test]
1461    fn test_project_alias_becomes_available_downstream() {
1462        use crate::query::plan::{ProjectOp, Projection};
1463
1464        // WITH n.name AS person_name RETURN person_name
1465        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1466            items: vec![ReturnItem {
1467                expression: LogicalExpression::Variable("person_name".to_string()),
1468                alias: None,
1469            }],
1470            distinct: false,
1471            input: Box::new(LogicalOperator::Project(ProjectOp {
1472                projections: vec![Projection {
1473                    expression: LogicalExpression::Property {
1474                        variable: "n".to_string(),
1475                        property: "name".to_string(),
1476                    },
1477                    alias: Some("person_name".to_string()),
1478                }],
1479                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1480                    variable: "n".to_string(),
1481                    label: None,
1482                    input: None,
1483                })),
1484            })),
1485        }));
1486
1487        let mut binder = Binder::new();
1488        let ctx = binder.bind(&plan).unwrap();
1489        assert!(
1490            ctx.contains("person_name"),
1491            "WITH alias should be available to RETURN"
1492        );
1493    }
1494
1495    #[test]
1496    fn test_project_rejects_undefined_expression() {
1497        use crate::query::plan::{ProjectOp, Projection};
1498
1499        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1500            projections: vec![Projection {
1501                expression: LogicalExpression::Variable("nope".to_string()),
1502                alias: Some("x".to_string()),
1503            }],
1504            input: Box::new(LogicalOperator::Empty),
1505        }));
1506
1507        let mut binder = Binder::new();
1508        let result = binder.bind(&plan);
1509        assert!(result.is_err(), "WITH on undefined variable should fail");
1510    }
1511
1512    // --- UNWIND ---
1513
1514    #[test]
1515    fn test_unwind_adds_element_variable() {
1516        use crate::query::plan::UnwindOp;
1517
1518        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1519            items: vec![ReturnItem {
1520                expression: LogicalExpression::Variable("item".to_string()),
1521                alias: None,
1522            }],
1523            distinct: false,
1524            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1525                expression: LogicalExpression::List(vec![
1526                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1527                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1528                ]),
1529                variable: "item".to_string(),
1530                input: Box::new(LogicalOperator::Empty),
1531            })),
1532        }));
1533
1534        let mut binder = Binder::new();
1535        let ctx = binder.bind(&plan).unwrap();
1536        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1537        let info = ctx.get("item").unwrap();
1538        assert!(
1539            !info.is_node && !info.is_edge,
1540            "UNWIND variable is not a graph element"
1541        );
1542    }
1543
1544    // --- MERGE ---
1545
1546    #[test]
1547    fn test_merge_adds_variable_and_validates_properties() {
1548        use crate::query::plan::MergeOp;
1549
1550        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1551            items: vec![ReturnItem {
1552                expression: LogicalExpression::Variable("m".to_string()),
1553                alias: None,
1554            }],
1555            distinct: false,
1556            input: Box::new(LogicalOperator::Merge(MergeOp {
1557                variable: "m".to_string(),
1558                labels: vec!["Person".to_string()],
1559                match_properties: vec![(
1560                    "name".to_string(),
1561                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1562                )],
1563                on_create: vec![(
1564                    "created".to_string(),
1565                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1566                )],
1567                on_match: vec![(
1568                    "updated".to_string(),
1569                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1570                )],
1571                input: Box::new(LogicalOperator::Empty),
1572            })),
1573        }));
1574
1575        let mut binder = Binder::new();
1576        let ctx = binder.bind(&plan).unwrap();
1577        assert!(ctx.contains("m"));
1578        assert!(
1579            ctx.get("m").unwrap().is_node,
1580            "MERGE variable should be a node"
1581        );
1582    }
1583
1584    #[test]
1585    fn test_merge_rejects_undefined_in_on_create() {
1586        use crate::query::plan::MergeOp;
1587
1588        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
1589            variable: "m".to_string(),
1590            labels: vec![],
1591            match_properties: vec![],
1592            on_create: vec![(
1593                "name".to_string(),
1594                LogicalExpression::Property {
1595                    variable: "other".to_string(), // undefined!
1596                    property: "name".to_string(),
1597                },
1598            )],
1599            on_match: vec![],
1600            input: Box::new(LogicalOperator::Empty),
1601        }));
1602
1603        let mut binder = Binder::new();
1604        let result = binder.bind(&plan);
1605        assert!(
1606            result.is_err(),
1607            "ON CREATE referencing undefined variable should fail"
1608        );
1609    }
1610
1611    // --- ShortestPath ---
1612
1613    #[test]
1614    fn test_shortest_path_rejects_undefined_source() {
1615        use crate::query::plan::{ExpandDirection, ShortestPathOp};
1616
1617        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1618            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1619                variable: "b".to_string(),
1620                label: None,
1621                input: None,
1622            })),
1623            source_var: "missing".to_string(), // not defined
1624            target_var: "b".to_string(),
1625            edge_type: None,
1626            direction: ExpandDirection::Both,
1627            path_alias: "p".to_string(),
1628            all_paths: false,
1629        }));
1630
1631        let mut binder = Binder::new();
1632        let err = binder.bind(&plan).unwrap_err();
1633        assert!(
1634            err.to_string().contains("source in shortestPath"),
1635            "Error should mention shortestPath source context, got: {err}"
1636        );
1637    }
1638
1639    #[test]
1640    fn test_shortest_path_adds_path_and_length_variables() {
1641        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
1642
1643        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1644            input: Box::new(LogicalOperator::Join(JoinOp {
1645                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1646                    variable: "a".to_string(),
1647                    label: None,
1648                    input: None,
1649                })),
1650                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1651                    variable: "b".to_string(),
1652                    label: None,
1653                    input: None,
1654                })),
1655                join_type: JoinType::Cross,
1656                conditions: vec![],
1657            })),
1658            source_var: "a".to_string(),
1659            target_var: "b".to_string(),
1660            edge_type: Some("ROAD".to_string()),
1661            direction: ExpandDirection::Outgoing,
1662            path_alias: "p".to_string(),
1663            all_paths: false,
1664        }));
1665
1666        let mut binder = Binder::new();
1667        let ctx = binder.bind(&plan).unwrap();
1668        assert!(ctx.contains("p"), "Path alias should be bound");
1669        assert!(
1670            ctx.contains("_path_length_p"),
1671            "Path length variable should be auto-created"
1672        );
1673    }
1674
1675    // --- Expression validation edge cases ---
1676
1677    #[test]
1678    fn test_case_expression_validates_all_branches() {
1679        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1680            items: vec![ReturnItem {
1681                expression: LogicalExpression::Case {
1682                    operand: None,
1683                    when_clauses: vec![
1684                        (
1685                            LogicalExpression::Binary {
1686                                left: Box::new(LogicalExpression::Property {
1687                                    variable: "n".to_string(),
1688                                    property: "age".to_string(),
1689                                }),
1690                                op: BinaryOp::Gt,
1691                                right: Box::new(LogicalExpression::Literal(
1692                                    grafeo_common::types::Value::Int64(18),
1693                                )),
1694                            },
1695                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1696                                "adult".into(),
1697                            )),
1698                        ),
1699                        (
1700                            // This branch references undefined variable
1701                            LogicalExpression::Property {
1702                                variable: "ghost".to_string(),
1703                                property: "flag".to_string(),
1704                            },
1705                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1706                                "flagged".into(),
1707                            )),
1708                        ),
1709                    ],
1710                    else_clause: Some(Box::new(LogicalExpression::Literal(
1711                        grafeo_common::types::Value::String("other".into()),
1712                    ))),
1713                },
1714                alias: None,
1715            }],
1716            distinct: false,
1717            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1718                variable: "n".to_string(),
1719                label: None,
1720                input: None,
1721            })),
1722        }));
1723
1724        let mut binder = Binder::new();
1725        let err = binder.bind(&plan).unwrap_err();
1726        assert!(
1727            err.to_string().contains("ghost"),
1728            "CASE should validate all when-clause conditions"
1729        );
1730    }
1731
1732    #[test]
1733    fn test_case_expression_validates_else_clause() {
1734        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1735            items: vec![ReturnItem {
1736                expression: LogicalExpression::Case {
1737                    operand: None,
1738                    when_clauses: vec![(
1739                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1740                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1741                    )],
1742                    else_clause: Some(Box::new(LogicalExpression::Property {
1743                        variable: "missing".to_string(),
1744                        property: "x".to_string(),
1745                    })),
1746                },
1747                alias: None,
1748            }],
1749            distinct: false,
1750            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1751                variable: "n".to_string(),
1752                label: None,
1753                input: None,
1754            })),
1755        }));
1756
1757        let mut binder = Binder::new();
1758        let err = binder.bind(&plan).unwrap_err();
1759        assert!(
1760            err.to_string().contains("missing"),
1761            "CASE ELSE should validate its expression too"
1762        );
1763    }
1764
1765    #[test]
1766    fn test_slice_access_validates_expressions() {
1767        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1768            items: vec![ReturnItem {
1769                expression: LogicalExpression::SliceAccess {
1770                    base: Box::new(LogicalExpression::Variable("n".to_string())),
1771                    start: Some(Box::new(LogicalExpression::Variable(
1772                        "undefined_start".to_string(),
1773                    ))),
1774                    end: None,
1775                },
1776                alias: None,
1777            }],
1778            distinct: false,
1779            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1780                variable: "n".to_string(),
1781                label: None,
1782                input: None,
1783            })),
1784        }));
1785
1786        let mut binder = Binder::new();
1787        let err = binder.bind(&plan).unwrap_err();
1788        assert!(err.to_string().contains("undefined_start"));
1789    }
1790
1791    #[test]
1792    fn test_list_comprehension_validates_list_source() {
1793        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1794            items: vec![ReturnItem {
1795                expression: LogicalExpression::ListComprehension {
1796                    variable: "x".to_string(),
1797                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
1798                    filter_expr: None,
1799                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
1800                },
1801                alias: None,
1802            }],
1803            distinct: false,
1804            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1805                variable: "n".to_string(),
1806                label: None,
1807                input: None,
1808            })),
1809        }));
1810
1811        let mut binder = Binder::new();
1812        let err = binder.bind(&plan).unwrap_err();
1813        assert!(
1814            err.to_string().contains("not_defined"),
1815            "List comprehension should validate source list expression"
1816        );
1817    }
1818
1819    #[test]
1820    fn test_labels_type_id_reject_undefined() {
1821        // labels(x) where x is not defined
1822        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1823            items: vec![ReturnItem {
1824                expression: LogicalExpression::Labels("x".to_string()),
1825                alias: None,
1826            }],
1827            distinct: false,
1828            input: Box::new(LogicalOperator::Empty),
1829        }));
1830
1831        let mut binder = Binder::new();
1832        assert!(
1833            binder.bind(&plan).is_err(),
1834            "labels(x) on undefined x should fail"
1835        );
1836
1837        // type(e) where e is not defined
1838        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1839            items: vec![ReturnItem {
1840                expression: LogicalExpression::Type("e".to_string()),
1841                alias: None,
1842            }],
1843            distinct: false,
1844            input: Box::new(LogicalOperator::Empty),
1845        }));
1846
1847        let mut binder2 = Binder::new();
1848        assert!(
1849            binder2.bind(&plan2).is_err(),
1850            "type(e) on undefined e should fail"
1851        );
1852
1853        // id(n) where n is not defined
1854        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1855            items: vec![ReturnItem {
1856                expression: LogicalExpression::Id("n".to_string()),
1857                alias: None,
1858            }],
1859            distinct: false,
1860            input: Box::new(LogicalOperator::Empty),
1861        }));
1862
1863        let mut binder3 = Binder::new();
1864        assert!(
1865            binder3.bind(&plan3).is_err(),
1866            "id(n) on undefined n should fail"
1867        );
1868    }
1869
1870    #[test]
1871    fn test_expand_rejects_non_node_source() {
1872        use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};
1873
1874        // UNWIND [1,2] AS x  -- x is not a node
1875        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
1876        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1877            items: vec![ReturnItem {
1878                expression: LogicalExpression::Variable("b".to_string()),
1879                alias: None,
1880            }],
1881            distinct: false,
1882            input: Box::new(LogicalOperator::Expand(ExpandOp {
1883                from_variable: "x".to_string(),
1884                to_variable: "b".to_string(),
1885                edge_variable: None,
1886                direction: ExpandDirection::Outgoing,
1887                edge_type: None,
1888                min_hops: 1,
1889                max_hops: Some(1),
1890                input: Box::new(LogicalOperator::Unwind(UnwindOp {
1891                    expression: LogicalExpression::List(vec![]),
1892                    variable: "x".to_string(),
1893                    input: Box::new(LogicalOperator::Empty),
1894                })),
1895                path_alias: None,
1896            })),
1897        }));
1898
1899        let mut binder = Binder::new();
1900        let err = binder.bind(&plan).unwrap_err();
1901        assert!(
1902            err.to_string().contains("not a node"),
1903            "Expanding from non-node should fail, got: {err}"
1904        );
1905    }
1906
1907    #[test]
1908    fn test_add_label_rejects_undefined_variable() {
1909        use crate::query::plan::AddLabelOp;
1910
1911        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
1912            variable: "missing".to_string(),
1913            labels: vec!["Admin".to_string()],
1914            input: Box::new(LogicalOperator::Empty),
1915        }));
1916
1917        let mut binder = Binder::new();
1918        let err = binder.bind(&plan).unwrap_err();
1919        assert!(err.to_string().contains("SET labels"));
1920    }
1921
1922    #[test]
1923    fn test_remove_label_rejects_undefined_variable() {
1924        use crate::query::plan::RemoveLabelOp;
1925
1926        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
1927            variable: "missing".to_string(),
1928            labels: vec!["Admin".to_string()],
1929            input: Box::new(LogicalOperator::Empty),
1930        }));
1931
1932        let mut binder = Binder::new();
1933        let err = binder.bind(&plan).unwrap_err();
1934        assert!(err.to_string().contains("REMOVE labels"));
1935    }
1936
1937    #[test]
1938    fn test_sort_validates_key_expressions() {
1939        use crate::query::plan::{SortKey, SortOp, SortOrder};
1940
1941        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
1942            keys: vec![SortKey {
1943                expression: LogicalExpression::Property {
1944                    variable: "missing".to_string(),
1945                    property: "name".to_string(),
1946                },
1947                order: SortOrder::Ascending,
1948            }],
1949            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1950                variable: "n".to_string(),
1951                label: None,
1952                input: None,
1953            })),
1954        }));
1955
1956        let mut binder = Binder::new();
1957        assert!(
1958            binder.bind(&plan).is_err(),
1959            "ORDER BY on undefined variable should fail"
1960        );
1961    }
1962
1963    #[test]
1964    fn test_create_node_adds_variable_before_property_validation() {
1965        use crate::query::plan::CreateNodeOp;
1966
1967        // CREATE (n:Person {friend: n.name}) - referencing the node being created
1968        // The variable should be available for property expressions (self-reference)
1969        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
1970            variable: "n".to_string(),
1971            labels: vec!["Person".to_string()],
1972            properties: vec![(
1973                "self_ref".to_string(),
1974                LogicalExpression::Property {
1975                    variable: "n".to_string(),
1976                    property: "name".to_string(),
1977                },
1978            )],
1979            input: None,
1980        }));
1981
1982        let mut binder = Binder::new();
1983        // This should succeed because CreateNode adds the variable before validating properties
1984        let ctx = binder.bind(&plan).unwrap();
1985        assert!(ctx.get("n").unwrap().is_node);
1986    }
1987
1988    #[test]
1989    fn test_undefined_variable_suggests_similar() {
1990        // 'person' is defined, user types 'persn' - should get a suggestion
1991        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1992            items: vec![ReturnItem {
1993                expression: LogicalExpression::Variable("persn".to_string()),
1994                alias: None,
1995            }],
1996            distinct: false,
1997            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1998                variable: "person".to_string(),
1999                label: None,
2000                input: None,
2001            })),
2002        }));
2003
2004        let mut binder = Binder::new();
2005        let err = binder.bind(&plan).unwrap_err();
2006        let msg = err.to_string();
2007        // The error should contain the variable name at minimum
2008        assert!(
2009            msg.contains("persn"),
2010            "Error should mention the undefined variable"
2011        );
2012    }
2013
2014    #[test]
2015    fn test_anon_variables_skip_validation() {
2016        // Variables starting with _anon_ are anonymous and should be silently accepted
2017        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2018            items: vec![ReturnItem {
2019                expression: LogicalExpression::Variable("_anon_42".to_string()),
2020                alias: None,
2021            }],
2022            distinct: false,
2023            input: Box::new(LogicalOperator::Empty),
2024        }));
2025
2026        let mut binder = Binder::new();
2027        let result = binder.bind(&plan);
2028        assert!(
2029            result.is_ok(),
2030            "Anonymous variables should bypass validation"
2031        );
2032    }
2033
2034    #[test]
2035    fn test_map_expression_validates_values() {
2036        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2037            items: vec![ReturnItem {
2038                expression: LogicalExpression::Map(vec![(
2039                    "key".to_string(),
2040                    LogicalExpression::Variable("undefined".to_string()),
2041                )]),
2042                alias: None,
2043            }],
2044            distinct: false,
2045            input: Box::new(LogicalOperator::Empty),
2046        }));
2047
2048        let mut binder = Binder::new();
2049        assert!(
2050            binder.bind(&plan).is_err(),
2051            "Map values should be validated"
2052        );
2053    }
2054
2055    #[test]
2056    fn test_vector_scan_validates_query_vector() {
2057        use crate::query::plan::VectorScanOp;
2058
2059        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2060            variable: "result".to_string(),
2061            index_name: None,
2062            property: "embedding".to_string(),
2063            label: Some("Doc".to_string()),
2064            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2065            k: 10,
2066            metric: None,
2067            min_similarity: None,
2068            max_distance: None,
2069            input: None,
2070        }));
2071
2072        let mut binder = Binder::new();
2073        let err = binder.bind(&plan).unwrap_err();
2074        assert!(err.to_string().contains("undefined_vec"));
2075    }
2076}