Skip to main content

grafeo_engine/query/
binder.rs

1//! Semantic validation - catching errors before execution.
2//!
3//! The binder walks the logical plan and validates that everything makes sense:
4//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
5//! - Does that property access make sense? (Accessing `.age` on an integer fails)
6//! - Are types compatible? (Can't compare a string to an integer)
7//!
8//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                Ok(())
314            }
315
316            // RDF/SPARQL operators
317            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
318            LogicalOperator::Union(union) => {
319                for input in &union.inputs {
320                    self.bind_operator(input)?;
321                }
322                Ok(())
323            }
324            LogicalOperator::LeftJoin(lj) => {
325                self.bind_operator(&lj.left)?;
326                self.bind_operator(&lj.right)?;
327                if let Some(ref cond) = lj.condition {
328                    self.validate_expression(cond)?;
329                }
330                Ok(())
331            }
332            LogicalOperator::AntiJoin(aj) => {
333                self.bind_operator(&aj.left)?;
334                self.bind_operator(&aj.right)?;
335                Ok(())
336            }
337            LogicalOperator::Bind(bind) => {
338                self.bind_operator(&bind.input)?;
339                self.validate_expression(&bind.expression)?;
340                self.context.add_variable(
341                    bind.variable.clone(),
342                    VariableInfo {
343                        name: bind.variable.clone(),
344                        data_type: LogicalType::Any,
345                        is_node: false,
346                        is_edge: false,
347                    },
348                );
349                Ok(())
350            }
351            LogicalOperator::Merge(merge) => {
352                // First bind the input
353                self.bind_operator(&merge.input)?;
354                // Validate the match property expressions
355                for (_, expr) in &merge.match_properties {
356                    self.validate_expression(expr)?;
357                }
358                // Validate the ON CREATE property expressions
359                for (_, expr) in &merge.on_create {
360                    self.validate_expression(expr)?;
361                }
362                // Validate the ON MATCH property expressions
363                for (_, expr) in &merge.on_match {
364                    self.validate_expression(expr)?;
365                }
366                // MERGE introduces a new variable
367                self.context.add_variable(
368                    merge.variable.clone(),
369                    VariableInfo {
370                        name: merge.variable.clone(),
371                        data_type: LogicalType::Node,
372                        is_node: true,
373                        is_edge: false,
374                    },
375                );
376                Ok(())
377            }
378            LogicalOperator::AddLabel(add_label) => {
379                self.bind_operator(&add_label.input)?;
380                // Validate that the variable exists
381                if !self.context.contains(&add_label.variable) {
382                    return Err(undefined_variable_error(
383                        &add_label.variable,
384                        &self.context,
385                        " in SET labels",
386                    ));
387                }
388                Ok(())
389            }
390            LogicalOperator::RemoveLabel(remove_label) => {
391                self.bind_operator(&remove_label.input)?;
392                // Validate that the variable exists
393                if !self.context.contains(&remove_label.variable) {
394                    return Err(undefined_variable_error(
395                        &remove_label.variable,
396                        &self.context,
397                        " in REMOVE labels",
398                    ));
399                }
400                Ok(())
401            }
402            LogicalOperator::ShortestPath(sp) => {
403                // First bind the input
404                self.bind_operator(&sp.input)?;
405                // Validate that source and target variables are defined
406                if !self.context.contains(&sp.source_var) {
407                    return Err(undefined_variable_error(
408                        &sp.source_var,
409                        &self.context,
410                        " (source in shortestPath)",
411                    ));
412                }
413                if !self.context.contains(&sp.target_var) {
414                    return Err(undefined_variable_error(
415                        &sp.target_var,
416                        &self.context,
417                        " (target in shortestPath)",
418                    ));
419                }
420                // Add the path alias variable to the context
421                self.context.add_variable(
422                    sp.path_alias.clone(),
423                    VariableInfo {
424                        name: sp.path_alias.clone(),
425                        data_type: LogicalType::Any, // Path is a complex type
426                        is_node: false,
427                        is_edge: false,
428                    },
429                );
430                // Also add the path length variable for length(p) calls
431                let path_length_var = format!("_path_length_{}", sp.path_alias);
432                self.context.add_variable(
433                    path_length_var.clone(),
434                    VariableInfo {
435                        name: path_length_var,
436                        data_type: LogicalType::Int64,
437                        is_node: false,
438                        is_edge: false,
439                    },
440                );
441                Ok(())
442            }
443            // SPARQL Update operators - these don't require variable binding
444            LogicalOperator::InsertTriple(insert) => {
445                if let Some(ref input) = insert.input {
446                    self.bind_operator(input)?;
447                }
448                Ok(())
449            }
450            LogicalOperator::DeleteTriple(delete) => {
451                if let Some(ref input) = delete.input {
452                    self.bind_operator(input)?;
453                }
454                Ok(())
455            }
456            LogicalOperator::Modify(modify) => {
457                self.bind_operator(&modify.where_clause)?;
458                Ok(())
459            }
460            LogicalOperator::ClearGraph(_)
461            | LogicalOperator::CreateGraph(_)
462            | LogicalOperator::DropGraph(_)
463            | LogicalOperator::LoadGraph(_)
464            | LogicalOperator::CopyGraph(_)
465            | LogicalOperator::MoveGraph(_)
466            | LogicalOperator::AddGraph(_) => Ok(()),
467            LogicalOperator::VectorScan(scan) => {
468                // VectorScan introduces a variable for matched nodes
469                if let Some(ref input) = scan.input {
470                    self.bind_operator(input)?;
471                }
472                self.context.add_variable(
473                    scan.variable.clone(),
474                    VariableInfo {
475                        name: scan.variable.clone(),
476                        data_type: LogicalType::Node,
477                        is_node: true,
478                        is_edge: false,
479                    },
480                );
481                // Validate the query vector expression
482                self.validate_expression(&scan.query_vector)?;
483                Ok(())
484            }
485            LogicalOperator::VectorJoin(join) => {
486                // VectorJoin takes input from left side and produces right-side matches
487                self.bind_operator(&join.input)?;
488                // Add right variable for matched nodes
489                self.context.add_variable(
490                    join.right_variable.clone(),
491                    VariableInfo {
492                        name: join.right_variable.clone(),
493                        data_type: LogicalType::Node,
494                        is_node: true,
495                        is_edge: false,
496                    },
497                );
498                // Optionally add score variable
499                if let Some(ref score_var) = join.score_variable {
500                    self.context.add_variable(
501                        score_var.clone(),
502                        VariableInfo {
503                            name: score_var.clone(),
504                            data_type: LogicalType::Float64,
505                            is_node: false,
506                            is_edge: false,
507                        },
508                    );
509                }
510                // Validate the query vector expression
511                self.validate_expression(&join.query_vector)?;
512                Ok(())
513            }
514            // DDL operators don't need binding — they're handled before the binder
515            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
516            // Procedure calls don't need variable binding — arguments are constants
517            LogicalOperator::CallProcedure(_) => Ok(()),
518        }
519    }
520
521    /// Binds a triple scan operator (for RDF/SPARQL).
522    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
523        use crate::query::plan::TripleComponent;
524
525        // First bind the input if present
526        if let Some(ref input) = scan.input {
527            self.bind_operator(input)?;
528        }
529
530        // Add variables for subject, predicate, object
531        if let TripleComponent::Variable(name) = &scan.subject
532            && !self.context.contains(name)
533        {
534            self.context.add_variable(
535                name.clone(),
536                VariableInfo {
537                    name: name.clone(),
538                    data_type: LogicalType::Any, // RDF term
539                    is_node: false,
540                    is_edge: false,
541                },
542            );
543        }
544
545        if let TripleComponent::Variable(name) = &scan.predicate
546            && !self.context.contains(name)
547        {
548            self.context.add_variable(
549                name.clone(),
550                VariableInfo {
551                    name: name.clone(),
552                    data_type: LogicalType::Any, // IRI
553                    is_node: false,
554                    is_edge: false,
555                },
556            );
557        }
558
559        if let TripleComponent::Variable(name) = &scan.object
560            && !self.context.contains(name)
561        {
562            self.context.add_variable(
563                name.clone(),
564                VariableInfo {
565                    name: name.clone(),
566                    data_type: LogicalType::Any, // RDF term
567                    is_node: false,
568                    is_edge: false,
569                },
570            );
571        }
572
573        if let Some(TripleComponent::Variable(name)) = &scan.graph
574            && !self.context.contains(name)
575        {
576            self.context.add_variable(
577                name.clone(),
578                VariableInfo {
579                    name: name.clone(),
580                    data_type: LogicalType::Any, // IRI
581                    is_node: false,
582                    is_edge: false,
583                },
584            );
585        }
586
587        Ok(())
588    }
589
590    /// Binds a node scan operator.
591    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
592        // First bind the input if present
593        if let Some(ref input) = scan.input {
594            self.bind_operator(input)?;
595        }
596
597        // Add the scanned variable to scope
598        self.context.add_variable(
599            scan.variable.clone(),
600            VariableInfo {
601                name: scan.variable.clone(),
602                data_type: LogicalType::Node,
603                is_node: true,
604                is_edge: false,
605            },
606        );
607
608        Ok(())
609    }
610
611    /// Binds an expand operator.
612    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
613        // First bind the input
614        self.bind_operator(&expand.input)?;
615
616        // Validate that the source variable is defined
617        if !self.context.contains(&expand.from_variable) {
618            return Err(undefined_variable_error(
619                &expand.from_variable,
620                &self.context,
621                " in EXPAND",
622            ));
623        }
624
625        // Validate that the source is a node
626        if let Some(info) = self.context.get(&expand.from_variable)
627            && !info.is_node
628        {
629            return Err(binding_error(format!(
630                "Variable '{}' is not a node, cannot expand from it",
631                expand.from_variable
632            )));
633        }
634
635        // Add edge variable if present
636        if let Some(ref edge_var) = expand.edge_variable {
637            self.context.add_variable(
638                edge_var.clone(),
639                VariableInfo {
640                    name: edge_var.clone(),
641                    data_type: LogicalType::Edge,
642                    is_node: false,
643                    is_edge: true,
644                },
645            );
646        }
647
648        // Add target variable
649        self.context.add_variable(
650            expand.to_variable.clone(),
651            VariableInfo {
652                name: expand.to_variable.clone(),
653                data_type: LogicalType::Node,
654                is_node: true,
655                is_edge: false,
656            },
657        );
658
659        // Add path variables for variable-length paths
660        if let Some(ref path_alias) = expand.path_alias {
661            // length(p) → _path_length_p
662            let path_length_var = format!("_path_length_{}", path_alias);
663            self.context.add_variable(
664                path_length_var.clone(),
665                VariableInfo {
666                    name: path_length_var,
667                    data_type: LogicalType::Int64,
668                    is_node: false,
669                    is_edge: false,
670                },
671            );
672            // nodes(p) → _path_nodes_p
673            let path_nodes_var = format!("_path_nodes_{}", path_alias);
674            self.context.add_variable(
675                path_nodes_var.clone(),
676                VariableInfo {
677                    name: path_nodes_var,
678                    data_type: LogicalType::Any,
679                    is_node: false,
680                    is_edge: false,
681                },
682            );
683            // edges(p) → _path_edges_p
684            let path_edges_var = format!("_path_edges_{}", path_alias);
685            self.context.add_variable(
686                path_edges_var.clone(),
687                VariableInfo {
688                    name: path_edges_var,
689                    data_type: LogicalType::Any,
690                    is_node: false,
691                    is_edge: false,
692                },
693            );
694        }
695
696        Ok(())
697    }
698
699    /// Binds a filter operator.
700    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
701        // First bind the input
702        self.bind_operator(&filter.input)?;
703
704        // Validate the predicate expression
705        self.validate_expression(&filter.predicate)?;
706
707        Ok(())
708    }
709
710    /// Binds a return operator.
711    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
712        // First bind the input
713        self.bind_operator(&ret.input)?;
714
715        // Validate all return expressions
716        for item in &ret.items {
717            self.validate_return_item(item)?;
718        }
719
720        Ok(())
721    }
722
723    /// Validates a return item.
724    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
725        self.validate_expression(&item.expression)
726    }
727
728    /// Validates that an expression only references defined variables.
729    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
730        match expr {
731            LogicalExpression::Variable(name) => {
732                if !self.context.contains(name) && !name.starts_with("_anon_") {
733                    return Err(undefined_variable_error(name, &self.context, ""));
734                }
735                Ok(())
736            }
737            LogicalExpression::Property { variable, .. } => {
738                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
739                    return Err(undefined_variable_error(
740                        variable,
741                        &self.context,
742                        " in property access",
743                    ));
744                }
745                Ok(())
746            }
747            LogicalExpression::Literal(_) => Ok(()),
748            LogicalExpression::Binary { left, right, .. } => {
749                self.validate_expression(left)?;
750                self.validate_expression(right)
751            }
752            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
753            LogicalExpression::FunctionCall { args, .. } => {
754                for arg in args {
755                    self.validate_expression(arg)?;
756                }
757                Ok(())
758            }
759            LogicalExpression::List(items) => {
760                for item in items {
761                    self.validate_expression(item)?;
762                }
763                Ok(())
764            }
765            LogicalExpression::Map(pairs) => {
766                for (_, value) in pairs {
767                    self.validate_expression(value)?;
768                }
769                Ok(())
770            }
771            LogicalExpression::IndexAccess { base, index } => {
772                self.validate_expression(base)?;
773                self.validate_expression(index)
774            }
775            LogicalExpression::SliceAccess { base, start, end } => {
776                self.validate_expression(base)?;
777                if let Some(s) = start {
778                    self.validate_expression(s)?;
779                }
780                if let Some(e) = end {
781                    self.validate_expression(e)?;
782                }
783                Ok(())
784            }
785            LogicalExpression::Case {
786                operand,
787                when_clauses,
788                else_clause,
789            } => {
790                if let Some(op) = operand {
791                    self.validate_expression(op)?;
792                }
793                for (cond, result) in when_clauses {
794                    self.validate_expression(cond)?;
795                    self.validate_expression(result)?;
796                }
797                if let Some(else_expr) = else_clause {
798                    self.validate_expression(else_expr)?;
799                }
800                Ok(())
801            }
802            // Parameter references are validated externally
803            LogicalExpression::Parameter(_) => Ok(()),
804            // labels(n), type(e), id(n) need the variable to be defined
805            LogicalExpression::Labels(var)
806            | LogicalExpression::Type(var)
807            | LogicalExpression::Id(var) => {
808                if !self.context.contains(var) && !var.starts_with("_anon_") {
809                    return Err(undefined_variable_error(var, &self.context, " in function"));
810                }
811                Ok(())
812            }
813            LogicalExpression::ListComprehension {
814                list_expr,
815                filter_expr,
816                map_expr,
817                ..
818            } => {
819                // Validate the list expression
820                self.validate_expression(list_expr)?;
821                // Note: filter_expr and map_expr use the comprehension variable
822                // which is defined within the comprehension scope, so we don't
823                // need to validate it against the outer context
824                if let Some(filter) = filter_expr {
825                    self.validate_expression(filter)?;
826                }
827                self.validate_expression(map_expr)?;
828                Ok(())
829            }
830            LogicalExpression::ExistsSubquery(subquery)
831            | LogicalExpression::CountSubquery(subquery) => {
832                // Subqueries have their own binding context
833                // For now, just validate the structure exists
834                let _ = subquery; // Would need recursive binding
835                Ok(())
836            }
837        }
838    }
839
840    /// Infers the type of an expression for use in WITH clause aliasing.
841    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
842        match expr {
843            LogicalExpression::Variable(name) => {
844                // Look up the variable type from context
845                self.context
846                    .get(name)
847                    .map_or(LogicalType::Any, |info| info.data_type.clone())
848            }
849            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
850            LogicalExpression::Literal(value) => {
851                // Infer type from literal value
852                use grafeo_common::types::Value;
853                match value {
854                    Value::Bool(_) => LogicalType::Bool,
855                    Value::Int64(_) => LogicalType::Int64,
856                    Value::Float64(_) => LogicalType::Float64,
857                    Value::String(_) => LogicalType::String,
858                    Value::List(_) => LogicalType::Any, // Complex type
859                    Value::Map(_) => LogicalType::Any,  // Complex type
860                    Value::Null => LogicalType::Any,
861                    _ => LogicalType::Any,
862                }
863            }
864            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
865            LogicalExpression::Unary { .. } => LogicalType::Any,
866            LogicalExpression::FunctionCall { name, .. } => {
867                // Infer based on function name
868                match name.to_lowercase().as_str() {
869                    "count" | "sum" | "id" => LogicalType::Int64,
870                    "avg" => LogicalType::Float64,
871                    "type" => LogicalType::String,
872                    // List-returning functions use Any since we don't track element type
873                    "labels" | "collect" => LogicalType::Any,
874                    _ => LogicalType::Any,
875                }
876            }
877            LogicalExpression::List(_) => LogicalType::Any, // Complex type
878            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
879            _ => LogicalType::Any,
880        }
881    }
882
883    /// Binds a join operator.
884    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
885        // Bind both sides of the join
886        self.bind_operator(&join.left)?;
887        self.bind_operator(&join.right)?;
888
889        // Validate join conditions
890        for condition in &join.conditions {
891            self.validate_expression(&condition.left)?;
892            self.validate_expression(&condition.right)?;
893        }
894
895        Ok(())
896    }
897
898    /// Binds an aggregate operator.
899    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
900        // Bind the input first
901        self.bind_operator(&agg.input)?;
902
903        // Validate group by expressions
904        for expr in &agg.group_by {
905            self.validate_expression(expr)?;
906        }
907
908        // Validate aggregate expressions
909        for agg_expr in &agg.aggregates {
910            if let Some(ref expr) = agg_expr.expression {
911                self.validate_expression(expr)?;
912            }
913            // Add the alias as a new variable if present
914            if let Some(ref alias) = agg_expr.alias {
915                self.context.add_variable(
916                    alias.clone(),
917                    VariableInfo {
918                        name: alias.clone(),
919                        data_type: LogicalType::Any,
920                        is_node: false,
921                        is_edge: false,
922                    },
923                );
924            }
925        }
926
927        Ok(())
928    }
929}
930
931impl Default for Binder {
932    fn default() -> Self {
933        Self::new()
934    }
935}
936
937#[cfg(test)]
938mod tests {
939    use super::*;
940    use crate::query::plan::{BinaryOp, FilterOp};
941
942    #[test]
943    fn test_bind_simple_scan() {
944        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
945            items: vec![ReturnItem {
946                expression: LogicalExpression::Variable("n".to_string()),
947                alias: None,
948            }],
949            distinct: false,
950            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
951                variable: "n".to_string(),
952                label: Some("Person".to_string()),
953                input: None,
954            })),
955        }));
956
957        let mut binder = Binder::new();
958        let result = binder.bind(&plan);
959
960        assert!(result.is_ok());
961        let ctx = result.unwrap();
962        assert!(ctx.contains("n"));
963        assert!(ctx.get("n").unwrap().is_node);
964    }
965
966    #[test]
967    fn test_bind_undefined_variable() {
968        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
969            items: vec![ReturnItem {
970                expression: LogicalExpression::Variable("undefined".to_string()),
971                alias: None,
972            }],
973            distinct: false,
974            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
975                variable: "n".to_string(),
976                label: None,
977                input: None,
978            })),
979        }));
980
981        let mut binder = Binder::new();
982        let result = binder.bind(&plan);
983
984        assert!(result.is_err());
985        let err = result.unwrap_err();
986        assert!(err.to_string().contains("Undefined variable"));
987    }
988
989    #[test]
990    fn test_bind_property_access() {
991        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
992            items: vec![ReturnItem {
993                expression: LogicalExpression::Property {
994                    variable: "n".to_string(),
995                    property: "name".to_string(),
996                },
997                alias: None,
998            }],
999            distinct: false,
1000            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1001                variable: "n".to_string(),
1002                label: Some("Person".to_string()),
1003                input: None,
1004            })),
1005        }));
1006
1007        let mut binder = Binder::new();
1008        let result = binder.bind(&plan);
1009
1010        assert!(result.is_ok());
1011    }
1012
1013    #[test]
1014    fn test_bind_filter_with_undefined_variable() {
1015        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1016            items: vec![ReturnItem {
1017                expression: LogicalExpression::Variable("n".to_string()),
1018                alias: None,
1019            }],
1020            distinct: false,
1021            input: Box::new(LogicalOperator::Filter(FilterOp {
1022                predicate: LogicalExpression::Binary {
1023                    left: Box::new(LogicalExpression::Property {
1024                        variable: "m".to_string(), // undefined!
1025                        property: "age".to_string(),
1026                    }),
1027                    op: BinaryOp::Gt,
1028                    right: Box::new(LogicalExpression::Literal(
1029                        grafeo_common::types::Value::Int64(30),
1030                    )),
1031                },
1032                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1033                    variable: "n".to_string(),
1034                    label: None,
1035                    input: None,
1036                })),
1037            })),
1038        }));
1039
1040        let mut binder = Binder::new();
1041        let result = binder.bind(&plan);
1042
1043        assert!(result.is_err());
1044        let err = result.unwrap_err();
1045        assert!(err.to_string().contains("Undefined variable 'm'"));
1046    }
1047
1048    #[test]
1049    fn test_bind_expand() {
1050        use crate::query::plan::{ExpandDirection, ExpandOp};
1051
1052        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1053            items: vec![
1054                ReturnItem {
1055                    expression: LogicalExpression::Variable("a".to_string()),
1056                    alias: None,
1057                },
1058                ReturnItem {
1059                    expression: LogicalExpression::Variable("b".to_string()),
1060                    alias: None,
1061                },
1062            ],
1063            distinct: false,
1064            input: Box::new(LogicalOperator::Expand(ExpandOp {
1065                from_variable: "a".to_string(),
1066                to_variable: "b".to_string(),
1067                edge_variable: Some("e".to_string()),
1068                direction: ExpandDirection::Outgoing,
1069                edge_type: Some("KNOWS".to_string()),
1070                min_hops: 1,
1071                max_hops: Some(1),
1072                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1073                    variable: "a".to_string(),
1074                    label: Some("Person".to_string()),
1075                    input: None,
1076                })),
1077                path_alias: None,
1078            })),
1079        }));
1080
1081        let mut binder = Binder::new();
1082        let result = binder.bind(&plan);
1083
1084        assert!(result.is_ok());
1085        let ctx = result.unwrap();
1086        assert!(ctx.contains("a"));
1087        assert!(ctx.contains("b"));
1088        assert!(ctx.contains("e"));
1089        assert!(ctx.get("a").unwrap().is_node);
1090        assert!(ctx.get("b").unwrap().is_node);
1091        assert!(ctx.get("e").unwrap().is_edge);
1092    }
1093
1094    #[test]
1095    fn test_bind_expand_from_undefined_variable() {
1096        // Tests that expanding from an undefined variable produces a clear error
1097        use crate::query::plan::{ExpandDirection, ExpandOp};
1098
1099        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1100            items: vec![ReturnItem {
1101                expression: LogicalExpression::Variable("b".to_string()),
1102                alias: None,
1103            }],
1104            distinct: false,
1105            input: Box::new(LogicalOperator::Expand(ExpandOp {
1106                from_variable: "undefined".to_string(), // not defined!
1107                to_variable: "b".to_string(),
1108                edge_variable: None,
1109                direction: ExpandDirection::Outgoing,
1110                edge_type: None,
1111                min_hops: 1,
1112                max_hops: Some(1),
1113                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1114                    variable: "a".to_string(),
1115                    label: None,
1116                    input: None,
1117                })),
1118                path_alias: None,
1119            })),
1120        }));
1121
1122        let mut binder = Binder::new();
1123        let result = binder.bind(&plan);
1124
1125        assert!(result.is_err());
1126        let err = result.unwrap_err();
1127        assert!(
1128            err.to_string().contains("Undefined variable 'undefined'"),
1129            "Expected error about undefined variable, got: {}",
1130            err
1131        );
1132    }
1133
1134    #[test]
1135    fn test_bind_return_with_aggregate_and_non_aggregate() {
1136        // Tests binding of aggregate functions alongside regular expressions
1137        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1138            items: vec![
1139                ReturnItem {
1140                    expression: LogicalExpression::FunctionCall {
1141                        name: "count".to_string(),
1142                        args: vec![LogicalExpression::Variable("n".to_string())],
1143                        distinct: false,
1144                    },
1145                    alias: Some("cnt".to_string()),
1146                },
1147                ReturnItem {
1148                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1149                    alias: Some("one".to_string()),
1150                },
1151            ],
1152            distinct: false,
1153            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1154                variable: "n".to_string(),
1155                label: Some("Person".to_string()),
1156                input: None,
1157            })),
1158        }));
1159
1160        let mut binder = Binder::new();
1161        let result = binder.bind(&plan);
1162
1163        // This should succeed - count(n) with literal is valid
1164        assert!(result.is_ok());
1165    }
1166
1167    #[test]
1168    fn test_bind_nested_property_access() {
1169        // Tests that nested property access on the same variable works
1170        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1171            items: vec![
1172                ReturnItem {
1173                    expression: LogicalExpression::Property {
1174                        variable: "n".to_string(),
1175                        property: "name".to_string(),
1176                    },
1177                    alias: None,
1178                },
1179                ReturnItem {
1180                    expression: LogicalExpression::Property {
1181                        variable: "n".to_string(),
1182                        property: "age".to_string(),
1183                    },
1184                    alias: None,
1185                },
1186            ],
1187            distinct: false,
1188            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1189                variable: "n".to_string(),
1190                label: Some("Person".to_string()),
1191                input: None,
1192            })),
1193        }));
1194
1195        let mut binder = Binder::new();
1196        let result = binder.bind(&plan);
1197
1198        assert!(result.is_ok());
1199    }
1200
1201    #[test]
1202    fn test_bind_binary_expression_with_undefined() {
1203        // Tests that binary expressions with undefined variables produce errors
1204        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1205            items: vec![ReturnItem {
1206                expression: LogicalExpression::Binary {
1207                    left: Box::new(LogicalExpression::Property {
1208                        variable: "n".to_string(),
1209                        property: "age".to_string(),
1210                    }),
1211                    op: BinaryOp::Add,
1212                    right: Box::new(LogicalExpression::Property {
1213                        variable: "m".to_string(), // undefined!
1214                        property: "age".to_string(),
1215                    }),
1216                },
1217                alias: Some("total".to_string()),
1218            }],
1219            distinct: false,
1220            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1221                variable: "n".to_string(),
1222                label: None,
1223                input: None,
1224            })),
1225        }));
1226
1227        let mut binder = Binder::new();
1228        let result = binder.bind(&plan);
1229
1230        assert!(result.is_err());
1231        assert!(
1232            result
1233                .unwrap_err()
1234                .to_string()
1235                .contains("Undefined variable 'm'")
1236        );
1237    }
1238
1239    #[test]
1240    fn test_bind_duplicate_variable_definition() {
1241        // Tests behavior when the same variable is defined twice (via two NodeScans)
1242        // This is typically not allowed or the second shadows the first
1243        use crate::query::plan::{JoinOp, JoinType};
1244
1245        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1246            items: vec![ReturnItem {
1247                expression: LogicalExpression::Variable("n".to_string()),
1248                alias: None,
1249            }],
1250            distinct: false,
1251            input: Box::new(LogicalOperator::Join(JoinOp {
1252                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1253                    variable: "n".to_string(),
1254                    label: Some("A".to_string()),
1255                    input: None,
1256                })),
1257                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1258                    variable: "m".to_string(), // different variable is fine
1259                    label: Some("B".to_string()),
1260                    input: None,
1261                })),
1262                join_type: JoinType::Inner,
1263                conditions: vec![],
1264            })),
1265        }));
1266
1267        let mut binder = Binder::new();
1268        let result = binder.bind(&plan);
1269
1270        // Join with different variables should work
1271        assert!(result.is_ok());
1272        let ctx = result.unwrap();
1273        assert!(ctx.contains("n"));
1274        assert!(ctx.contains("m"));
1275    }
1276
1277    #[test]
1278    fn test_bind_function_with_wrong_arity() {
1279        // Tests that functions with wrong number of arguments are handled
1280        // (behavior depends on whether binder validates arity)
1281        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1282            items: vec![ReturnItem {
1283                expression: LogicalExpression::FunctionCall {
1284                    name: "count".to_string(),
1285                    args: vec![], // count() needs an argument
1286                    distinct: false,
1287                },
1288                alias: None,
1289            }],
1290            distinct: false,
1291            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1292                variable: "n".to_string(),
1293                label: None,
1294                input: None,
1295            })),
1296        }));
1297
1298        let mut binder = Binder::new();
1299        let result = binder.bind(&plan);
1300
1301        // The binder may or may not catch this - if it passes, execution will fail
1302        // This test documents current behavior
1303        // If binding fails, that's fine; if it passes, execution will handle it
1304        let _ = result; // We're just testing it doesn't panic
1305    }
1306
1307    // --- Mutation operator validation ---
1308
1309    #[test]
1310    fn test_create_edge_rejects_undefined_source() {
1311        use crate::query::plan::CreateEdgeOp;
1312
1313        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1314            variable: Some("e".to_string()),
1315            from_variable: "ghost".to_string(), // not defined!
1316            to_variable: "b".to_string(),
1317            edge_type: "KNOWS".to_string(),
1318            properties: vec![],
1319            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1320                variable: "b".to_string(),
1321                label: None,
1322                input: None,
1323            })),
1324        }));
1325
1326        let mut binder = Binder::new();
1327        let err = binder.bind(&plan).unwrap_err();
1328        assert!(
1329            err.to_string().contains("Undefined variable 'ghost'"),
1330            "Should reject undefined source variable, got: {err}"
1331        );
1332    }
1333
1334    #[test]
1335    fn test_create_edge_rejects_undefined_target() {
1336        use crate::query::plan::CreateEdgeOp;
1337
1338        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1339            variable: None,
1340            from_variable: "a".to_string(),
1341            to_variable: "missing".to_string(), // not defined!
1342            edge_type: "KNOWS".to_string(),
1343            properties: vec![],
1344            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1345                variable: "a".to_string(),
1346                label: None,
1347                input: None,
1348            })),
1349        }));
1350
1351        let mut binder = Binder::new();
1352        let err = binder.bind(&plan).unwrap_err();
1353        assert!(
1354            err.to_string().contains("Undefined variable 'missing'"),
1355            "Should reject undefined target variable, got: {err}"
1356        );
1357    }
1358
1359    #[test]
1360    fn test_create_edge_validates_property_expressions() {
1361        use crate::query::plan::CreateEdgeOp;
1362
1363        // Source and target defined, but property references undefined variable
1364        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1365            variable: Some("e".to_string()),
1366            from_variable: "a".to_string(),
1367            to_variable: "b".to_string(),
1368            edge_type: "KNOWS".to_string(),
1369            properties: vec![(
1370                "since".to_string(),
1371                LogicalExpression::Property {
1372                    variable: "x".to_string(), // undefined!
1373                    property: "year".to_string(),
1374                },
1375            )],
1376            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1377                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1378                    variable: "a".to_string(),
1379                    label: None,
1380                    input: None,
1381                })),
1382                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1383                    variable: "b".to_string(),
1384                    label: None,
1385                    input: None,
1386                })),
1387                join_type: crate::query::plan::JoinType::Inner,
1388                conditions: vec![],
1389            })),
1390        }));
1391
1392        let mut binder = Binder::new();
1393        let err = binder.bind(&plan).unwrap_err();
1394        assert!(err.to_string().contains("Undefined variable 'x'"));
1395    }
1396
1397    #[test]
1398    fn test_set_property_rejects_undefined_variable() {
1399        use crate::query::plan::SetPropertyOp;
1400
1401        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1402            variable: "ghost".to_string(),
1403            properties: vec![(
1404                "name".to_string(),
1405                LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1406            )],
1407            replace: false,
1408            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1409                variable: "n".to_string(),
1410                label: None,
1411                input: None,
1412            })),
1413        }));
1414
1415        let mut binder = Binder::new();
1416        let err = binder.bind(&plan).unwrap_err();
1417        assert!(
1418            err.to_string().contains("in SET"),
1419            "Error should indicate SET context, got: {err}"
1420        );
1421    }
1422
1423    #[test]
1424    fn test_delete_node_rejects_undefined_variable() {
1425        use crate::query::plan::DeleteNodeOp;
1426
1427        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1428            variable: "phantom".to_string(),
1429            detach: false,
1430            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1431                variable: "n".to_string(),
1432                label: None,
1433                input: None,
1434            })),
1435        }));
1436
1437        let mut binder = Binder::new();
1438        let err = binder.bind(&plan).unwrap_err();
1439        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1440    }
1441
1442    #[test]
1443    fn test_delete_edge_rejects_undefined_variable() {
1444        use crate::query::plan::DeleteEdgeOp;
1445
1446        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1447            variable: "gone".to_string(),
1448            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1449                variable: "n".to_string(),
1450                label: None,
1451                input: None,
1452            })),
1453        }));
1454
1455        let mut binder = Binder::new();
1456        let err = binder.bind(&plan).unwrap_err();
1457        assert!(err.to_string().contains("Undefined variable 'gone'"));
1458    }
1459
1460    // --- WITH/Project clause ---
1461
1462    #[test]
1463    fn test_project_alias_becomes_available_downstream() {
1464        use crate::query::plan::{ProjectOp, Projection};
1465
1466        // WITH n.name AS person_name RETURN person_name
1467        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1468            items: vec![ReturnItem {
1469                expression: LogicalExpression::Variable("person_name".to_string()),
1470                alias: None,
1471            }],
1472            distinct: false,
1473            input: Box::new(LogicalOperator::Project(ProjectOp {
1474                projections: vec![Projection {
1475                    expression: LogicalExpression::Property {
1476                        variable: "n".to_string(),
1477                        property: "name".to_string(),
1478                    },
1479                    alias: Some("person_name".to_string()),
1480                }],
1481                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1482                    variable: "n".to_string(),
1483                    label: None,
1484                    input: None,
1485                })),
1486            })),
1487        }));
1488
1489        let mut binder = Binder::new();
1490        let ctx = binder.bind(&plan).unwrap();
1491        assert!(
1492            ctx.contains("person_name"),
1493            "WITH alias should be available to RETURN"
1494        );
1495    }
1496
1497    #[test]
1498    fn test_project_rejects_undefined_expression() {
1499        use crate::query::plan::{ProjectOp, Projection};
1500
1501        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1502            projections: vec![Projection {
1503                expression: LogicalExpression::Variable("nope".to_string()),
1504                alias: Some("x".to_string()),
1505            }],
1506            input: Box::new(LogicalOperator::Empty),
1507        }));
1508
1509        let mut binder = Binder::new();
1510        let result = binder.bind(&plan);
1511        assert!(result.is_err(), "WITH on undefined variable should fail");
1512    }
1513
1514    // --- UNWIND ---
1515
1516    #[test]
1517    fn test_unwind_adds_element_variable() {
1518        use crate::query::plan::UnwindOp;
1519
1520        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1521            items: vec![ReturnItem {
1522                expression: LogicalExpression::Variable("item".to_string()),
1523                alias: None,
1524            }],
1525            distinct: false,
1526            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1527                expression: LogicalExpression::List(vec![
1528                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1529                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1530                ]),
1531                variable: "item".to_string(),
1532                input: Box::new(LogicalOperator::Empty),
1533            })),
1534        }));
1535
1536        let mut binder = Binder::new();
1537        let ctx = binder.bind(&plan).unwrap();
1538        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1539        let info = ctx.get("item").unwrap();
1540        assert!(
1541            !info.is_node && !info.is_edge,
1542            "UNWIND variable is not a graph element"
1543        );
1544    }
1545
1546    // --- MERGE ---
1547
1548    #[test]
1549    fn test_merge_adds_variable_and_validates_properties() {
1550        use crate::query::plan::MergeOp;
1551
1552        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1553            items: vec![ReturnItem {
1554                expression: LogicalExpression::Variable("m".to_string()),
1555                alias: None,
1556            }],
1557            distinct: false,
1558            input: Box::new(LogicalOperator::Merge(MergeOp {
1559                variable: "m".to_string(),
1560                labels: vec!["Person".to_string()],
1561                match_properties: vec![(
1562                    "name".to_string(),
1563                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1564                )],
1565                on_create: vec![(
1566                    "created".to_string(),
1567                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1568                )],
1569                on_match: vec![(
1570                    "updated".to_string(),
1571                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1572                )],
1573                input: Box::new(LogicalOperator::Empty),
1574            })),
1575        }));
1576
1577        let mut binder = Binder::new();
1578        let ctx = binder.bind(&plan).unwrap();
1579        assert!(ctx.contains("m"));
1580        assert!(
1581            ctx.get("m").unwrap().is_node,
1582            "MERGE variable should be a node"
1583        );
1584    }
1585
1586    #[test]
1587    fn test_merge_rejects_undefined_in_on_create() {
1588        use crate::query::plan::MergeOp;
1589
1590        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
1591            variable: "m".to_string(),
1592            labels: vec![],
1593            match_properties: vec![],
1594            on_create: vec![(
1595                "name".to_string(),
1596                LogicalExpression::Property {
1597                    variable: "other".to_string(), // undefined!
1598                    property: "name".to_string(),
1599                },
1600            )],
1601            on_match: vec![],
1602            input: Box::new(LogicalOperator::Empty),
1603        }));
1604
1605        let mut binder = Binder::new();
1606        let result = binder.bind(&plan);
1607        assert!(
1608            result.is_err(),
1609            "ON CREATE referencing undefined variable should fail"
1610        );
1611    }
1612
1613    // --- ShortestPath ---
1614
1615    #[test]
1616    fn test_shortest_path_rejects_undefined_source() {
1617        use crate::query::plan::{ExpandDirection, ShortestPathOp};
1618
1619        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1620            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1621                variable: "b".to_string(),
1622                label: None,
1623                input: None,
1624            })),
1625            source_var: "missing".to_string(), // not defined
1626            target_var: "b".to_string(),
1627            edge_type: None,
1628            direction: ExpandDirection::Both,
1629            path_alias: "p".to_string(),
1630            all_paths: false,
1631        }));
1632
1633        let mut binder = Binder::new();
1634        let err = binder.bind(&plan).unwrap_err();
1635        assert!(
1636            err.to_string().contains("source in shortestPath"),
1637            "Error should mention shortestPath source context, got: {err}"
1638        );
1639    }
1640
1641    #[test]
1642    fn test_shortest_path_adds_path_and_length_variables() {
1643        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
1644
1645        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1646            input: Box::new(LogicalOperator::Join(JoinOp {
1647                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1648                    variable: "a".to_string(),
1649                    label: None,
1650                    input: None,
1651                })),
1652                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1653                    variable: "b".to_string(),
1654                    label: None,
1655                    input: None,
1656                })),
1657                join_type: JoinType::Cross,
1658                conditions: vec![],
1659            })),
1660            source_var: "a".to_string(),
1661            target_var: "b".to_string(),
1662            edge_type: Some("ROAD".to_string()),
1663            direction: ExpandDirection::Outgoing,
1664            path_alias: "p".to_string(),
1665            all_paths: false,
1666        }));
1667
1668        let mut binder = Binder::new();
1669        let ctx = binder.bind(&plan).unwrap();
1670        assert!(ctx.contains("p"), "Path alias should be bound");
1671        assert!(
1672            ctx.contains("_path_length_p"),
1673            "Path length variable should be auto-created"
1674        );
1675    }
1676
1677    // --- Expression validation edge cases ---
1678
1679    #[test]
1680    fn test_case_expression_validates_all_branches() {
1681        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1682            items: vec![ReturnItem {
1683                expression: LogicalExpression::Case {
1684                    operand: None,
1685                    when_clauses: vec![
1686                        (
1687                            LogicalExpression::Binary {
1688                                left: Box::new(LogicalExpression::Property {
1689                                    variable: "n".to_string(),
1690                                    property: "age".to_string(),
1691                                }),
1692                                op: BinaryOp::Gt,
1693                                right: Box::new(LogicalExpression::Literal(
1694                                    grafeo_common::types::Value::Int64(18),
1695                                )),
1696                            },
1697                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1698                                "adult".into(),
1699                            )),
1700                        ),
1701                        (
1702                            // This branch references undefined variable
1703                            LogicalExpression::Property {
1704                                variable: "ghost".to_string(),
1705                                property: "flag".to_string(),
1706                            },
1707                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1708                                "flagged".into(),
1709                            )),
1710                        ),
1711                    ],
1712                    else_clause: Some(Box::new(LogicalExpression::Literal(
1713                        grafeo_common::types::Value::String("other".into()),
1714                    ))),
1715                },
1716                alias: None,
1717            }],
1718            distinct: false,
1719            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1720                variable: "n".to_string(),
1721                label: None,
1722                input: None,
1723            })),
1724        }));
1725
1726        let mut binder = Binder::new();
1727        let err = binder.bind(&plan).unwrap_err();
1728        assert!(
1729            err.to_string().contains("ghost"),
1730            "CASE should validate all when-clause conditions"
1731        );
1732    }
1733
1734    #[test]
1735    fn test_case_expression_validates_else_clause() {
1736        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1737            items: vec![ReturnItem {
1738                expression: LogicalExpression::Case {
1739                    operand: None,
1740                    when_clauses: vec![(
1741                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1742                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1743                    )],
1744                    else_clause: Some(Box::new(LogicalExpression::Property {
1745                        variable: "missing".to_string(),
1746                        property: "x".to_string(),
1747                    })),
1748                },
1749                alias: None,
1750            }],
1751            distinct: false,
1752            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1753                variable: "n".to_string(),
1754                label: None,
1755                input: None,
1756            })),
1757        }));
1758
1759        let mut binder = Binder::new();
1760        let err = binder.bind(&plan).unwrap_err();
1761        assert!(
1762            err.to_string().contains("missing"),
1763            "CASE ELSE should validate its expression too"
1764        );
1765    }
1766
1767    #[test]
1768    fn test_slice_access_validates_expressions() {
1769        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1770            items: vec![ReturnItem {
1771                expression: LogicalExpression::SliceAccess {
1772                    base: Box::new(LogicalExpression::Variable("n".to_string())),
1773                    start: Some(Box::new(LogicalExpression::Variable(
1774                        "undefined_start".to_string(),
1775                    ))),
1776                    end: None,
1777                },
1778                alias: None,
1779            }],
1780            distinct: false,
1781            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1782                variable: "n".to_string(),
1783                label: None,
1784                input: None,
1785            })),
1786        }));
1787
1788        let mut binder = Binder::new();
1789        let err = binder.bind(&plan).unwrap_err();
1790        assert!(err.to_string().contains("undefined_start"));
1791    }
1792
1793    #[test]
1794    fn test_list_comprehension_validates_list_source() {
1795        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1796            items: vec![ReturnItem {
1797                expression: LogicalExpression::ListComprehension {
1798                    variable: "x".to_string(),
1799                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
1800                    filter_expr: None,
1801                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
1802                },
1803                alias: None,
1804            }],
1805            distinct: false,
1806            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1807                variable: "n".to_string(),
1808                label: None,
1809                input: None,
1810            })),
1811        }));
1812
1813        let mut binder = Binder::new();
1814        let err = binder.bind(&plan).unwrap_err();
1815        assert!(
1816            err.to_string().contains("not_defined"),
1817            "List comprehension should validate source list expression"
1818        );
1819    }
1820
1821    #[test]
1822    fn test_labels_type_id_reject_undefined() {
1823        // labels(x) where x is not defined
1824        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1825            items: vec![ReturnItem {
1826                expression: LogicalExpression::Labels("x".to_string()),
1827                alias: None,
1828            }],
1829            distinct: false,
1830            input: Box::new(LogicalOperator::Empty),
1831        }));
1832
1833        let mut binder = Binder::new();
1834        assert!(
1835            binder.bind(&plan).is_err(),
1836            "labels(x) on undefined x should fail"
1837        );
1838
1839        // type(e) where e is not defined
1840        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1841            items: vec![ReturnItem {
1842                expression: LogicalExpression::Type("e".to_string()),
1843                alias: None,
1844            }],
1845            distinct: false,
1846            input: Box::new(LogicalOperator::Empty),
1847        }));
1848
1849        let mut binder2 = Binder::new();
1850        assert!(
1851            binder2.bind(&plan2).is_err(),
1852            "type(e) on undefined e should fail"
1853        );
1854
1855        // id(n) where n is not defined
1856        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1857            items: vec![ReturnItem {
1858                expression: LogicalExpression::Id("n".to_string()),
1859                alias: None,
1860            }],
1861            distinct: false,
1862            input: Box::new(LogicalOperator::Empty),
1863        }));
1864
1865        let mut binder3 = Binder::new();
1866        assert!(
1867            binder3.bind(&plan3).is_err(),
1868            "id(n) on undefined n should fail"
1869        );
1870    }
1871
1872    #[test]
1873    fn test_expand_rejects_non_node_source() {
1874        use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};
1875
1876        // UNWIND [1,2] AS x  -- x is not a node
1877        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
1878        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1879            items: vec![ReturnItem {
1880                expression: LogicalExpression::Variable("b".to_string()),
1881                alias: None,
1882            }],
1883            distinct: false,
1884            input: Box::new(LogicalOperator::Expand(ExpandOp {
1885                from_variable: "x".to_string(),
1886                to_variable: "b".to_string(),
1887                edge_variable: None,
1888                direction: ExpandDirection::Outgoing,
1889                edge_type: None,
1890                min_hops: 1,
1891                max_hops: Some(1),
1892                input: Box::new(LogicalOperator::Unwind(UnwindOp {
1893                    expression: LogicalExpression::List(vec![]),
1894                    variable: "x".to_string(),
1895                    input: Box::new(LogicalOperator::Empty),
1896                })),
1897                path_alias: None,
1898            })),
1899        }));
1900
1901        let mut binder = Binder::new();
1902        let err = binder.bind(&plan).unwrap_err();
1903        assert!(
1904            err.to_string().contains("not a node"),
1905            "Expanding from non-node should fail, got: {err}"
1906        );
1907    }
1908
1909    #[test]
1910    fn test_add_label_rejects_undefined_variable() {
1911        use crate::query::plan::AddLabelOp;
1912
1913        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
1914            variable: "missing".to_string(),
1915            labels: vec!["Admin".to_string()],
1916            input: Box::new(LogicalOperator::Empty),
1917        }));
1918
1919        let mut binder = Binder::new();
1920        let err = binder.bind(&plan).unwrap_err();
1921        assert!(err.to_string().contains("SET labels"));
1922    }
1923
1924    #[test]
1925    fn test_remove_label_rejects_undefined_variable() {
1926        use crate::query::plan::RemoveLabelOp;
1927
1928        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
1929            variable: "missing".to_string(),
1930            labels: vec!["Admin".to_string()],
1931            input: Box::new(LogicalOperator::Empty),
1932        }));
1933
1934        let mut binder = Binder::new();
1935        let err = binder.bind(&plan).unwrap_err();
1936        assert!(err.to_string().contains("REMOVE labels"));
1937    }
1938
1939    #[test]
1940    fn test_sort_validates_key_expressions() {
1941        use crate::query::plan::{SortKey, SortOp, SortOrder};
1942
1943        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
1944            keys: vec![SortKey {
1945                expression: LogicalExpression::Property {
1946                    variable: "missing".to_string(),
1947                    property: "name".to_string(),
1948                },
1949                order: SortOrder::Ascending,
1950            }],
1951            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1952                variable: "n".to_string(),
1953                label: None,
1954                input: None,
1955            })),
1956        }));
1957
1958        let mut binder = Binder::new();
1959        assert!(
1960            binder.bind(&plan).is_err(),
1961            "ORDER BY on undefined variable should fail"
1962        );
1963    }
1964
1965    #[test]
1966    fn test_create_node_adds_variable_before_property_validation() {
1967        use crate::query::plan::CreateNodeOp;
1968
1969        // CREATE (n:Person {friend: n.name}) - referencing the node being created
1970        // The variable should be available for property expressions (self-reference)
1971        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
1972            variable: "n".to_string(),
1973            labels: vec!["Person".to_string()],
1974            properties: vec![(
1975                "self_ref".to_string(),
1976                LogicalExpression::Property {
1977                    variable: "n".to_string(),
1978                    property: "name".to_string(),
1979                },
1980            )],
1981            input: None,
1982        }));
1983
1984        let mut binder = Binder::new();
1985        // This should succeed because CreateNode adds the variable before validating properties
1986        let ctx = binder.bind(&plan).unwrap();
1987        assert!(ctx.get("n").unwrap().is_node);
1988    }
1989
1990    #[test]
1991    fn test_undefined_variable_suggests_similar() {
1992        // 'person' is defined, user types 'persn' - should get a suggestion
1993        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1994            items: vec![ReturnItem {
1995                expression: LogicalExpression::Variable("persn".to_string()),
1996                alias: None,
1997            }],
1998            distinct: false,
1999            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2000                variable: "person".to_string(),
2001                label: None,
2002                input: None,
2003            })),
2004        }));
2005
2006        let mut binder = Binder::new();
2007        let err = binder.bind(&plan).unwrap_err();
2008        let msg = err.to_string();
2009        // The error should contain the variable name at minimum
2010        assert!(
2011            msg.contains("persn"),
2012            "Error should mention the undefined variable"
2013        );
2014    }
2015
2016    #[test]
2017    fn test_anon_variables_skip_validation() {
2018        // Variables starting with _anon_ are anonymous and should be silently accepted
2019        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2020            items: vec![ReturnItem {
2021                expression: LogicalExpression::Variable("_anon_42".to_string()),
2022                alias: None,
2023            }],
2024            distinct: false,
2025            input: Box::new(LogicalOperator::Empty),
2026        }));
2027
2028        let mut binder = Binder::new();
2029        let result = binder.bind(&plan);
2030        assert!(
2031            result.is_ok(),
2032            "Anonymous variables should bypass validation"
2033        );
2034    }
2035
2036    #[test]
2037    fn test_map_expression_validates_values() {
2038        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2039            items: vec![ReturnItem {
2040                expression: LogicalExpression::Map(vec![(
2041                    "key".to_string(),
2042                    LogicalExpression::Variable("undefined".to_string()),
2043                )]),
2044                alias: None,
2045            }],
2046            distinct: false,
2047            input: Box::new(LogicalOperator::Empty),
2048        }));
2049
2050        let mut binder = Binder::new();
2051        assert!(
2052            binder.bind(&plan).is_err(),
2053            "Map values should be validated"
2054        );
2055    }
2056
2057    #[test]
2058    fn test_vector_scan_validates_query_vector() {
2059        use crate::query::plan::VectorScanOp;
2060
2061        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2062            variable: "result".to_string(),
2063            index_name: None,
2064            property: "embedding".to_string(),
2065            label: Some("Doc".to_string()),
2066            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2067            k: 10,
2068            metric: None,
2069            min_similarity: None,
2070            max_distance: None,
2071            input: None,
2072        }));
2073
2074        let mut binder = Binder::new();
2075        let err = binder.bind(&plan).unwrap_err();
2076        assert!(err.to_string().contains("undefined_vec"));
2077    }
2078}