Skip to main content

grafeo_engine/query/
binder.rs

//! Semantic validation - catching errors before execution.
//!
//! The binder walks the logical plan and validates that everything makes sense:
//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
//! - Does that property access make sense? (Accessing `.age` on an integer fails)
//! - Are types compatible? (Can't compare a string to an integer)
//!
//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                Ok(())
314            }
315
316            // RDF/SPARQL operators
317            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
318            LogicalOperator::Union(union) => {
319                for input in &union.inputs {
320                    self.bind_operator(input)?;
321                }
322                Ok(())
323            }
324            LogicalOperator::LeftJoin(lj) => {
325                self.bind_operator(&lj.left)?;
326                self.bind_operator(&lj.right)?;
327                if let Some(ref cond) = lj.condition {
328                    self.validate_expression(cond)?;
329                }
330                Ok(())
331            }
332            LogicalOperator::AntiJoin(aj) => {
333                self.bind_operator(&aj.left)?;
334                self.bind_operator(&aj.right)?;
335                Ok(())
336            }
337            LogicalOperator::Bind(bind) => {
338                self.bind_operator(&bind.input)?;
339                self.validate_expression(&bind.expression)?;
340                self.context.add_variable(
341                    bind.variable.clone(),
342                    VariableInfo {
343                        name: bind.variable.clone(),
344                        data_type: LogicalType::Any,
345                        is_node: false,
346                        is_edge: false,
347                    },
348                );
349                Ok(())
350            }
351            LogicalOperator::Merge(merge) => {
352                // First bind the input
353                self.bind_operator(&merge.input)?;
354                // Validate the match property expressions
355                for (_, expr) in &merge.match_properties {
356                    self.validate_expression(expr)?;
357                }
358                // Validate the ON CREATE property expressions
359                for (_, expr) in &merge.on_create {
360                    self.validate_expression(expr)?;
361                }
362                // Validate the ON MATCH property expressions
363                for (_, expr) in &merge.on_match {
364                    self.validate_expression(expr)?;
365                }
366                // MERGE introduces a new variable
367                self.context.add_variable(
368                    merge.variable.clone(),
369                    VariableInfo {
370                        name: merge.variable.clone(),
371                        data_type: LogicalType::Node,
372                        is_node: true,
373                        is_edge: false,
374                    },
375                );
376                Ok(())
377            }
378            LogicalOperator::AddLabel(add_label) => {
379                self.bind_operator(&add_label.input)?;
380                // Validate that the variable exists
381                if !self.context.contains(&add_label.variable) {
382                    return Err(undefined_variable_error(
383                        &add_label.variable,
384                        &self.context,
385                        " in SET labels",
386                    ));
387                }
388                Ok(())
389            }
390            LogicalOperator::RemoveLabel(remove_label) => {
391                self.bind_operator(&remove_label.input)?;
392                // Validate that the variable exists
393                if !self.context.contains(&remove_label.variable) {
394                    return Err(undefined_variable_error(
395                        &remove_label.variable,
396                        &self.context,
397                        " in REMOVE labels",
398                    ));
399                }
400                Ok(())
401            }
402            LogicalOperator::ShortestPath(sp) => {
403                // First bind the input
404                self.bind_operator(&sp.input)?;
405                // Validate that source and target variables are defined
406                if !self.context.contains(&sp.source_var) {
407                    return Err(undefined_variable_error(
408                        &sp.source_var,
409                        &self.context,
410                        " (source in shortestPath)",
411                    ));
412                }
413                if !self.context.contains(&sp.target_var) {
414                    return Err(undefined_variable_error(
415                        &sp.target_var,
416                        &self.context,
417                        " (target in shortestPath)",
418                    ));
419                }
420                // Add the path alias variable to the context
421                self.context.add_variable(
422                    sp.path_alias.clone(),
423                    VariableInfo {
424                        name: sp.path_alias.clone(),
425                        data_type: LogicalType::Any, // Path is a complex type
426                        is_node: false,
427                        is_edge: false,
428                    },
429                );
430                // Also add the path length variable for length(p) calls
431                let path_length_var = format!("_path_length_{}", sp.path_alias);
432                self.context.add_variable(
433                    path_length_var.clone(),
434                    VariableInfo {
435                        name: path_length_var,
436                        data_type: LogicalType::Int64,
437                        is_node: false,
438                        is_edge: false,
439                    },
440                );
441                Ok(())
442            }
443            // SPARQL Update operators - these don't require variable binding
444            LogicalOperator::InsertTriple(insert) => {
445                if let Some(ref input) = insert.input {
446                    self.bind_operator(input)?;
447                }
448                Ok(())
449            }
450            LogicalOperator::DeleteTriple(delete) => {
451                if let Some(ref input) = delete.input {
452                    self.bind_operator(input)?;
453                }
454                Ok(())
455            }
456            LogicalOperator::Modify(modify) => {
457                self.bind_operator(&modify.where_clause)?;
458                Ok(())
459            }
460            LogicalOperator::ClearGraph(_)
461            | LogicalOperator::CreateGraph(_)
462            | LogicalOperator::DropGraph(_)
463            | LogicalOperator::LoadGraph(_)
464            | LogicalOperator::CopyGraph(_)
465            | LogicalOperator::MoveGraph(_)
466            | LogicalOperator::AddGraph(_) => Ok(()),
467            LogicalOperator::VectorScan(scan) => {
468                // VectorScan introduces a variable for matched nodes
469                if let Some(ref input) = scan.input {
470                    self.bind_operator(input)?;
471                }
472                self.context.add_variable(
473                    scan.variable.clone(),
474                    VariableInfo {
475                        name: scan.variable.clone(),
476                        data_type: LogicalType::Node,
477                        is_node: true,
478                        is_edge: false,
479                    },
480                );
481                // Validate the query vector expression
482                self.validate_expression(&scan.query_vector)?;
483                Ok(())
484            }
485            LogicalOperator::VectorJoin(join) => {
486                // VectorJoin takes input from left side and produces right-side matches
487                self.bind_operator(&join.input)?;
488                // Add right variable for matched nodes
489                self.context.add_variable(
490                    join.right_variable.clone(),
491                    VariableInfo {
492                        name: join.right_variable.clone(),
493                        data_type: LogicalType::Node,
494                        is_node: true,
495                        is_edge: false,
496                    },
497                );
498                // Optionally add score variable
499                if let Some(ref score_var) = join.score_variable {
500                    self.context.add_variable(
501                        score_var.clone(),
502                        VariableInfo {
503                            name: score_var.clone(),
504                            data_type: LogicalType::Float64,
505                            is_node: false,
506                            is_edge: false,
507                        },
508                    );
509                }
510                // Validate the query vector expression
511                self.validate_expression(&join.query_vector)?;
512                Ok(())
513            }
514            // DDL operators don't need binding — they're handled before the binder
515            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
516            // Procedure calls: register yielded columns as variables for downstream operators
517            LogicalOperator::CallProcedure(call) => {
518                if let Some(yields) = &call.yield_items {
519                    for item in yields {
520                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
521                        self.context.add_variable(
522                            var_name.to_string(),
523                            VariableInfo {
524                                name: var_name.to_string(),
525                                data_type: LogicalType::Any,
526                                is_node: false,
527                                is_edge: false,
528                            },
529                        );
530                    }
531                }
532                Ok(())
533            }
534        }
535    }
536
537    /// Binds a triple scan operator (for RDF/SPARQL).
538    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
539        use crate::query::plan::TripleComponent;
540
541        // First bind the input if present
542        if let Some(ref input) = scan.input {
543            self.bind_operator(input)?;
544        }
545
546        // Add variables for subject, predicate, object
547        if let TripleComponent::Variable(name) = &scan.subject
548            && !self.context.contains(name)
549        {
550            self.context.add_variable(
551                name.clone(),
552                VariableInfo {
553                    name: name.clone(),
554                    data_type: LogicalType::Any, // RDF term
555                    is_node: false,
556                    is_edge: false,
557                },
558            );
559        }
560
561        if let TripleComponent::Variable(name) = &scan.predicate
562            && !self.context.contains(name)
563        {
564            self.context.add_variable(
565                name.clone(),
566                VariableInfo {
567                    name: name.clone(),
568                    data_type: LogicalType::Any, // IRI
569                    is_node: false,
570                    is_edge: false,
571                },
572            );
573        }
574
575        if let TripleComponent::Variable(name) = &scan.object
576            && !self.context.contains(name)
577        {
578            self.context.add_variable(
579                name.clone(),
580                VariableInfo {
581                    name: name.clone(),
582                    data_type: LogicalType::Any, // RDF term
583                    is_node: false,
584                    is_edge: false,
585                },
586            );
587        }
588
589        if let Some(TripleComponent::Variable(name)) = &scan.graph
590            && !self.context.contains(name)
591        {
592            self.context.add_variable(
593                name.clone(),
594                VariableInfo {
595                    name: name.clone(),
596                    data_type: LogicalType::Any, // IRI
597                    is_node: false,
598                    is_edge: false,
599                },
600            );
601        }
602
603        Ok(())
604    }
605
606    /// Binds a node scan operator.
607    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
608        // First bind the input if present
609        if let Some(ref input) = scan.input {
610            self.bind_operator(input)?;
611        }
612
613        // Add the scanned variable to scope
614        self.context.add_variable(
615            scan.variable.clone(),
616            VariableInfo {
617                name: scan.variable.clone(),
618                data_type: LogicalType::Node,
619                is_node: true,
620                is_edge: false,
621            },
622        );
623
624        Ok(())
625    }
626
627    /// Binds an expand operator.
628    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
629        // First bind the input
630        self.bind_operator(&expand.input)?;
631
632        // Validate that the source variable is defined
633        if !self.context.contains(&expand.from_variable) {
634            return Err(undefined_variable_error(
635                &expand.from_variable,
636                &self.context,
637                " in EXPAND",
638            ));
639        }
640
641        // Validate that the source is a node
642        if let Some(info) = self.context.get(&expand.from_variable)
643            && !info.is_node
644        {
645            return Err(binding_error(format!(
646                "Variable '{}' is not a node, cannot expand from it",
647                expand.from_variable
648            )));
649        }
650
651        // Add edge variable if present
652        if let Some(ref edge_var) = expand.edge_variable {
653            self.context.add_variable(
654                edge_var.clone(),
655                VariableInfo {
656                    name: edge_var.clone(),
657                    data_type: LogicalType::Edge,
658                    is_node: false,
659                    is_edge: true,
660                },
661            );
662        }
663
664        // Add target variable
665        self.context.add_variable(
666            expand.to_variable.clone(),
667            VariableInfo {
668                name: expand.to_variable.clone(),
669                data_type: LogicalType::Node,
670                is_node: true,
671                is_edge: false,
672            },
673        );
674
675        // Add path variables for variable-length paths
676        if let Some(ref path_alias) = expand.path_alias {
677            // length(p) → _path_length_p
678            let path_length_var = format!("_path_length_{}", path_alias);
679            self.context.add_variable(
680                path_length_var.clone(),
681                VariableInfo {
682                    name: path_length_var,
683                    data_type: LogicalType::Int64,
684                    is_node: false,
685                    is_edge: false,
686                },
687            );
688            // nodes(p) → _path_nodes_p
689            let path_nodes_var = format!("_path_nodes_{}", path_alias);
690            self.context.add_variable(
691                path_nodes_var.clone(),
692                VariableInfo {
693                    name: path_nodes_var,
694                    data_type: LogicalType::Any,
695                    is_node: false,
696                    is_edge: false,
697                },
698            );
699            // edges(p) → _path_edges_p
700            let path_edges_var = format!("_path_edges_{}", path_alias);
701            self.context.add_variable(
702                path_edges_var.clone(),
703                VariableInfo {
704                    name: path_edges_var,
705                    data_type: LogicalType::Any,
706                    is_node: false,
707                    is_edge: false,
708                },
709            );
710        }
711
712        Ok(())
713    }
714
715    /// Binds a filter operator.
716    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
717        // First bind the input
718        self.bind_operator(&filter.input)?;
719
720        // Validate the predicate expression
721        self.validate_expression(&filter.predicate)?;
722
723        Ok(())
724    }
725
726    /// Binds a return operator.
727    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
728        // First bind the input
729        self.bind_operator(&ret.input)?;
730
731        // Validate all return expressions
732        for item in &ret.items {
733            self.validate_return_item(item)?;
734        }
735
736        Ok(())
737    }
738
739    /// Validates a return item.
740    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
741        self.validate_expression(&item.expression)
742    }
743
744    /// Validates that an expression only references defined variables.
745    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
746        match expr {
747            LogicalExpression::Variable(name) => {
748                if !self.context.contains(name) && !name.starts_with("_anon_") {
749                    return Err(undefined_variable_error(name, &self.context, ""));
750                }
751                Ok(())
752            }
753            LogicalExpression::Property { variable, .. } => {
754                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
755                    return Err(undefined_variable_error(
756                        variable,
757                        &self.context,
758                        " in property access",
759                    ));
760                }
761                Ok(())
762            }
763            LogicalExpression::Literal(_) => Ok(()),
764            LogicalExpression::Binary { left, right, .. } => {
765                self.validate_expression(left)?;
766                self.validate_expression(right)
767            }
768            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
769            LogicalExpression::FunctionCall { args, .. } => {
770                for arg in args {
771                    self.validate_expression(arg)?;
772                }
773                Ok(())
774            }
775            LogicalExpression::List(items) => {
776                for item in items {
777                    self.validate_expression(item)?;
778                }
779                Ok(())
780            }
781            LogicalExpression::Map(pairs) => {
782                for (_, value) in pairs {
783                    self.validate_expression(value)?;
784                }
785                Ok(())
786            }
787            LogicalExpression::IndexAccess { base, index } => {
788                self.validate_expression(base)?;
789                self.validate_expression(index)
790            }
791            LogicalExpression::SliceAccess { base, start, end } => {
792                self.validate_expression(base)?;
793                if let Some(s) = start {
794                    self.validate_expression(s)?;
795                }
796                if let Some(e) = end {
797                    self.validate_expression(e)?;
798                }
799                Ok(())
800            }
801            LogicalExpression::Case {
802                operand,
803                when_clauses,
804                else_clause,
805            } => {
806                if let Some(op) = operand {
807                    self.validate_expression(op)?;
808                }
809                for (cond, result) in when_clauses {
810                    self.validate_expression(cond)?;
811                    self.validate_expression(result)?;
812                }
813                if let Some(else_expr) = else_clause {
814                    self.validate_expression(else_expr)?;
815                }
816                Ok(())
817            }
818            // Parameter references are validated externally
819            LogicalExpression::Parameter(_) => Ok(()),
820            // labels(n), type(e), id(n) need the variable to be defined
821            LogicalExpression::Labels(var)
822            | LogicalExpression::Type(var)
823            | LogicalExpression::Id(var) => {
824                if !self.context.contains(var) && !var.starts_with("_anon_") {
825                    return Err(undefined_variable_error(var, &self.context, " in function"));
826                }
827                Ok(())
828            }
829            LogicalExpression::ListComprehension {
830                list_expr,
831                filter_expr,
832                map_expr,
833                ..
834            } => {
835                // Validate the list expression
836                self.validate_expression(list_expr)?;
837                // Note: filter_expr and map_expr use the comprehension variable
838                // which is defined within the comprehension scope, so we don't
839                // need to validate it against the outer context
840                if let Some(filter) = filter_expr {
841                    self.validate_expression(filter)?;
842                }
843                self.validate_expression(map_expr)?;
844                Ok(())
845            }
846            LogicalExpression::ExistsSubquery(subquery)
847            | LogicalExpression::CountSubquery(subquery) => {
848                // Subqueries have their own binding context
849                // For now, just validate the structure exists
850                let _ = subquery; // Would need recursive binding
851                Ok(())
852            }
853        }
854    }
855
856    /// Infers the type of an expression for use in WITH clause aliasing.
857    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
858        match expr {
859            LogicalExpression::Variable(name) => {
860                // Look up the variable type from context
861                self.context
862                    .get(name)
863                    .map_or(LogicalType::Any, |info| info.data_type.clone())
864            }
865            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
866            LogicalExpression::Literal(value) => {
867                // Infer type from literal value
868                use grafeo_common::types::Value;
869                match value {
870                    Value::Bool(_) => LogicalType::Bool,
871                    Value::Int64(_) => LogicalType::Int64,
872                    Value::Float64(_) => LogicalType::Float64,
873                    Value::String(_) => LogicalType::String,
874                    Value::List(_) => LogicalType::Any, // Complex type
875                    Value::Map(_) => LogicalType::Any,  // Complex type
876                    Value::Null => LogicalType::Any,
877                    _ => LogicalType::Any,
878                }
879            }
880            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
881            LogicalExpression::Unary { .. } => LogicalType::Any,
882            LogicalExpression::FunctionCall { name, .. } => {
883                // Infer based on function name
884                match name.to_lowercase().as_str() {
885                    "count" | "sum" | "id" => LogicalType::Int64,
886                    "avg" => LogicalType::Float64,
887                    "type" => LogicalType::String,
888                    // List-returning functions use Any since we don't track element type
889                    "labels" | "collect" => LogicalType::Any,
890                    _ => LogicalType::Any,
891                }
892            }
893            LogicalExpression::List(_) => LogicalType::Any, // Complex type
894            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
895            _ => LogicalType::Any,
896        }
897    }
898
899    /// Binds a join operator.
900    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
901        // Bind both sides of the join
902        self.bind_operator(&join.left)?;
903        self.bind_operator(&join.right)?;
904
905        // Validate join conditions
906        for condition in &join.conditions {
907            self.validate_expression(&condition.left)?;
908            self.validate_expression(&condition.right)?;
909        }
910
911        Ok(())
912    }
913
914    /// Binds an aggregate operator.
915    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
916        // Bind the input first
917        self.bind_operator(&agg.input)?;
918
919        // Validate group by expressions
920        for expr in &agg.group_by {
921            self.validate_expression(expr)?;
922        }
923
924        // Validate aggregate expressions
925        for agg_expr in &agg.aggregates {
926            if let Some(ref expr) = agg_expr.expression {
927                self.validate_expression(expr)?;
928            }
929            // Add the alias as a new variable if present
930            if let Some(ref alias) = agg_expr.alias {
931                self.context.add_variable(
932                    alias.clone(),
933                    VariableInfo {
934                        name: alias.clone(),
935                        data_type: LogicalType::Any,
936                        is_node: false,
937                        is_edge: false,
938                    },
939                );
940            }
941        }
942
943        Ok(())
944    }
945}
946
947impl Default for Binder {
948    fn default() -> Self {
949        Self::new()
950    }
951}
952
953#[cfg(test)]
954mod tests {
955    use super::*;
956    use crate::query::plan::{BinaryOp, FilterOp};
957
958    #[test]
959    fn test_bind_simple_scan() {
960        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
961            items: vec![ReturnItem {
962                expression: LogicalExpression::Variable("n".to_string()),
963                alias: None,
964            }],
965            distinct: false,
966            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
967                variable: "n".to_string(),
968                label: Some("Person".to_string()),
969                input: None,
970            })),
971        }));
972
973        let mut binder = Binder::new();
974        let result = binder.bind(&plan);
975
976        assert!(result.is_ok());
977        let ctx = result.unwrap();
978        assert!(ctx.contains("n"));
979        assert!(ctx.get("n").unwrap().is_node);
980    }
981
982    #[test]
983    fn test_bind_undefined_variable() {
984        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
985            items: vec![ReturnItem {
986                expression: LogicalExpression::Variable("undefined".to_string()),
987                alias: None,
988            }],
989            distinct: false,
990            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
991                variable: "n".to_string(),
992                label: None,
993                input: None,
994            })),
995        }));
996
997        let mut binder = Binder::new();
998        let result = binder.bind(&plan);
999
1000        assert!(result.is_err());
1001        let err = result.unwrap_err();
1002        assert!(err.to_string().contains("Undefined variable"));
1003    }
1004
1005    #[test]
1006    fn test_bind_property_access() {
1007        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1008            items: vec![ReturnItem {
1009                expression: LogicalExpression::Property {
1010                    variable: "n".to_string(),
1011                    property: "name".to_string(),
1012                },
1013                alias: None,
1014            }],
1015            distinct: false,
1016            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1017                variable: "n".to_string(),
1018                label: Some("Person".to_string()),
1019                input: None,
1020            })),
1021        }));
1022
1023        let mut binder = Binder::new();
1024        let result = binder.bind(&plan);
1025
1026        assert!(result.is_ok());
1027    }
1028
1029    #[test]
1030    fn test_bind_filter_with_undefined_variable() {
1031        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1032            items: vec![ReturnItem {
1033                expression: LogicalExpression::Variable("n".to_string()),
1034                alias: None,
1035            }],
1036            distinct: false,
1037            input: Box::new(LogicalOperator::Filter(FilterOp {
1038                predicate: LogicalExpression::Binary {
1039                    left: Box::new(LogicalExpression::Property {
1040                        variable: "m".to_string(), // undefined!
1041                        property: "age".to_string(),
1042                    }),
1043                    op: BinaryOp::Gt,
1044                    right: Box::new(LogicalExpression::Literal(
1045                        grafeo_common::types::Value::Int64(30),
1046                    )),
1047                },
1048                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1049                    variable: "n".to_string(),
1050                    label: None,
1051                    input: None,
1052                })),
1053            })),
1054        }));
1055
1056        let mut binder = Binder::new();
1057        let result = binder.bind(&plan);
1058
1059        assert!(result.is_err());
1060        let err = result.unwrap_err();
1061        assert!(err.to_string().contains("Undefined variable 'm'"));
1062    }
1063
1064    #[test]
1065    fn test_bind_expand() {
1066        use crate::query::plan::{ExpandDirection, ExpandOp};
1067
1068        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1069            items: vec![
1070                ReturnItem {
1071                    expression: LogicalExpression::Variable("a".to_string()),
1072                    alias: None,
1073                },
1074                ReturnItem {
1075                    expression: LogicalExpression::Variable("b".to_string()),
1076                    alias: None,
1077                },
1078            ],
1079            distinct: false,
1080            input: Box::new(LogicalOperator::Expand(ExpandOp {
1081                from_variable: "a".to_string(),
1082                to_variable: "b".to_string(),
1083                edge_variable: Some("e".to_string()),
1084                direction: ExpandDirection::Outgoing,
1085                edge_type: Some("KNOWS".to_string()),
1086                min_hops: 1,
1087                max_hops: Some(1),
1088                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1089                    variable: "a".to_string(),
1090                    label: Some("Person".to_string()),
1091                    input: None,
1092                })),
1093                path_alias: None,
1094            })),
1095        }));
1096
1097        let mut binder = Binder::new();
1098        let result = binder.bind(&plan);
1099
1100        assert!(result.is_ok());
1101        let ctx = result.unwrap();
1102        assert!(ctx.contains("a"));
1103        assert!(ctx.contains("b"));
1104        assert!(ctx.contains("e"));
1105        assert!(ctx.get("a").unwrap().is_node);
1106        assert!(ctx.get("b").unwrap().is_node);
1107        assert!(ctx.get("e").unwrap().is_edge);
1108    }
1109
1110    #[test]
1111    fn test_bind_expand_from_undefined_variable() {
1112        // Tests that expanding from an undefined variable produces a clear error
1113        use crate::query::plan::{ExpandDirection, ExpandOp};
1114
1115        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1116            items: vec![ReturnItem {
1117                expression: LogicalExpression::Variable("b".to_string()),
1118                alias: None,
1119            }],
1120            distinct: false,
1121            input: Box::new(LogicalOperator::Expand(ExpandOp {
1122                from_variable: "undefined".to_string(), // not defined!
1123                to_variable: "b".to_string(),
1124                edge_variable: None,
1125                direction: ExpandDirection::Outgoing,
1126                edge_type: None,
1127                min_hops: 1,
1128                max_hops: Some(1),
1129                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1130                    variable: "a".to_string(),
1131                    label: None,
1132                    input: None,
1133                })),
1134                path_alias: None,
1135            })),
1136        }));
1137
1138        let mut binder = Binder::new();
1139        let result = binder.bind(&plan);
1140
1141        assert!(result.is_err());
1142        let err = result.unwrap_err();
1143        assert!(
1144            err.to_string().contains("Undefined variable 'undefined'"),
1145            "Expected error about undefined variable, got: {}",
1146            err
1147        );
1148    }
1149
1150    #[test]
1151    fn test_bind_return_with_aggregate_and_non_aggregate() {
1152        // Tests binding of aggregate functions alongside regular expressions
1153        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1154            items: vec![
1155                ReturnItem {
1156                    expression: LogicalExpression::FunctionCall {
1157                        name: "count".to_string(),
1158                        args: vec![LogicalExpression::Variable("n".to_string())],
1159                        distinct: false,
1160                    },
1161                    alias: Some("cnt".to_string()),
1162                },
1163                ReturnItem {
1164                    expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1165                    alias: Some("one".to_string()),
1166                },
1167            ],
1168            distinct: false,
1169            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1170                variable: "n".to_string(),
1171                label: Some("Person".to_string()),
1172                input: None,
1173            })),
1174        }));
1175
1176        let mut binder = Binder::new();
1177        let result = binder.bind(&plan);
1178
1179        // This should succeed - count(n) with literal is valid
1180        assert!(result.is_ok());
1181    }
1182
1183    #[test]
1184    fn test_bind_nested_property_access() {
1185        // Tests that nested property access on the same variable works
1186        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1187            items: vec![
1188                ReturnItem {
1189                    expression: LogicalExpression::Property {
1190                        variable: "n".to_string(),
1191                        property: "name".to_string(),
1192                    },
1193                    alias: None,
1194                },
1195                ReturnItem {
1196                    expression: LogicalExpression::Property {
1197                        variable: "n".to_string(),
1198                        property: "age".to_string(),
1199                    },
1200                    alias: None,
1201                },
1202            ],
1203            distinct: false,
1204            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1205                variable: "n".to_string(),
1206                label: Some("Person".to_string()),
1207                input: None,
1208            })),
1209        }));
1210
1211        let mut binder = Binder::new();
1212        let result = binder.bind(&plan);
1213
1214        assert!(result.is_ok());
1215    }
1216
1217    #[test]
1218    fn test_bind_binary_expression_with_undefined() {
1219        // Tests that binary expressions with undefined variables produce errors
1220        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1221            items: vec![ReturnItem {
1222                expression: LogicalExpression::Binary {
1223                    left: Box::new(LogicalExpression::Property {
1224                        variable: "n".to_string(),
1225                        property: "age".to_string(),
1226                    }),
1227                    op: BinaryOp::Add,
1228                    right: Box::new(LogicalExpression::Property {
1229                        variable: "m".to_string(), // undefined!
1230                        property: "age".to_string(),
1231                    }),
1232                },
1233                alias: Some("total".to_string()),
1234            }],
1235            distinct: false,
1236            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1237                variable: "n".to_string(),
1238                label: None,
1239                input: None,
1240            })),
1241        }));
1242
1243        let mut binder = Binder::new();
1244        let result = binder.bind(&plan);
1245
1246        assert!(result.is_err());
1247        assert!(
1248            result
1249                .unwrap_err()
1250                .to_string()
1251                .contains("Undefined variable 'm'")
1252        );
1253    }
1254
1255    #[test]
1256    fn test_bind_duplicate_variable_definition() {
1257        // Tests behavior when the same variable is defined twice (via two NodeScans)
1258        // This is typically not allowed or the second shadows the first
1259        use crate::query::plan::{JoinOp, JoinType};
1260
1261        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1262            items: vec![ReturnItem {
1263                expression: LogicalExpression::Variable("n".to_string()),
1264                alias: None,
1265            }],
1266            distinct: false,
1267            input: Box::new(LogicalOperator::Join(JoinOp {
1268                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1269                    variable: "n".to_string(),
1270                    label: Some("A".to_string()),
1271                    input: None,
1272                })),
1273                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1274                    variable: "m".to_string(), // different variable is fine
1275                    label: Some("B".to_string()),
1276                    input: None,
1277                })),
1278                join_type: JoinType::Inner,
1279                conditions: vec![],
1280            })),
1281        }));
1282
1283        let mut binder = Binder::new();
1284        let result = binder.bind(&plan);
1285
1286        // Join with different variables should work
1287        assert!(result.is_ok());
1288        let ctx = result.unwrap();
1289        assert!(ctx.contains("n"));
1290        assert!(ctx.contains("m"));
1291    }
1292
1293    #[test]
1294    fn test_bind_function_with_wrong_arity() {
1295        // Tests that functions with wrong number of arguments are handled
1296        // (behavior depends on whether binder validates arity)
1297        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1298            items: vec![ReturnItem {
1299                expression: LogicalExpression::FunctionCall {
1300                    name: "count".to_string(),
1301                    args: vec![], // count() needs an argument
1302                    distinct: false,
1303                },
1304                alias: None,
1305            }],
1306            distinct: false,
1307            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1308                variable: "n".to_string(),
1309                label: None,
1310                input: None,
1311            })),
1312        }));
1313
1314        let mut binder = Binder::new();
1315        let result = binder.bind(&plan);
1316
1317        // The binder may or may not catch this - if it passes, execution will fail
1318        // This test documents current behavior
1319        // If binding fails, that's fine; if it passes, execution will handle it
1320        let _ = result; // We're just testing it doesn't panic
1321    }
1322
1323    // --- Mutation operator validation ---
1324
1325    #[test]
1326    fn test_create_edge_rejects_undefined_source() {
1327        use crate::query::plan::CreateEdgeOp;
1328
1329        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1330            variable: Some("e".to_string()),
1331            from_variable: "ghost".to_string(), // not defined!
1332            to_variable: "b".to_string(),
1333            edge_type: "KNOWS".to_string(),
1334            properties: vec![],
1335            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1336                variable: "b".to_string(),
1337                label: None,
1338                input: None,
1339            })),
1340        }));
1341
1342        let mut binder = Binder::new();
1343        let err = binder.bind(&plan).unwrap_err();
1344        assert!(
1345            err.to_string().contains("Undefined variable 'ghost'"),
1346            "Should reject undefined source variable, got: {err}"
1347        );
1348    }
1349
1350    #[test]
1351    fn test_create_edge_rejects_undefined_target() {
1352        use crate::query::plan::CreateEdgeOp;
1353
1354        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1355            variable: None,
1356            from_variable: "a".to_string(),
1357            to_variable: "missing".to_string(), // not defined!
1358            edge_type: "KNOWS".to_string(),
1359            properties: vec![],
1360            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1361                variable: "a".to_string(),
1362                label: None,
1363                input: None,
1364            })),
1365        }));
1366
1367        let mut binder = Binder::new();
1368        let err = binder.bind(&plan).unwrap_err();
1369        assert!(
1370            err.to_string().contains("Undefined variable 'missing'"),
1371            "Should reject undefined target variable, got: {err}"
1372        );
1373    }
1374
1375    #[test]
1376    fn test_create_edge_validates_property_expressions() {
1377        use crate::query::plan::CreateEdgeOp;
1378
1379        // Source and target defined, but property references undefined variable
1380        let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
1381            variable: Some("e".to_string()),
1382            from_variable: "a".to_string(),
1383            to_variable: "b".to_string(),
1384            edge_type: "KNOWS".to_string(),
1385            properties: vec![(
1386                "since".to_string(),
1387                LogicalExpression::Property {
1388                    variable: "x".to_string(), // undefined!
1389                    property: "year".to_string(),
1390                },
1391            )],
1392            input: Box::new(LogicalOperator::Join(crate::query::plan::JoinOp {
1393                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1394                    variable: "a".to_string(),
1395                    label: None,
1396                    input: None,
1397                })),
1398                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1399                    variable: "b".to_string(),
1400                    label: None,
1401                    input: None,
1402                })),
1403                join_type: crate::query::plan::JoinType::Inner,
1404                conditions: vec![],
1405            })),
1406        }));
1407
1408        let mut binder = Binder::new();
1409        let err = binder.bind(&plan).unwrap_err();
1410        assert!(err.to_string().contains("Undefined variable 'x'"));
1411    }
1412
1413    #[test]
1414    fn test_set_property_rejects_undefined_variable() {
1415        use crate::query::plan::SetPropertyOp;
1416
1417        let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
1418            variable: "ghost".to_string(),
1419            properties: vec![(
1420                "name".to_string(),
1421                LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1422            )],
1423            replace: false,
1424            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1425                variable: "n".to_string(),
1426                label: None,
1427                input: None,
1428            })),
1429        }));
1430
1431        let mut binder = Binder::new();
1432        let err = binder.bind(&plan).unwrap_err();
1433        assert!(
1434            err.to_string().contains("in SET"),
1435            "Error should indicate SET context, got: {err}"
1436        );
1437    }
1438
1439    #[test]
1440    fn test_delete_node_rejects_undefined_variable() {
1441        use crate::query::plan::DeleteNodeOp;
1442
1443        let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
1444            variable: "phantom".to_string(),
1445            detach: false,
1446            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1447                variable: "n".to_string(),
1448                label: None,
1449                input: None,
1450            })),
1451        }));
1452
1453        let mut binder = Binder::new();
1454        let err = binder.bind(&plan).unwrap_err();
1455        assert!(err.to_string().contains("Undefined variable 'phantom'"));
1456    }
1457
1458    #[test]
1459    fn test_delete_edge_rejects_undefined_variable() {
1460        use crate::query::plan::DeleteEdgeOp;
1461
1462        let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
1463            variable: "gone".to_string(),
1464            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1465                variable: "n".to_string(),
1466                label: None,
1467                input: None,
1468            })),
1469        }));
1470
1471        let mut binder = Binder::new();
1472        let err = binder.bind(&plan).unwrap_err();
1473        assert!(err.to_string().contains("Undefined variable 'gone'"));
1474    }
1475
1476    // --- WITH/Project clause ---
1477
1478    #[test]
1479    fn test_project_alias_becomes_available_downstream() {
1480        use crate::query::plan::{ProjectOp, Projection};
1481
1482        // WITH n.name AS person_name RETURN person_name
1483        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1484            items: vec![ReturnItem {
1485                expression: LogicalExpression::Variable("person_name".to_string()),
1486                alias: None,
1487            }],
1488            distinct: false,
1489            input: Box::new(LogicalOperator::Project(ProjectOp {
1490                projections: vec![Projection {
1491                    expression: LogicalExpression::Property {
1492                        variable: "n".to_string(),
1493                        property: "name".to_string(),
1494                    },
1495                    alias: Some("person_name".to_string()),
1496                }],
1497                input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1498                    variable: "n".to_string(),
1499                    label: None,
1500                    input: None,
1501                })),
1502            })),
1503        }));
1504
1505        let mut binder = Binder::new();
1506        let ctx = binder.bind(&plan).unwrap();
1507        assert!(
1508            ctx.contains("person_name"),
1509            "WITH alias should be available to RETURN"
1510        );
1511    }
1512
1513    #[test]
1514    fn test_project_rejects_undefined_expression() {
1515        use crate::query::plan::{ProjectOp, Projection};
1516
1517        let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
1518            projections: vec![Projection {
1519                expression: LogicalExpression::Variable("nope".to_string()),
1520                alias: Some("x".to_string()),
1521            }],
1522            input: Box::new(LogicalOperator::Empty),
1523        }));
1524
1525        let mut binder = Binder::new();
1526        let result = binder.bind(&plan);
1527        assert!(result.is_err(), "WITH on undefined variable should fail");
1528    }
1529
1530    // --- UNWIND ---
1531
1532    #[test]
1533    fn test_unwind_adds_element_variable() {
1534        use crate::query::plan::UnwindOp;
1535
1536        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1537            items: vec![ReturnItem {
1538                expression: LogicalExpression::Variable("item".to_string()),
1539                alias: None,
1540            }],
1541            distinct: false,
1542            input: Box::new(LogicalOperator::Unwind(UnwindOp {
1543                expression: LogicalExpression::List(vec![
1544                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1545                    LogicalExpression::Literal(grafeo_common::types::Value::Int64(2)),
1546                ]),
1547                variable: "item".to_string(),
1548                input: Box::new(LogicalOperator::Empty),
1549            })),
1550        }));
1551
1552        let mut binder = Binder::new();
1553        let ctx = binder.bind(&plan).unwrap();
1554        assert!(ctx.contains("item"), "UNWIND variable should be in scope");
1555        let info = ctx.get("item").unwrap();
1556        assert!(
1557            !info.is_node && !info.is_edge,
1558            "UNWIND variable is not a graph element"
1559        );
1560    }
1561
1562    // --- MERGE ---
1563
1564    #[test]
1565    fn test_merge_adds_variable_and_validates_properties() {
1566        use crate::query::plan::MergeOp;
1567
1568        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1569            items: vec![ReturnItem {
1570                expression: LogicalExpression::Variable("m".to_string()),
1571                alias: None,
1572            }],
1573            distinct: false,
1574            input: Box::new(LogicalOperator::Merge(MergeOp {
1575                variable: "m".to_string(),
1576                labels: vec!["Person".to_string()],
1577                match_properties: vec![(
1578                    "name".to_string(),
1579                    LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
1580                )],
1581                on_create: vec![(
1582                    "created".to_string(),
1583                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1584                )],
1585                on_match: vec![(
1586                    "updated".to_string(),
1587                    LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1588                )],
1589                input: Box::new(LogicalOperator::Empty),
1590            })),
1591        }));
1592
1593        let mut binder = Binder::new();
1594        let ctx = binder.bind(&plan).unwrap();
1595        assert!(ctx.contains("m"));
1596        assert!(
1597            ctx.get("m").unwrap().is_node,
1598            "MERGE variable should be a node"
1599        );
1600    }
1601
1602    #[test]
1603    fn test_merge_rejects_undefined_in_on_create() {
1604        use crate::query::plan::MergeOp;
1605
1606        let plan = LogicalPlan::new(LogicalOperator::Merge(MergeOp {
1607            variable: "m".to_string(),
1608            labels: vec![],
1609            match_properties: vec![],
1610            on_create: vec![(
1611                "name".to_string(),
1612                LogicalExpression::Property {
1613                    variable: "other".to_string(), // undefined!
1614                    property: "name".to_string(),
1615                },
1616            )],
1617            on_match: vec![],
1618            input: Box::new(LogicalOperator::Empty),
1619        }));
1620
1621        let mut binder = Binder::new();
1622        let result = binder.bind(&plan);
1623        assert!(
1624            result.is_err(),
1625            "ON CREATE referencing undefined variable should fail"
1626        );
1627    }
1628
1629    // --- ShortestPath ---
1630
1631    #[test]
1632    fn test_shortest_path_rejects_undefined_source() {
1633        use crate::query::plan::{ExpandDirection, ShortestPathOp};
1634
1635        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1636            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1637                variable: "b".to_string(),
1638                label: None,
1639                input: None,
1640            })),
1641            source_var: "missing".to_string(), // not defined
1642            target_var: "b".to_string(),
1643            edge_type: None,
1644            direction: ExpandDirection::Both,
1645            path_alias: "p".to_string(),
1646            all_paths: false,
1647        }));
1648
1649        let mut binder = Binder::new();
1650        let err = binder.bind(&plan).unwrap_err();
1651        assert!(
1652            err.to_string().contains("source in shortestPath"),
1653            "Error should mention shortestPath source context, got: {err}"
1654        );
1655    }
1656
1657    #[test]
1658    fn test_shortest_path_adds_path_and_length_variables() {
1659        use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};
1660
1661        let plan = LogicalPlan::new(LogicalOperator::ShortestPath(ShortestPathOp {
1662            input: Box::new(LogicalOperator::Join(JoinOp {
1663                left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1664                    variable: "a".to_string(),
1665                    label: None,
1666                    input: None,
1667                })),
1668                right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1669                    variable: "b".to_string(),
1670                    label: None,
1671                    input: None,
1672                })),
1673                join_type: JoinType::Cross,
1674                conditions: vec![],
1675            })),
1676            source_var: "a".to_string(),
1677            target_var: "b".to_string(),
1678            edge_type: Some("ROAD".to_string()),
1679            direction: ExpandDirection::Outgoing,
1680            path_alias: "p".to_string(),
1681            all_paths: false,
1682        }));
1683
1684        let mut binder = Binder::new();
1685        let ctx = binder.bind(&plan).unwrap();
1686        assert!(ctx.contains("p"), "Path alias should be bound");
1687        assert!(
1688            ctx.contains("_path_length_p"),
1689            "Path length variable should be auto-created"
1690        );
1691    }
1692
1693    // --- Expression validation edge cases ---
1694
1695    #[test]
1696    fn test_case_expression_validates_all_branches() {
1697        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1698            items: vec![ReturnItem {
1699                expression: LogicalExpression::Case {
1700                    operand: None,
1701                    when_clauses: vec![
1702                        (
1703                            LogicalExpression::Binary {
1704                                left: Box::new(LogicalExpression::Property {
1705                                    variable: "n".to_string(),
1706                                    property: "age".to_string(),
1707                                }),
1708                                op: BinaryOp::Gt,
1709                                right: Box::new(LogicalExpression::Literal(
1710                                    grafeo_common::types::Value::Int64(18),
1711                                )),
1712                            },
1713                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1714                                "adult".into(),
1715                            )),
1716                        ),
1717                        (
1718                            // This branch references undefined variable
1719                            LogicalExpression::Property {
1720                                variable: "ghost".to_string(),
1721                                property: "flag".to_string(),
1722                            },
1723                            LogicalExpression::Literal(grafeo_common::types::Value::String(
1724                                "flagged".into(),
1725                            )),
1726                        ),
1727                    ],
1728                    else_clause: Some(Box::new(LogicalExpression::Literal(
1729                        grafeo_common::types::Value::String("other".into()),
1730                    ))),
1731                },
1732                alias: None,
1733            }],
1734            distinct: false,
1735            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1736                variable: "n".to_string(),
1737                label: None,
1738                input: None,
1739            })),
1740        }));
1741
1742        let mut binder = Binder::new();
1743        let err = binder.bind(&plan).unwrap_err();
1744        assert!(
1745            err.to_string().contains("ghost"),
1746            "CASE should validate all when-clause conditions"
1747        );
1748    }
1749
1750    #[test]
1751    fn test_case_expression_validates_else_clause() {
1752        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1753            items: vec![ReturnItem {
1754                expression: LogicalExpression::Case {
1755                    operand: None,
1756                    when_clauses: vec![(
1757                        LogicalExpression::Literal(grafeo_common::types::Value::Bool(true)),
1758                        LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
1759                    )],
1760                    else_clause: Some(Box::new(LogicalExpression::Property {
1761                        variable: "missing".to_string(),
1762                        property: "x".to_string(),
1763                    })),
1764                },
1765                alias: None,
1766            }],
1767            distinct: false,
1768            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1769                variable: "n".to_string(),
1770                label: None,
1771                input: None,
1772            })),
1773        }));
1774
1775        let mut binder = Binder::new();
1776        let err = binder.bind(&plan).unwrap_err();
1777        assert!(
1778            err.to_string().contains("missing"),
1779            "CASE ELSE should validate its expression too"
1780        );
1781    }
1782
1783    #[test]
1784    fn test_slice_access_validates_expressions() {
1785        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1786            items: vec![ReturnItem {
1787                expression: LogicalExpression::SliceAccess {
1788                    base: Box::new(LogicalExpression::Variable("n".to_string())),
1789                    start: Some(Box::new(LogicalExpression::Variable(
1790                        "undefined_start".to_string(),
1791                    ))),
1792                    end: None,
1793                },
1794                alias: None,
1795            }],
1796            distinct: false,
1797            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1798                variable: "n".to_string(),
1799                label: None,
1800                input: None,
1801            })),
1802        }));
1803
1804        let mut binder = Binder::new();
1805        let err = binder.bind(&plan).unwrap_err();
1806        assert!(err.to_string().contains("undefined_start"));
1807    }
1808
1809    #[test]
1810    fn test_list_comprehension_validates_list_source() {
1811        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1812            items: vec![ReturnItem {
1813                expression: LogicalExpression::ListComprehension {
1814                    variable: "x".to_string(),
1815                    list_expr: Box::new(LogicalExpression::Variable("not_defined".to_string())),
1816                    filter_expr: None,
1817                    map_expr: Box::new(LogicalExpression::Variable("x".to_string())),
1818                },
1819                alias: None,
1820            }],
1821            distinct: false,
1822            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1823                variable: "n".to_string(),
1824                label: None,
1825                input: None,
1826            })),
1827        }));
1828
1829        let mut binder = Binder::new();
1830        let err = binder.bind(&plan).unwrap_err();
1831        assert!(
1832            err.to_string().contains("not_defined"),
1833            "List comprehension should validate source list expression"
1834        );
1835    }
1836
1837    #[test]
1838    fn test_labels_type_id_reject_undefined() {
1839        // labels(x) where x is not defined
1840        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1841            items: vec![ReturnItem {
1842                expression: LogicalExpression::Labels("x".to_string()),
1843                alias: None,
1844            }],
1845            distinct: false,
1846            input: Box::new(LogicalOperator::Empty),
1847        }));
1848
1849        let mut binder = Binder::new();
1850        assert!(
1851            binder.bind(&plan).is_err(),
1852            "labels(x) on undefined x should fail"
1853        );
1854
1855        // type(e) where e is not defined
1856        let plan2 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1857            items: vec![ReturnItem {
1858                expression: LogicalExpression::Type("e".to_string()),
1859                alias: None,
1860            }],
1861            distinct: false,
1862            input: Box::new(LogicalOperator::Empty),
1863        }));
1864
1865        let mut binder2 = Binder::new();
1866        assert!(
1867            binder2.bind(&plan2).is_err(),
1868            "type(e) on undefined e should fail"
1869        );
1870
1871        // id(n) where n is not defined
1872        let plan3 = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1873            items: vec![ReturnItem {
1874                expression: LogicalExpression::Id("n".to_string()),
1875                alias: None,
1876            }],
1877            distinct: false,
1878            input: Box::new(LogicalOperator::Empty),
1879        }));
1880
1881        let mut binder3 = Binder::new();
1882        assert!(
1883            binder3.bind(&plan3).is_err(),
1884            "id(n) on undefined n should fail"
1885        );
1886    }
1887
1888    #[test]
1889    fn test_expand_rejects_non_node_source() {
1890        use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};
1891
1892        // UNWIND [1,2] AS x  -- x is not a node
1893        // MATCH (x)-[:E]->(b)  -- should fail: x isn't a node
1894        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
1895            items: vec![ReturnItem {
1896                expression: LogicalExpression::Variable("b".to_string()),
1897                alias: None,
1898            }],
1899            distinct: false,
1900            input: Box::new(LogicalOperator::Expand(ExpandOp {
1901                from_variable: "x".to_string(),
1902                to_variable: "b".to_string(),
1903                edge_variable: None,
1904                direction: ExpandDirection::Outgoing,
1905                edge_type: None,
1906                min_hops: 1,
1907                max_hops: Some(1),
1908                input: Box::new(LogicalOperator::Unwind(UnwindOp {
1909                    expression: LogicalExpression::List(vec![]),
1910                    variable: "x".to_string(),
1911                    input: Box::new(LogicalOperator::Empty),
1912                })),
1913                path_alias: None,
1914            })),
1915        }));
1916
1917        let mut binder = Binder::new();
1918        let err = binder.bind(&plan).unwrap_err();
1919        assert!(
1920            err.to_string().contains("not a node"),
1921            "Expanding from non-node should fail, got: {err}"
1922        );
1923    }
1924
1925    #[test]
1926    fn test_add_label_rejects_undefined_variable() {
1927        use crate::query::plan::AddLabelOp;
1928
1929        let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
1930            variable: "missing".to_string(),
1931            labels: vec!["Admin".to_string()],
1932            input: Box::new(LogicalOperator::Empty),
1933        }));
1934
1935        let mut binder = Binder::new();
1936        let err = binder.bind(&plan).unwrap_err();
1937        assert!(err.to_string().contains("SET labels"));
1938    }
1939
1940    #[test]
1941    fn test_remove_label_rejects_undefined_variable() {
1942        use crate::query::plan::RemoveLabelOp;
1943
1944        let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
1945            variable: "missing".to_string(),
1946            labels: vec!["Admin".to_string()],
1947            input: Box::new(LogicalOperator::Empty),
1948        }));
1949
1950        let mut binder = Binder::new();
1951        let err = binder.bind(&plan).unwrap_err();
1952        assert!(err.to_string().contains("REMOVE labels"));
1953    }
1954
1955    #[test]
1956    fn test_sort_validates_key_expressions() {
1957        use crate::query::plan::{SortKey, SortOp, SortOrder};
1958
1959        let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
1960            keys: vec![SortKey {
1961                expression: LogicalExpression::Property {
1962                    variable: "missing".to_string(),
1963                    property: "name".to_string(),
1964                },
1965                order: SortOrder::Ascending,
1966            }],
1967            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1968                variable: "n".to_string(),
1969                label: None,
1970                input: None,
1971            })),
1972        }));
1973
1974        let mut binder = Binder::new();
1975        assert!(
1976            binder.bind(&plan).is_err(),
1977            "ORDER BY on undefined variable should fail"
1978        );
1979    }
1980
1981    #[test]
1982    fn test_create_node_adds_variable_before_property_validation() {
1983        use crate::query::plan::CreateNodeOp;
1984
1985        // CREATE (n:Person {friend: n.name}) - referencing the node being created
1986        // The variable should be available for property expressions (self-reference)
1987        let plan = LogicalPlan::new(LogicalOperator::CreateNode(CreateNodeOp {
1988            variable: "n".to_string(),
1989            labels: vec!["Person".to_string()],
1990            properties: vec![(
1991                "self_ref".to_string(),
1992                LogicalExpression::Property {
1993                    variable: "n".to_string(),
1994                    property: "name".to_string(),
1995                },
1996            )],
1997            input: None,
1998        }));
1999
2000        let mut binder = Binder::new();
2001        // This should succeed because CreateNode adds the variable before validating properties
2002        let ctx = binder.bind(&plan).unwrap();
2003        assert!(ctx.get("n").unwrap().is_node);
2004    }
2005
2006    #[test]
2007    fn test_undefined_variable_suggests_similar() {
2008        // 'person' is defined, user types 'persn' - should get a suggestion
2009        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2010            items: vec![ReturnItem {
2011                expression: LogicalExpression::Variable("persn".to_string()),
2012                alias: None,
2013            }],
2014            distinct: false,
2015            input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2016                variable: "person".to_string(),
2017                label: None,
2018                input: None,
2019            })),
2020        }));
2021
2022        let mut binder = Binder::new();
2023        let err = binder.bind(&plan).unwrap_err();
2024        let msg = err.to_string();
2025        // The error should contain the variable name at minimum
2026        assert!(
2027            msg.contains("persn"),
2028            "Error should mention the undefined variable"
2029        );
2030    }
2031
2032    #[test]
2033    fn test_anon_variables_skip_validation() {
2034        // Variables starting with _anon_ are anonymous and should be silently accepted
2035        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2036            items: vec![ReturnItem {
2037                expression: LogicalExpression::Variable("_anon_42".to_string()),
2038                alias: None,
2039            }],
2040            distinct: false,
2041            input: Box::new(LogicalOperator::Empty),
2042        }));
2043
2044        let mut binder = Binder::new();
2045        let result = binder.bind(&plan);
2046        assert!(
2047            result.is_ok(),
2048            "Anonymous variables should bypass validation"
2049        );
2050    }
2051
2052    #[test]
2053    fn test_map_expression_validates_values() {
2054        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
2055            items: vec![ReturnItem {
2056                expression: LogicalExpression::Map(vec![(
2057                    "key".to_string(),
2058                    LogicalExpression::Variable("undefined".to_string()),
2059                )]),
2060                alias: None,
2061            }],
2062            distinct: false,
2063            input: Box::new(LogicalOperator::Empty),
2064        }));
2065
2066        let mut binder = Binder::new();
2067        assert!(
2068            binder.bind(&plan).is_err(),
2069            "Map values should be validated"
2070        );
2071    }
2072
2073    #[test]
2074    fn test_vector_scan_validates_query_vector() {
2075        use crate::query::plan::VectorScanOp;
2076
2077        let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
2078            variable: "result".to_string(),
2079            index_name: None,
2080            property: "embedding".to_string(),
2081            label: Some("Doc".to_string()),
2082            query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
2083            k: 10,
2084            metric: None,
2085            min_similarity: None,
2086            max_distance: None,
2087            input: None,
2088        }));
2089
2090        let mut binder = Binder::new();
2091        let err = binder.bind(&plan).unwrap_err();
2092        assert!(err.to_string().contains("undefined_vec"));
2093    }
2094}