Skip to main content

grafeo_engine/query/
binder.rs

//! Semantic validation - catching errors before execution.
//!
//! The binder walks the logical plan and validates that everything makes sense:
//! - Is that variable actually defined? (You can't use `RETURN x` if `x` wasn't matched)
//! - Does that property access make sense? (Accessing `.age` on an integer fails)
//! - Are types compatible? (Can't compare a string to an integer)
//!
//! Better to catch these errors early than waste time executing a broken query.
9
10use crate::query::plan::{
11    ExpandOp, FilterOp, LogicalExpression, LogicalOperator, LogicalPlan, NodeScanOp, ReturnItem,
12    ReturnOp, TripleScanOp,
13};
14use grafeo_common::types::LogicalType;
15use grafeo_common::utils::error::{Error, QueryError, QueryErrorKind, Result};
16use grafeo_common::utils::strings::{find_similar, format_suggestion};
17use std::collections::HashMap;
18
19/// Creates a semantic binding error.
20fn binding_error(message: impl Into<String>) -> Error {
21    Error::Query(QueryError::new(QueryErrorKind::Semantic, message))
22}
23
24/// Creates a semantic binding error with a hint.
25fn binding_error_with_hint(message: impl Into<String>, hint: impl Into<String>) -> Error {
26    Error::Query(QueryError::new(QueryErrorKind::Semantic, message).with_hint(hint))
27}
28
29/// Creates an "undefined variable" error with a suggestion if a similar variable exists.
30fn undefined_variable_error(variable: &str, context: &BindingContext, suffix: &str) -> Error {
31    let candidates: Vec<String> = context.variable_names().to_vec();
32    let candidates_ref: Vec<&str> = candidates.iter().map(|s| s.as_str()).collect();
33
34    if let Some(suggestion) = find_similar(variable, &candidates_ref) {
35        binding_error_with_hint(
36            format!("Undefined variable '{variable}'{suffix}"),
37            format_suggestion(suggestion),
38        )
39    } else {
40        binding_error(format!("Undefined variable '{variable}'{suffix}"))
41    }
42}
43
44/// Information about a bound variable.
45#[derive(Debug, Clone)]
46pub struct VariableInfo {
47    /// The name of the variable.
48    pub name: String,
49    /// The inferred type of the variable.
50    pub data_type: LogicalType,
51    /// Whether this variable is a node.
52    pub is_node: bool,
53    /// Whether this variable is an edge.
54    pub is_edge: bool,
55}
56
57/// Context containing all bound variables and their information.
58#[derive(Debug, Clone, Default)]
59pub struct BindingContext {
60    /// Map from variable name to its info.
61    variables: HashMap<String, VariableInfo>,
62    /// Variables in order of definition.
63    order: Vec<String>,
64}
65
66impl BindingContext {
67    /// Creates a new empty binding context.
68    #[must_use]
69    pub fn new() -> Self {
70        Self {
71            variables: HashMap::new(),
72            order: Vec::new(),
73        }
74    }
75
76    /// Adds a variable to the context.
77    pub fn add_variable(&mut self, name: String, info: VariableInfo) {
78        if !self.variables.contains_key(&name) {
79            self.order.push(name.clone());
80        }
81        self.variables.insert(name, info);
82    }
83
84    /// Looks up a variable by name.
85    #[must_use]
86    pub fn get(&self, name: &str) -> Option<&VariableInfo> {
87        self.variables.get(name)
88    }
89
90    /// Checks if a variable is defined.
91    #[must_use]
92    pub fn contains(&self, name: &str) -> bool {
93        self.variables.contains_key(name)
94    }
95
96    /// Returns all variable names in definition order.
97    #[must_use]
98    pub fn variable_names(&self) -> &[String] {
99        &self.order
100    }
101
102    /// Returns the number of bound variables.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.variables.len()
106    }
107
108    /// Returns true if no variables are bound.
109    #[must_use]
110    pub fn is_empty(&self) -> bool {
111        self.variables.is_empty()
112    }
113}
114
115/// Semantic binder for query plans.
116///
117/// The binder walks the logical plan and:
118/// 1. Collects all variable definitions
119/// 2. Validates that all variable references are valid
120/// 3. Infers types where possible
121/// 4. Reports semantic errors
122pub struct Binder {
123    /// The current binding context.
124    context: BindingContext,
125}
126
127impl Binder {
128    /// Creates a new binder.
129    #[must_use]
130    pub fn new() -> Self {
131        Self {
132            context: BindingContext::new(),
133        }
134    }
135
136    /// Binds a logical plan, returning the binding context.
137    ///
138    /// # Errors
139    ///
140    /// Returns an error if semantic validation fails.
141    pub fn bind(&mut self, plan: &LogicalPlan) -> Result<BindingContext> {
142        self.bind_operator(&plan.root)?;
143        Ok(self.context.clone())
144    }
145
146    /// Binds a single logical operator.
147    fn bind_operator(&mut self, op: &LogicalOperator) -> Result<()> {
148        match op {
149            LogicalOperator::NodeScan(scan) => self.bind_node_scan(scan),
150            LogicalOperator::Expand(expand) => self.bind_expand(expand),
151            LogicalOperator::Filter(filter) => self.bind_filter(filter),
152            LogicalOperator::Return(ret) => self.bind_return(ret),
153            LogicalOperator::Project(project) => {
154                self.bind_operator(&project.input)?;
155                for projection in &project.projections {
156                    self.validate_expression(&projection.expression)?;
157                    // Add the projection alias to the context (for WITH clause support)
158                    if let Some(ref alias) = projection.alias {
159                        // Determine the type from the expression
160                        let data_type = self.infer_expression_type(&projection.expression);
161                        self.context.add_variable(
162                            alias.clone(),
163                            VariableInfo {
164                                name: alias.clone(),
165                                data_type,
166                                is_node: false,
167                                is_edge: false,
168                            },
169                        );
170                    }
171                }
172                Ok(())
173            }
174            LogicalOperator::Limit(limit) => self.bind_operator(&limit.input),
175            LogicalOperator::Skip(skip) => self.bind_operator(&skip.input),
176            LogicalOperator::Sort(sort) => {
177                self.bind_operator(&sort.input)?;
178                for key in &sort.keys {
179                    self.validate_expression(&key.expression)?;
180                }
181                Ok(())
182            }
183            LogicalOperator::CreateNode(create) => {
184                // CreateNode introduces a new variable
185                if let Some(ref input) = create.input {
186                    self.bind_operator(input)?;
187                }
188                self.context.add_variable(
189                    create.variable.clone(),
190                    VariableInfo {
191                        name: create.variable.clone(),
192                        data_type: LogicalType::Node,
193                        is_node: true,
194                        is_edge: false,
195                    },
196                );
197                // Validate property expressions
198                for (_, expr) in &create.properties {
199                    self.validate_expression(expr)?;
200                }
201                Ok(())
202            }
203            LogicalOperator::EdgeScan(scan) => {
204                if let Some(ref input) = scan.input {
205                    self.bind_operator(input)?;
206                }
207                self.context.add_variable(
208                    scan.variable.clone(),
209                    VariableInfo {
210                        name: scan.variable.clone(),
211                        data_type: LogicalType::Edge,
212                        is_node: false,
213                        is_edge: true,
214                    },
215                );
216                Ok(())
217            }
218            LogicalOperator::Distinct(distinct) => self.bind_operator(&distinct.input),
219            LogicalOperator::Join(join) => self.bind_join(join),
220            LogicalOperator::Aggregate(agg) => self.bind_aggregate(agg),
221            LogicalOperator::CreateEdge(create) => {
222                self.bind_operator(&create.input)?;
223                // Validate that source and target variables are defined
224                if !self.context.contains(&create.from_variable) {
225                    return Err(undefined_variable_error(
226                        &create.from_variable,
227                        &self.context,
228                        " (source in CREATE EDGE)",
229                    ));
230                }
231                if !self.context.contains(&create.to_variable) {
232                    return Err(undefined_variable_error(
233                        &create.to_variable,
234                        &self.context,
235                        " (target in CREATE EDGE)",
236                    ));
237                }
238                // Add edge variable if present
239                if let Some(ref var) = create.variable {
240                    self.context.add_variable(
241                        var.clone(),
242                        VariableInfo {
243                            name: var.clone(),
244                            data_type: LogicalType::Edge,
245                            is_node: false,
246                            is_edge: true,
247                        },
248                    );
249                }
250                // Validate property expressions
251                for (_, expr) in &create.properties {
252                    self.validate_expression(expr)?;
253                }
254                Ok(())
255            }
256            LogicalOperator::DeleteNode(delete) => {
257                self.bind_operator(&delete.input)?;
258                // Validate that the variable to delete is defined
259                if !self.context.contains(&delete.variable) {
260                    return Err(undefined_variable_error(
261                        &delete.variable,
262                        &self.context,
263                        " in DELETE",
264                    ));
265                }
266                Ok(())
267            }
268            LogicalOperator::DeleteEdge(delete) => {
269                self.bind_operator(&delete.input)?;
270                // Validate that the variable to delete is defined
271                if !self.context.contains(&delete.variable) {
272                    return Err(undefined_variable_error(
273                        &delete.variable,
274                        &self.context,
275                        " in DELETE",
276                    ));
277                }
278                Ok(())
279            }
280            LogicalOperator::SetProperty(set) => {
281                self.bind_operator(&set.input)?;
282                // Validate that the variable to update is defined
283                if !self.context.contains(&set.variable) {
284                    return Err(undefined_variable_error(
285                        &set.variable,
286                        &self.context,
287                        " in SET",
288                    ));
289                }
290                // Validate property value expressions
291                for (_, expr) in &set.properties {
292                    self.validate_expression(expr)?;
293                }
294                Ok(())
295            }
296            LogicalOperator::Empty => Ok(()),
297
298            LogicalOperator::Unwind(unwind) => {
299                // First bind the input
300                self.bind_operator(&unwind.input)?;
301                // Validate the expression being unwound
302                self.validate_expression(&unwind.expression)?;
303                // Add the new variable to the context
304                self.context.add_variable(
305                    unwind.variable.clone(),
306                    VariableInfo {
307                        name: unwind.variable.clone(),
308                        data_type: LogicalType::Any, // Unwound elements can be any type
309                        is_node: false,
310                        is_edge: false,
311                    },
312                );
313                // Add ORDINALITY variable if present (1-based index)
314                if let Some(ref ord_var) = unwind.ordinality_var {
315                    self.context.add_variable(
316                        ord_var.clone(),
317                        VariableInfo {
318                            name: ord_var.clone(),
319                            data_type: LogicalType::Int64,
320                            is_node: false,
321                            is_edge: false,
322                        },
323                    );
324                }
325                // Add OFFSET variable if present (0-based index)
326                if let Some(ref off_var) = unwind.offset_var {
327                    self.context.add_variable(
328                        off_var.clone(),
329                        VariableInfo {
330                            name: off_var.clone(),
331                            data_type: LogicalType::Int64,
332                            is_node: false,
333                            is_edge: false,
334                        },
335                    );
336                }
337                Ok(())
338            }
339
340            // RDF/SPARQL operators
341            LogicalOperator::TripleScan(scan) => self.bind_triple_scan(scan),
342            LogicalOperator::Union(union) => {
343                for input in &union.inputs {
344                    self.bind_operator(input)?;
345                }
346                Ok(())
347            }
348            LogicalOperator::LeftJoin(lj) => {
349                self.bind_operator(&lj.left)?;
350                self.bind_operator(&lj.right)?;
351                if let Some(ref cond) = lj.condition {
352                    self.validate_expression(cond)?;
353                }
354                Ok(())
355            }
356            LogicalOperator::AntiJoin(aj) => {
357                self.bind_operator(&aj.left)?;
358                self.bind_operator(&aj.right)?;
359                Ok(())
360            }
361            LogicalOperator::Bind(bind) => {
362                self.bind_operator(&bind.input)?;
363                self.validate_expression(&bind.expression)?;
364                self.context.add_variable(
365                    bind.variable.clone(),
366                    VariableInfo {
367                        name: bind.variable.clone(),
368                        data_type: LogicalType::Any,
369                        is_node: false,
370                        is_edge: false,
371                    },
372                );
373                Ok(())
374            }
375            LogicalOperator::Merge(merge) => {
376                // First bind the input
377                self.bind_operator(&merge.input)?;
378                // Validate the match property expressions
379                for (_, expr) in &merge.match_properties {
380                    self.validate_expression(expr)?;
381                }
382                // Validate the ON CREATE property expressions
383                for (_, expr) in &merge.on_create {
384                    self.validate_expression(expr)?;
385                }
386                // Validate the ON MATCH property expressions
387                for (_, expr) in &merge.on_match {
388                    self.validate_expression(expr)?;
389                }
390                // MERGE introduces a new variable
391                self.context.add_variable(
392                    merge.variable.clone(),
393                    VariableInfo {
394                        name: merge.variable.clone(),
395                        data_type: LogicalType::Node,
396                        is_node: true,
397                        is_edge: false,
398                    },
399                );
400                Ok(())
401            }
402            LogicalOperator::AddLabel(add_label) => {
403                self.bind_operator(&add_label.input)?;
404                // Validate that the variable exists
405                if !self.context.contains(&add_label.variable) {
406                    return Err(undefined_variable_error(
407                        &add_label.variable,
408                        &self.context,
409                        " in SET labels",
410                    ));
411                }
412                Ok(())
413            }
414            LogicalOperator::RemoveLabel(remove_label) => {
415                self.bind_operator(&remove_label.input)?;
416                // Validate that the variable exists
417                if !self.context.contains(&remove_label.variable) {
418                    return Err(undefined_variable_error(
419                        &remove_label.variable,
420                        &self.context,
421                        " in REMOVE labels",
422                    ));
423                }
424                Ok(())
425            }
426            LogicalOperator::ShortestPath(sp) => {
427                // First bind the input
428                self.bind_operator(&sp.input)?;
429                // Validate that source and target variables are defined
430                if !self.context.contains(&sp.source_var) {
431                    return Err(undefined_variable_error(
432                        &sp.source_var,
433                        &self.context,
434                        " (source in shortestPath)",
435                    ));
436                }
437                if !self.context.contains(&sp.target_var) {
438                    return Err(undefined_variable_error(
439                        &sp.target_var,
440                        &self.context,
441                        " (target in shortestPath)",
442                    ));
443                }
444                // Add the path alias variable to the context
445                self.context.add_variable(
446                    sp.path_alias.clone(),
447                    VariableInfo {
448                        name: sp.path_alias.clone(),
449                        data_type: LogicalType::Any, // Path is a complex type
450                        is_node: false,
451                        is_edge: false,
452                    },
453                );
454                // Also add the path length variable for length(p) calls
455                let path_length_var = format!("_path_length_{}", sp.path_alias);
456                self.context.add_variable(
457                    path_length_var.clone(),
458                    VariableInfo {
459                        name: path_length_var,
460                        data_type: LogicalType::Int64,
461                        is_node: false,
462                        is_edge: false,
463                    },
464                );
465                Ok(())
466            }
467            // SPARQL Update operators - these don't require variable binding
468            LogicalOperator::InsertTriple(insert) => {
469                if let Some(ref input) = insert.input {
470                    self.bind_operator(input)?;
471                }
472                Ok(())
473            }
474            LogicalOperator::DeleteTriple(delete) => {
475                if let Some(ref input) = delete.input {
476                    self.bind_operator(input)?;
477                }
478                Ok(())
479            }
480            LogicalOperator::Modify(modify) => {
481                self.bind_operator(&modify.where_clause)?;
482                Ok(())
483            }
484            LogicalOperator::ClearGraph(_)
485            | LogicalOperator::CreateGraph(_)
486            | LogicalOperator::DropGraph(_)
487            | LogicalOperator::LoadGraph(_)
488            | LogicalOperator::CopyGraph(_)
489            | LogicalOperator::MoveGraph(_)
490            | LogicalOperator::AddGraph(_) => Ok(()),
491            LogicalOperator::VectorScan(scan) => {
492                // VectorScan introduces a variable for matched nodes
493                if let Some(ref input) = scan.input {
494                    self.bind_operator(input)?;
495                }
496                self.context.add_variable(
497                    scan.variable.clone(),
498                    VariableInfo {
499                        name: scan.variable.clone(),
500                        data_type: LogicalType::Node,
501                        is_node: true,
502                        is_edge: false,
503                    },
504                );
505                // Validate the query vector expression
506                self.validate_expression(&scan.query_vector)?;
507                Ok(())
508            }
509            LogicalOperator::VectorJoin(join) => {
510                // VectorJoin takes input from left side and produces right-side matches
511                self.bind_operator(&join.input)?;
512                // Add right variable for matched nodes
513                self.context.add_variable(
514                    join.right_variable.clone(),
515                    VariableInfo {
516                        name: join.right_variable.clone(),
517                        data_type: LogicalType::Node,
518                        is_node: true,
519                        is_edge: false,
520                    },
521                );
522                // Optionally add score variable
523                if let Some(ref score_var) = join.score_variable {
524                    self.context.add_variable(
525                        score_var.clone(),
526                        VariableInfo {
527                            name: score_var.clone(),
528                            data_type: LogicalType::Float64,
529                            is_node: false,
530                            is_edge: false,
531                        },
532                    );
533                }
534                // Validate the query vector expression
535                self.validate_expression(&join.query_vector)?;
536                Ok(())
537            }
538            // DDL operators don't need binding — they're handled before the binder
539            LogicalOperator::CreatePropertyGraph(_) => Ok(()),
540            // Procedure calls: register yielded columns as variables for downstream operators
541            LogicalOperator::CallProcedure(call) => {
542                if let Some(yields) = &call.yield_items {
543                    for item in yields {
544                        let var_name = item.alias.as_deref().unwrap_or(&item.field_name);
545                        self.context.add_variable(
546                            var_name.to_string(),
547                            VariableInfo {
548                                name: var_name.to_string(),
549                                data_type: LogicalType::Any,
550                                is_node: false,
551                                is_edge: false,
552                            },
553                        );
554                    }
555                }
556                Ok(())
557            }
558        }
559    }
560
561    /// Binds a triple scan operator (for RDF/SPARQL).
562    fn bind_triple_scan(&mut self, scan: &TripleScanOp) -> Result<()> {
563        use crate::query::plan::TripleComponent;
564
565        // First bind the input if present
566        if let Some(ref input) = scan.input {
567            self.bind_operator(input)?;
568        }
569
570        // Add variables for subject, predicate, object
571        if let TripleComponent::Variable(name) = &scan.subject
572            && !self.context.contains(name)
573        {
574            self.context.add_variable(
575                name.clone(),
576                VariableInfo {
577                    name: name.clone(),
578                    data_type: LogicalType::Any, // RDF term
579                    is_node: false,
580                    is_edge: false,
581                },
582            );
583        }
584
585        if let TripleComponent::Variable(name) = &scan.predicate
586            && !self.context.contains(name)
587        {
588            self.context.add_variable(
589                name.clone(),
590                VariableInfo {
591                    name: name.clone(),
592                    data_type: LogicalType::Any, // IRI
593                    is_node: false,
594                    is_edge: false,
595                },
596            );
597        }
598
599        if let TripleComponent::Variable(name) = &scan.object
600            && !self.context.contains(name)
601        {
602            self.context.add_variable(
603                name.clone(),
604                VariableInfo {
605                    name: name.clone(),
606                    data_type: LogicalType::Any, // RDF term
607                    is_node: false,
608                    is_edge: false,
609                },
610            );
611        }
612
613        if let Some(TripleComponent::Variable(name)) = &scan.graph
614            && !self.context.contains(name)
615        {
616            self.context.add_variable(
617                name.clone(),
618                VariableInfo {
619                    name: name.clone(),
620                    data_type: LogicalType::Any, // IRI
621                    is_node: false,
622                    is_edge: false,
623                },
624            );
625        }
626
627        Ok(())
628    }
629
630    /// Binds a node scan operator.
631    fn bind_node_scan(&mut self, scan: &NodeScanOp) -> Result<()> {
632        // First bind the input if present
633        if let Some(ref input) = scan.input {
634            self.bind_operator(input)?;
635        }
636
637        // Add the scanned variable to scope
638        self.context.add_variable(
639            scan.variable.clone(),
640            VariableInfo {
641                name: scan.variable.clone(),
642                data_type: LogicalType::Node,
643                is_node: true,
644                is_edge: false,
645            },
646        );
647
648        Ok(())
649    }
650
651    /// Binds an expand operator.
652    fn bind_expand(&mut self, expand: &ExpandOp) -> Result<()> {
653        // First bind the input
654        self.bind_operator(&expand.input)?;
655
656        // Validate that the source variable is defined
657        if !self.context.contains(&expand.from_variable) {
658            return Err(undefined_variable_error(
659                &expand.from_variable,
660                &self.context,
661                " in EXPAND",
662            ));
663        }
664
665        // Validate that the source is a node
666        if let Some(info) = self.context.get(&expand.from_variable)
667            && !info.is_node
668        {
669            return Err(binding_error(format!(
670                "Variable '{}' is not a node, cannot expand from it",
671                expand.from_variable
672            )));
673        }
674
675        // Add edge variable if present
676        if let Some(ref edge_var) = expand.edge_variable {
677            self.context.add_variable(
678                edge_var.clone(),
679                VariableInfo {
680                    name: edge_var.clone(),
681                    data_type: LogicalType::Edge,
682                    is_node: false,
683                    is_edge: true,
684                },
685            );
686        }
687
688        // Add target variable
689        self.context.add_variable(
690            expand.to_variable.clone(),
691            VariableInfo {
692                name: expand.to_variable.clone(),
693                data_type: LogicalType::Node,
694                is_node: true,
695                is_edge: false,
696            },
697        );
698
699        // Add path variables for variable-length paths
700        if let Some(ref path_alias) = expand.path_alias {
701            // length(p) → _path_length_p
702            let path_length_var = format!("_path_length_{}", path_alias);
703            self.context.add_variable(
704                path_length_var.clone(),
705                VariableInfo {
706                    name: path_length_var,
707                    data_type: LogicalType::Int64,
708                    is_node: false,
709                    is_edge: false,
710                },
711            );
712            // nodes(p) → _path_nodes_p
713            let path_nodes_var = format!("_path_nodes_{}", path_alias);
714            self.context.add_variable(
715                path_nodes_var.clone(),
716                VariableInfo {
717                    name: path_nodes_var,
718                    data_type: LogicalType::Any,
719                    is_node: false,
720                    is_edge: false,
721                },
722            );
723            // edges(p) → _path_edges_p
724            let path_edges_var = format!("_path_edges_{}", path_alias);
725            self.context.add_variable(
726                path_edges_var.clone(),
727                VariableInfo {
728                    name: path_edges_var,
729                    data_type: LogicalType::Any,
730                    is_node: false,
731                    is_edge: false,
732                },
733            );
734        }
735
736        Ok(())
737    }
738
739    /// Binds a filter operator.
740    fn bind_filter(&mut self, filter: &FilterOp) -> Result<()> {
741        // First bind the input
742        self.bind_operator(&filter.input)?;
743
744        // Validate the predicate expression
745        self.validate_expression(&filter.predicate)?;
746
747        Ok(())
748    }
749
750    /// Binds a return operator.
751    fn bind_return(&mut self, ret: &ReturnOp) -> Result<()> {
752        // First bind the input
753        self.bind_operator(&ret.input)?;
754
755        // Validate all return expressions
756        for item in &ret.items {
757            self.validate_return_item(item)?;
758        }
759
760        Ok(())
761    }
762
763    /// Validates a return item.
764    fn validate_return_item(&self, item: &ReturnItem) -> Result<()> {
765        self.validate_expression(&item.expression)
766    }
767
768    /// Validates that an expression only references defined variables.
769    fn validate_expression(&self, expr: &LogicalExpression) -> Result<()> {
770        match expr {
771            LogicalExpression::Variable(name) => {
772                if !self.context.contains(name) && !name.starts_with("_anon_") {
773                    return Err(undefined_variable_error(name, &self.context, ""));
774                }
775                Ok(())
776            }
777            LogicalExpression::Property { variable, .. } => {
778                if !self.context.contains(variable) && !variable.starts_with("_anon_") {
779                    return Err(undefined_variable_error(
780                        variable,
781                        &self.context,
782                        " in property access",
783                    ));
784                }
785                Ok(())
786            }
787            LogicalExpression::Literal(_) => Ok(()),
788            LogicalExpression::Binary { left, right, .. } => {
789                self.validate_expression(left)?;
790                self.validate_expression(right)
791            }
792            LogicalExpression::Unary { operand, .. } => self.validate_expression(operand),
793            LogicalExpression::FunctionCall { args, .. } => {
794                for arg in args {
795                    self.validate_expression(arg)?;
796                }
797                Ok(())
798            }
799            LogicalExpression::List(items) => {
800                for item in items {
801                    self.validate_expression(item)?;
802                }
803                Ok(())
804            }
805            LogicalExpression::Map(pairs) => {
806                for (_, value) in pairs {
807                    self.validate_expression(value)?;
808                }
809                Ok(())
810            }
811            LogicalExpression::IndexAccess { base, index } => {
812                self.validate_expression(base)?;
813                self.validate_expression(index)
814            }
815            LogicalExpression::SliceAccess { base, start, end } => {
816                self.validate_expression(base)?;
817                if let Some(s) = start {
818                    self.validate_expression(s)?;
819                }
820                if let Some(e) = end {
821                    self.validate_expression(e)?;
822                }
823                Ok(())
824            }
825            LogicalExpression::Case {
826                operand,
827                when_clauses,
828                else_clause,
829            } => {
830                if let Some(op) = operand {
831                    self.validate_expression(op)?;
832                }
833                for (cond, result) in when_clauses {
834                    self.validate_expression(cond)?;
835                    self.validate_expression(result)?;
836                }
837                if let Some(else_expr) = else_clause {
838                    self.validate_expression(else_expr)?;
839                }
840                Ok(())
841            }
842            // Parameter references are validated externally
843            LogicalExpression::Parameter(_) => Ok(()),
844            // labels(n), type(e), id(n) need the variable to be defined
845            LogicalExpression::Labels(var)
846            | LogicalExpression::Type(var)
847            | LogicalExpression::Id(var) => {
848                if !self.context.contains(var) && !var.starts_with("_anon_") {
849                    return Err(undefined_variable_error(var, &self.context, " in function"));
850                }
851                Ok(())
852            }
853            LogicalExpression::ListComprehension {
854                list_expr,
855                filter_expr,
856                map_expr,
857                ..
858            } => {
859                // Validate the list expression
860                self.validate_expression(list_expr)?;
861                // Note: filter_expr and map_expr use the comprehension variable
862                // which is defined within the comprehension scope, so we don't
863                // need to validate it against the outer context
864                if let Some(filter) = filter_expr {
865                    self.validate_expression(filter)?;
866                }
867                self.validate_expression(map_expr)?;
868                Ok(())
869            }
870            LogicalExpression::ExistsSubquery(subquery)
871            | LogicalExpression::CountSubquery(subquery) => {
872                // Subqueries have their own binding context
873                // For now, just validate the structure exists
874                let _ = subquery; // Would need recursive binding
875                Ok(())
876            }
877        }
878    }
879
880    /// Infers the type of an expression for use in WITH clause aliasing.
881    fn infer_expression_type(&self, expr: &LogicalExpression) -> LogicalType {
882        match expr {
883            LogicalExpression::Variable(name) => {
884                // Look up the variable type from context
885                self.context
886                    .get(name)
887                    .map_or(LogicalType::Any, |info| info.data_type.clone())
888            }
889            LogicalExpression::Property { .. } => LogicalType::Any, // Properties can be any type
890            LogicalExpression::Literal(value) => {
891                // Infer type from literal value
892                use grafeo_common::types::Value;
893                match value {
894                    Value::Bool(_) => LogicalType::Bool,
895                    Value::Int64(_) => LogicalType::Int64,
896                    Value::Float64(_) => LogicalType::Float64,
897                    Value::String(_) => LogicalType::String,
898                    Value::List(_) => LogicalType::Any, // Complex type
899                    Value::Map(_) => LogicalType::Any,  // Complex type
900                    Value::Null => LogicalType::Any,
901                    _ => LogicalType::Any,
902                }
903            }
904            LogicalExpression::Binary { .. } => LogicalType::Any, // Could be bool or numeric
905            LogicalExpression::Unary { .. } => LogicalType::Any,
906            LogicalExpression::FunctionCall { name, .. } => {
907                // Infer based on function name
908                match name.to_lowercase().as_str() {
909                    "count" | "sum" | "id" => LogicalType::Int64,
910                    "avg" => LogicalType::Float64,
911                    "type" => LogicalType::String,
912                    // List-returning functions use Any since we don't track element type
913                    "labels" | "collect" => LogicalType::Any,
914                    _ => LogicalType::Any,
915                }
916            }
917            LogicalExpression::List(_) => LogicalType::Any, // Complex type
918            LogicalExpression::Map(_) => LogicalType::Any,  // Complex type
919            _ => LogicalType::Any,
920        }
921    }
922
923    /// Binds a join operator.
924    fn bind_join(&mut self, join: &crate::query::plan::JoinOp) -> Result<()> {
925        // Bind both sides of the join
926        self.bind_operator(&join.left)?;
927        self.bind_operator(&join.right)?;
928
929        // Validate join conditions
930        for condition in &join.conditions {
931            self.validate_expression(&condition.left)?;
932            self.validate_expression(&condition.right)?;
933        }
934
935        Ok(())
936    }
937
938    /// Binds an aggregate operator.
939    fn bind_aggregate(&mut self, agg: &crate::query::plan::AggregateOp) -> Result<()> {
940        // Bind the input first
941        self.bind_operator(&agg.input)?;
942
943        // Validate group by expressions
944        for expr in &agg.group_by {
945            self.validate_expression(expr)?;
946        }
947
948        // Validate aggregate expressions
949        for agg_expr in &agg.aggregates {
950            if let Some(ref expr) = agg_expr.expression {
951                self.validate_expression(expr)?;
952            }
953            // Add the alias as a new variable if present
954            if let Some(ref alias) = agg_expr.alias {
955                self.context.add_variable(
956                    alias.clone(),
957                    VariableInfo {
958                        name: alias.clone(),
959                        data_type: LogicalType::Any,
960                        is_node: false,
961                        is_edge: false,
962                    },
963                );
964            }
965        }
966
967        Ok(())
968    }
969}
970
971impl Default for Binder {
972    fn default() -> Self {
973        Self::new()
974    }
975}
976
977#[cfg(test)]
978mod tests {
979    use super::*;
980    use crate::query::plan::{BinaryOp, FilterOp};
981
#[test]
fn test_bind_simple_scan() {
    // Plan shape: Return(n) over NodeScan(n:Person).
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let returned = ReturnItem {
        expression: LogicalExpression::Variable(String::from("n")),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![returned],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
    let ctx = outcome.unwrap();
    // The scanned variable must be known and flagged as a node.
    assert!(ctx.contains("n"));
    assert!(ctx.get("n").unwrap().is_node);
}
1005
#[test]
fn test_bind_undefined_variable() {
    // The scan binds `n`, but the plan returns a variable named `undefined`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("undefined")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable"));
}
1028
#[test]
fn test_bind_property_access() {
    // Returning `n.name` is fine when `n` comes from the scan underneath.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let name_of_n = LogicalExpression::Property {
        variable: String::from("n"),
        property: String::from("name"),
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: name_of_n,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
}
1052
#[test]
fn test_bind_filter_with_undefined_variable() {
    // The predicate is `m.age > 30`, but only `n` is bound by the scan.
    let predicate = LogicalExpression::Binary {
        left: Box::new(LogicalExpression::Property {
            variable: String::from("m"),
            property: String::from("age"),
        }),
        op: BinaryOp::Gt,
        right: Box::new(LogicalExpression::Literal(
            grafeo_common::types::Value::Int64(30),
        )),
    };
    let filtered_scan = LogicalOperator::Filter(FilterOp {
        predicate,
        input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
            variable: String::from("n"),
            label: None,
            input: None,
        })),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("n")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(filtered_scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'm'"));
}
1087
#[test]
fn test_bind_expand() {
    use crate::query::plan::{ExpandDirection, ExpandOp};

    // Plan shape: Return(a, b) over Expand(a -[e:KNOWS]-> b) over NodeScan(a:Person).
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: Some(String::from("Person")),
        input: None,
    });
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: String::from("a"),
        to_variable: String::from("b"),
        edge_variable: Some(String::from("e")),
        direction: ExpandDirection::Outgoing,
        edge_type: Some(String::from("KNOWS")),
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(scan),
        path_alias: None,
    });
    let return_var = |name: &str| ReturnItem {
        expression: LogicalExpression::Variable(String::from(name)),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![return_var("a"), return_var("b")],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
    let ctx = outcome.unwrap();
    // All three names must be in scope...
    for name in ["a", "b", "e"] {
        assert!(ctx.contains(name));
    }
    // ...with the endpoints flagged as nodes and the relationship as an edge.
    assert!(ctx.get("a").unwrap().is_node);
    assert!(ctx.get("b").unwrap().is_node);
    assert!(ctx.get("e").unwrap().is_edge);
}
1133
#[test]
fn test_bind_expand_from_undefined_variable() {
    // Expanding from a variable the input never bound must yield a clear error.
    use crate::query::plan::{ExpandDirection, ExpandOp};

    // The scan binds `a`, yet the expand starts from `undefined`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: String::from("undefined"),
        to_variable: String::from("b"),
        edge_variable: None,
        direction: ExpandDirection::Outgoing,
        edge_type: None,
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(scan),
        path_alias: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("b")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let err = outcome.unwrap_err();
    assert!(
        err.to_string().contains("Undefined variable 'undefined'"),
        "Expected error about undefined variable, got: {}",
        err
    );
}
1173
#[test]
fn test_bind_return_with_aggregate_and_non_aggregate() {
    // A `count(n)` call and a plain literal returned side by side.
    let counted = ReturnItem {
        expression: LogicalExpression::FunctionCall {
            name: String::from("count"),
            args: vec![LogicalExpression::Variable(String::from("n"))],
            distinct: false,
        },
        alias: Some(String::from("cnt")),
    };
    let constant = ReturnItem {
        expression: LogicalExpression::Literal(grafeo_common::types::Value::Int64(1)),
        alias: Some(String::from("one")),
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![counted, constant],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // Binding is expected to accept this combination.
    assert!(outcome.is_ok());
}
1206
#[test]
fn test_bind_nested_property_access() {
    // Two different properties of the same scanned variable are returned.
    let prop_of_n = |property: &str| ReturnItem {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from(property),
        },
        alias: None,
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: Some(String::from("Person")),
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![prop_of_n("name"), prop_of_n("age")],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_ok());
}
1240
#[test]
fn test_bind_binary_expression_with_undefined() {
    // `n.age + m.age` where only `n` is bound: the right operand must be rejected.
    let age_of = |variable: &str| LogicalExpression::Property {
        variable: String::from(variable),
        property: String::from("age"),
    };
    let total = LogicalExpression::Binary {
        left: Box::new(age_of("n")),
        op: BinaryOp::Add,
        right: Box::new(age_of("m")),
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: total,
            alias: Some(String::from("total")),
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'm'"));
}
1278
#[test]
fn test_bind_duplicate_variable_definition() {
    // Despite the test name, the two scans joined here bind *different*
    // variables (`n` and `m`); this checks that both end up in scope.
    use crate::query::plan::{JoinOp, JoinType};

    let labelled_scan = |variable: &str, label: &str| {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: String::from(variable),
            label: Some(String::from(label)),
            input: None,
        })
    };
    let join = LogicalOperator::Join(JoinOp {
        left: Box::new(labelled_scan("n", "A")),
        right: Box::new(labelled_scan("m", "B")),
        join_type: JoinType::Inner,
        conditions: vec![],
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("n")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(join),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);

    // A join over distinct variables binds successfully and exposes both.
    assert!(outcome.is_ok());
    let ctx = outcome.unwrap();
    assert!(ctx.contains("n"));
    assert!(ctx.contains("m"));
}
1316
#[test]
fn test_bind_function_with_wrong_arity() {
    // `count` called with an empty argument list. Nothing is asserted about
    // the outcome: the test only requires that binding does not panic.
    let arg_less_count = LogicalExpression::FunctionCall {
        name: String::from("count"),
        args: vec![],
        distinct: false,
    };
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: arg_less_count,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();

    // Either result is acceptable here, so it is deliberately discarded.
    let _ = binder.bind(&plan);
}
1346
1347    // --- Mutation operator validation ---
1348
#[test]
fn test_create_edge_rejects_undefined_source() {
    use crate::query::plan::CreateEdgeOp;

    // Only `b` is bound by the scan; the edge's source `ghost` is not.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("b"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
        variable: Some(String::from("e")),
        from_variable: String::from("ghost"),
        to_variable: String::from("b"),
        edge_type: String::from("KNOWS"),
        properties: vec![],
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let mentions_ghost = err.to_string().contains("Undefined variable 'ghost'");
    assert!(
        mentions_ghost,
        "Should reject undefined source variable, got: {err}"
    );
}
1373
#[test]
fn test_create_edge_rejects_undefined_target() {
    use crate::query::plan::CreateEdgeOp;

    // Only `a` is bound by the scan; the edge's target `missing` is not.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("a"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
        variable: None,
        from_variable: String::from("a"),
        to_variable: String::from("missing"),
        edge_type: String::from("KNOWS"),
        properties: vec![],
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let mentions_missing = err.to_string().contains("Undefined variable 'missing'");
    assert!(
        mentions_missing,
        "Should reject undefined target variable, got: {err}"
    );
}
1398
#[test]
fn test_create_edge_validates_property_expressions() {
    use crate::query::plan::CreateEdgeOp;

    // Both endpoints are bound via the join, but the `since` property value
    // reads from `x`, which nothing binds.
    let bare_scan = |variable: &str| {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: String::from(variable),
            label: None,
            input: None,
        })
    };
    let endpoints = LogicalOperator::Join(crate::query::plan::JoinOp {
        left: Box::new(bare_scan("a")),
        right: Box::new(bare_scan("b")),
        join_type: crate::query::plan::JoinType::Inner,
        conditions: vec![],
    });
    let since_value = LogicalExpression::Property {
        variable: String::from("x"),
        property: String::from("year"),
    };
    let plan = LogicalPlan::new(LogicalOperator::CreateEdge(CreateEdgeOp {
        variable: Some(String::from("e")),
        from_variable: String::from("a"),
        to_variable: String::from("b"),
        edge_type: String::from("KNOWS"),
        properties: vec![(String::from("since"), since_value)],
        input: Box::new(endpoints),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    assert!(err.to_string().contains("Undefined variable 'x'"));
}
1436
#[test]
fn test_set_property_rejects_undefined_variable() {
    use crate::query::plan::SetPropertyOp;

    // The SET targets `ghost`, while the scan only binds `n`.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let new_name = LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into()));
    let plan = LogicalPlan::new(LogicalOperator::SetProperty(SetPropertyOp {
        variable: String::from("ghost"),
        properties: vec![(String::from("name"), new_name)],
        replace: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let names_set_context = err.to_string().contains("in SET");
    assert!(
        names_set_context,
        "Error should indicate SET context, got: {err}"
    );
}
1462
#[test]
fn test_delete_node_rejects_undefined_variable() {
    use crate::query::plan::DeleteNodeOp;

    // Deleting `phantom` when only `n` has been bound must fail.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::DeleteNode(DeleteNodeOp {
        variable: String::from("phantom"),
        detach: false,
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'phantom'"));
}
1481
#[test]
fn test_delete_edge_rejects_undefined_variable() {
    use crate::query::plan::DeleteEdgeOp;

    // Deleting edge `gone` when only `n` has been bound must fail.
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::DeleteEdge(DeleteEdgeOp {
        variable: String::from("gone"),
        input: Box::new(scan),
    }));

    let mut binder = Binder::new();
    let message = binder.bind(&plan).unwrap_err().to_string();
    assert!(message.contains("Undefined variable 'gone'"));
}
1499
1500    // --- WITH/Project clause ---
1501
#[test]
fn test_project_alias_becomes_available_downstream() {
    use crate::query::plan::{ProjectOp, Projection};

    // WITH n.name AS person_name RETURN person_name
    let scan = LogicalOperator::NodeScan(NodeScanOp {
        variable: String::from("n"),
        label: None,
        input: None,
    });
    let aliased_name = Projection {
        expression: LogicalExpression::Property {
            variable: String::from("n"),
            property: String::from("name"),
        },
        alias: Some(String::from("person_name")),
    };
    let project = LogicalOperator::Project(ProjectOp {
        projections: vec![aliased_name],
        input: Box::new(scan),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("person_name")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(project),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    let alias_in_scope = ctx.contains("person_name");
    assert!(alias_in_scope, "WITH alias should be available to RETURN");
}
1536
#[test]
fn test_project_rejects_undefined_expression() {
    use crate::query::plan::{ProjectOp, Projection};

    // The projection reads `nope`, and the empty input binds nothing at all.
    let projection = Projection {
        expression: LogicalExpression::Variable(String::from("nope")),
        alias: Some(String::from("x")),
    };
    let plan = LogicalPlan::new(LogicalOperator::Project(ProjectOp {
        projections: vec![projection],
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(outcome.is_err(), "WITH on undefined variable should fail");
}
1553
1554    // --- UNWIND ---
1555
#[test]
fn test_unwind_adds_element_variable() {
    use crate::query::plan::UnwindOp;

    // Unwind the literal list [1, 2] into `item`, then return `item`.
    let int_literal = |n: i64| LogicalExpression::Literal(grafeo_common::types::Value::Int64(n));
    let unwind = LogicalOperator::Unwind(UnwindOp {
        expression: LogicalExpression::List(vec![int_literal(1), int_literal(2)]),
        variable: String::from("item"),
        ordinality_var: None,
        offset_var: None,
        input: Box::new(LogicalOperator::Empty),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("item")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(unwind),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("item"), "UNWIND variable should be in scope");

    let info = ctx.get("item").unwrap();
    let is_graph_element = info.is_node || info.is_edge;
    assert!(!is_graph_element, "UNWIND variable is not a graph element");
}
1587
1588    // --- MERGE ---
1589
#[test]
fn test_merge_adds_variable_and_validates_properties() {
    use crate::query::plan::MergeOp;

    // A merge on `m:Person` whose match / on-create / on-match property
    // values are all literals, followed by returning `m`.
    let flag = || LogicalExpression::Literal(grafeo_common::types::Value::Bool(true));
    let merge = LogicalOperator::Merge(MergeOp {
        variable: String::from("m"),
        labels: vec![String::from("Person")],
        match_properties: vec![(
            String::from("name"),
            LogicalExpression::Literal(grafeo_common::types::Value::String("Alice".into())),
        )],
        on_create: vec![(String::from("created"), flag())],
        on_match: vec![(String::from("updated"), flag())],
        input: Box::new(LogicalOperator::Empty),
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable(String::from("m")),
            alias: None,
        }],
        distinct: false,
        input: Box::new(merge),
    }));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("m"));

    let merged_is_node = ctx.get("m").unwrap().is_node;
    assert!(merged_is_node, "MERGE variable should be a node");
}
1627
/// An ON CREATE property that reads from a variable nobody defined must make
/// binding fail.
#[test]
fn test_merge_rejects_undefined_in_on_create() {
    use crate::query::plan::MergeOp;

    // `other` is never introduced anywhere in the plan.
    let dangling = LogicalExpression::Property {
        variable: "other".to_owned(),
        property: "name".to_owned(),
    };

    let merge = MergeOp {
        variable: "m".to_owned(),
        labels: Vec::new(),
        match_properties: Vec::new(),
        on_create: vec![("name".to_owned(), dangling)],
        on_match: Vec::new(),
        input: Box::new(LogicalOperator::Empty),
    };
    let plan = LogicalPlan::new(LogicalOperator::Merge(merge));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_err(),
        "ON CREATE referencing undefined variable should fail"
    );
}
1654
1655    // --- ShortestPath ---
1656
/// A shortest-path operator whose source variable is not bound by its input
/// must fail with an error that names the shortestPath source context.
#[test]
fn test_shortest_path_rejects_undefined_source() {
    use crate::query::plan::{ExpandDirection, ShortestPathOp};

    // The input only binds `b`; `missing` is never defined.
    let scan_b = LogicalOperator::NodeScan(NodeScanOp {
        variable: "b".to_owned(),
        label: None,
        input: None,
    });

    let shortest = ShortestPathOp {
        input: Box::new(scan_b),
        source_var: "missing".to_owned(),
        target_var: "b".to_owned(),
        edge_type: None,
        direction: ExpandDirection::Both,
        path_alias: "p".to_owned(),
        all_paths: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::ShortestPath(shortest));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("source in shortestPath"),
        "Error should mention shortestPath source context, got: {err}"
    );
}
1682
/// A valid shortest-path operator must bind both its path alias and the
/// companion `_path_length_<alias>` variable.
#[test]
fn test_shortest_path_adds_path_and_length_variables() {
    use crate::query::plan::{ExpandDirection, JoinOp, JoinType, ShortestPathOp};

    // Builds an unlabelled node scan that binds `name`.
    let scan = |name: &str| {
        LogicalOperator::NodeScan(NodeScanOp {
            variable: name.to_owned(),
            label: None,
            input: None,
        })
    };

    // Cross join so that both endpoints `a` and `b` are in scope.
    let endpoints = LogicalOperator::Join(JoinOp {
        left: Box::new(scan("a")),
        right: Box::new(scan("b")),
        join_type: JoinType::Cross,
        conditions: Vec::new(),
    });

    let shortest = ShortestPathOp {
        input: Box::new(endpoints),
        source_var: "a".to_owned(),
        target_var: "b".to_owned(),
        edge_type: Some("ROAD".to_owned()),
        direction: ExpandDirection::Outgoing,
        path_alias: "p".to_owned(),
        all_paths: false,
    };
    let plan = LogicalPlan::new(LogicalOperator::ShortestPath(shortest));

    let mut binder = Binder::new();
    let ctx = binder.bind(&plan).unwrap();
    assert!(ctx.contains("p"), "Path alias should be bound");
    assert!(
        ctx.contains("_path_length_p"),
        "Path length variable should be auto-created"
    );
}
1718
1719    // --- Expression validation edge cases ---
1720
/// Every WHEN condition of a CASE expression must be validated: an undefined
/// variable in the second branch must still surface in the error.
#[test]
fn test_case_expression_validates_all_branches() {
    use grafeo_common::types::Value;

    // First branch: `n.age > 18` -> "adult". `n` is bound by the scan below.
    let adult_branch = (
        LogicalExpression::Binary {
            left: Box::new(LogicalExpression::Property {
                variable: "n".to_owned(),
                property: "age".to_owned(),
            }),
            op: BinaryOp::Gt,
            right: Box::new(LogicalExpression::Literal(Value::Int64(18))),
        },
        LogicalExpression::Literal(Value::String("adult".into())),
    );

    // Second branch: `ghost.flag` -> "flagged". `ghost` is never defined.
    let ghost_branch = (
        LogicalExpression::Property {
            variable: "ghost".to_owned(),
            property: "flag".to_owned(),
        },
        LogicalExpression::Literal(Value::String("flagged".into())),
    );

    let case_expr = LogicalExpression::Case {
        operand: None,
        when_clauses: vec![adult_branch, ghost_branch],
        else_clause: Some(Box::new(LogicalExpression::Literal(Value::String(
            "other".into(),
        )))),
    };

    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_owned(),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: case_expr,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan_n),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("ghost"),
        "CASE should validate all when-clause conditions"
    );
}
1775
/// The ELSE arm of a CASE expression must be validated too: referencing an
/// undefined variable there must produce an error naming it.
#[test]
fn test_case_expression_validates_else_clause() {
    use grafeo_common::types::Value;

    // The only WHEN branch is made of literals and is therefore valid.
    let literal_branch = (
        LogicalExpression::Literal(Value::Bool(true)),
        LogicalExpression::Literal(Value::Int64(1)),
    );

    // ELSE reads `missing.x`, but only `n` is bound by the scan.
    let dangling_else = LogicalExpression::Property {
        variable: "missing".to_owned(),
        property: "x".to_owned(),
    };

    let case_expr = LogicalExpression::Case {
        operand: None,
        when_clauses: vec![literal_branch],
        else_clause: Some(Box::new(dangling_else)),
    };

    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_owned(),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: case_expr,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan_n),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("missing"),
        "CASE ELSE should validate its expression too"
    );
}
1808
/// The bounds of a slice access are expressions and must be validated: an
/// undefined variable used as the slice start must be named in the error.
#[test]
fn test_slice_access_validates_expressions() {
    // Base `n` is bound by the scan below; the start bound is not defined.
    let slice = LogicalExpression::SliceAccess {
        base: Box::new(LogicalExpression::Variable("n".to_string())),
        start: Some(Box::new(LogicalExpression::Variable(
            "undefined_start".to_string(),
        ))),
        end: None,
    };

    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: slice,
            alias: None,
        }],
        distinct: false,
        input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
            variable: "n".to_string(),
            label: None,
            input: None,
        })),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    // Report the actual error on failure so a regression is diagnosable
    // without re-running under a debugger.
    assert!(
        err.to_string().contains("undefined_start"),
        "Slice start expression should be validated, got: {err}"
    );
}
1834
/// The source list of a list comprehension must be validated: iterating over
/// an undefined variable must produce an error naming it.
#[test]
fn test_list_comprehension_validates_list_source() {
    // The comprehension iterates over `not_defined`, which no operator binds.
    let comprehension = LogicalExpression::ListComprehension {
        variable: "x".to_owned(),
        list_expr: Box::new(LogicalExpression::Variable("not_defined".to_owned())),
        filter_expr: None,
        map_expr: Box::new(LogicalExpression::Variable("x".to_owned())),
    };

    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_owned(),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: comprehension,
            alias: None,
        }],
        distinct: false,
        input: Box::new(scan_n),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("not_defined"),
        "List comprehension should validate source list expression"
    );
}
1862
/// `labels(..)`, `type(..)` and `id(..)` applied to a variable that was never
/// defined must each make binding fail.
#[test]
fn test_labels_type_id_reject_undefined() {
    // One entry per function: the expression under test and the message
    // reported if binding unexpectedly succeeds.
    let cases = [
        (
            LogicalExpression::Labels("x".to_owned()),
            "labels(x) on undefined x should fail",
        ),
        (
            LogicalExpression::Type("e".to_owned()),
            "type(e) on undefined e should fail",
        ),
        (
            LogicalExpression::Id("n".to_owned()),
            "id(n) on undefined n should fail",
        ),
    ];

    for (expression, failure_message) in cases {
        // The input is empty, so nothing is in scope for the expression.
        let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
            items: vec![ReturnItem {
                expression,
                alias: None,
            }],
            distinct: false,
            input: Box::new(LogicalOperator::Empty),
        }));

        // A fresh binder per case keeps the checks independent.
        let mut binder = Binder::new();
        assert!(binder.bind(&plan).is_err(), "{failure_message}");
    }
}
1913
/// Expanding from a variable that is bound but is not a node must fail with a
/// "not a node" error.
#[test]
fn test_expand_rejects_non_node_source() {
    use crate::query::plan::{ExpandDirection, ExpandOp, UnwindOp};

    // UNWIND [] AS x  -- binds `x` as a plain (non-node) value
    let unwind_x = LogicalOperator::Unwind(UnwindOp {
        expression: LogicalExpression::List(Vec::new()),
        variable: "x".to_owned(),
        ordinality_var: None,
        offset_var: None,
        input: Box::new(LogicalOperator::Empty),
    });

    // MATCH (x)-->(b)  -- should fail: `x` isn't a node
    let expand = LogicalOperator::Expand(ExpandOp {
        from_variable: "x".to_owned(),
        to_variable: "b".to_owned(),
        edge_variable: None,
        direction: ExpandDirection::Outgoing,
        edge_type: None,
        min_hops: 1,
        max_hops: Some(1),
        input: Box::new(unwind_x),
        path_alias: None,
    });

    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: LogicalExpression::Variable("b".to_owned()),
            alias: None,
        }],
        distinct: false,
        input: Box::new(expand),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("not a node"),
        "Expanding from non-node should fail, got: {err}"
    );
}
1952
/// Adding labels to a variable that was never defined must fail, and the
/// error must mention the `SET labels` context.
#[test]
fn test_add_label_rejects_undefined_variable() {
    use crate::query::plan::AddLabelOp;

    // The input is empty, so `missing` cannot be in scope.
    let plan = LogicalPlan::new(LogicalOperator::AddLabel(AddLabelOp {
        variable: "missing".to_string(),
        labels: vec!["Admin".to_string()],
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    // Include the actual error so a wording regression is easy to diagnose.
    assert!(
        err.to_string().contains("SET labels"),
        "Error should mention SET labels context, got: {err}"
    );
}
1967
/// Removing labels from a variable that was never defined must fail, and the
/// error must mention the `REMOVE labels` context.
#[test]
fn test_remove_label_rejects_undefined_variable() {
    use crate::query::plan::RemoveLabelOp;

    // The input is empty, so `missing` cannot be in scope.
    let plan = LogicalPlan::new(LogicalOperator::RemoveLabel(RemoveLabelOp {
        variable: "missing".to_string(),
        labels: vec!["Admin".to_string()],
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    // Include the actual error so a wording regression is easy to diagnose.
    assert!(
        err.to_string().contains("REMOVE labels"),
        "Error should mention REMOVE labels context, got: {err}"
    );
}
1982
/// Sort keys are expressions and must be validated: ordering by a property of
/// an undefined variable must make binding fail.
#[test]
fn test_sort_validates_key_expressions() {
    use crate::query::plan::{SortKey, SortOp, SortOrder};

    // The key reads `missing.name`, but the scan only binds `n`.
    let dangling_key = SortKey {
        expression: LogicalExpression::Property {
            variable: "missing".to_owned(),
            property: "name".to_owned(),
        },
        order: SortOrder::Ascending,
    };

    let scan_n = LogicalOperator::NodeScan(NodeScanOp {
        variable: "n".to_owned(),
        label: None,
        input: None,
    });
    let plan = LogicalPlan::new(LogicalOperator::Sort(SortOp {
        keys: vec![dangling_key],
        input: Box::new(scan_n),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_err(),
        "ORDER BY on undefined variable should fail"
    );
}
2008
/// A CREATE whose property expression refers to the node being created must
/// bind successfully, and the created variable must be registered as a node.
#[test]
fn test_create_node_adds_variable_before_property_validation() {
    use crate::query::plan::CreateNodeOp;

    // CREATE (n:Person {self_ref: n.name}) - the property value reads from
    // the very node that this operator introduces (self-reference).
    let self_reference = LogicalExpression::Property {
        variable: "n".to_owned(),
        property: "name".to_owned(),
    };

    let create = CreateNodeOp {
        variable: "n".to_owned(),
        labels: vec!["Person".to_owned()],
        properties: vec![("self_ref".to_owned(), self_reference)],
        input: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::CreateNode(create));

    let mut binder = Binder::new();
    // Binding succeeding here shows `n` was already in scope when the
    // property expression was validated.
    let ctx = binder.bind(&plan).unwrap();
    let created = ctx.get("n").unwrap();
    assert!(created.is_node);
}
2033
/// Returning a misspelled variable (`persn` while `person` is defined) must
/// fail with an error that at least names the undefined variable.
#[test]
fn test_undefined_variable_suggests_similar() {
    // `person` is the only variable in scope.
    let scan_person = LogicalOperator::NodeScan(NodeScanOp {
        variable: "person".to_owned(),
        label: None,
        input: None,
    });

    // The RETURN refers to the typo `persn` instead.
    let typo = ReturnItem {
        expression: LogicalExpression::Variable("persn".to_owned()),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![typo],
        distinct: false,
        input: Box::new(scan_person),
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    let msg = err.to_string();
    // Only the variable name is checked; the suggestion text itself is not
    // asserted by this test.
    assert!(
        msg.contains("persn"),
        "Error should mention the undefined variable"
    );
}
2059
/// A variable named with the `_anon_` prefix must bind successfully even
/// though no operator in the plan defines it.
#[test]
fn test_anon_variables_skip_validation() {
    // `_anon_42` is returned directly on top of an empty input.
    let anonymous = ReturnItem {
        expression: LogicalExpression::Variable("_anon_42".to_owned()),
        alias: None,
    };
    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![anonymous],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(
        outcome.is_ok(),
        "Anonymous variables should bypass validation"
    );
}
2079
/// Values inside a map expression must be validated: a map entry whose value
/// is an undefined variable must make binding fail.
#[test]
fn test_map_expression_validates_values() {
    // Single-entry map whose value refers to a variable nobody defined.
    let map_expr = LogicalExpression::Map(vec![(
        "key".to_owned(),
        LogicalExpression::Variable("undefined".to_owned()),
    )]);

    let plan = LogicalPlan::new(LogicalOperator::Return(ReturnOp {
        items: vec![ReturnItem {
            expression: map_expr,
            alias: None,
        }],
        distinct: false,
        input: Box::new(LogicalOperator::Empty),
    }));

    let mut binder = Binder::new();
    let outcome = binder.bind(&plan);
    assert!(outcome.is_err(), "Map values should be validated");
}
2100
/// The query vector of a vector scan is an expression and must be validated:
/// an undefined variable used there must be named in the error.
#[test]
fn test_vector_scan_validates_query_vector() {
    use crate::query::plan::VectorScanOp;

    // The scan has no input, so `undefined_vec` cannot be in scope.
    let plan = LogicalPlan::new(LogicalOperator::VectorScan(VectorScanOp {
        variable: "result".to_string(),
        index_name: None,
        property: "embedding".to_string(),
        label: Some("Doc".to_string()),
        query_vector: LogicalExpression::Variable("undefined_vec".to_string()),
        k: 10,
        metric: None,
        min_similarity: None,
        max_distance: None,
        input: None,
    }));

    let mut binder = Binder::new();
    let err = binder.bind(&plan).unwrap_err();
    // Include the actual error so a regression is easy to diagnose.
    assert!(
        err.to_string().contains("undefined_vec"),
        "Query vector expression should be validated, got: {err}"
    );
}
2122}