Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::PredicateAnalyzer;
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Semantic category assigned to a variable that is in scope while a query is
/// being validated and planned.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum VariableType {
    /// A node binding, e.g. `n` in `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship binding, e.g. `r` in `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. `p` in `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A computed value (`WITH expr AS x`, `UNWIND list AS item`, ...).
    /// It may hold a map or another dynamic value, so property access is allowed.
    Scalar,
    /// A value known to originate from a non-graph literal (int, float, bool,
    /// string, list). Property access on it is NOT allowed at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (the
    /// string variables passed to `plan_with_scope`). It is compatible with any
    /// concrete type, which allows subqueries to re-bind the variable.
    Imported,
}
45
46impl VariableType {
47    /// Returns true if this type is compatible with the expected type.
48    ///
49    /// `Imported` is always compatible because the actual type is unknown at plan time.
50    fn is_compatible_with(self, expected: VariableType) -> bool {
51        self == expected
52            || self == VariableType::Imported
53            // ScalarLiteral behaves like Scalar for compatibility checks
54            || (self == VariableType::ScalarLiteral && expected == VariableType::Scalar)
55    }
56}
57
58/// Information about a variable in scope during planning.
59#[derive(Debug, Clone)]
60pub struct VariableInfo {
61    /// Variable name as written in the query.
62    pub name: String,
63    /// Semantic type of the variable.
64    pub var_type: VariableType,
65    /// True if this is a variable-length path (VLP) step variable.
66    ///
67    /// VLP step variables are typed as Edge but semantically hold edge lists.
68    pub is_vlp: bool,
69}
70
71impl VariableInfo {
72    pub fn new(name: String, var_type: VariableType) -> Self {
73        Self {
74            name,
75            var_type,
76            is_vlp: false,
77        }
78    }
79}
80
81/// Find a variable in scope by name.
82fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
83    vars.iter().find(|v| v.name == name)
84}
85
86/// Check if a variable is in scope.
87fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
88    find_var_in_scope(vars, name).is_some()
89}
90
91/// Check if an expression contains a pattern predicate.
92fn contains_pattern_predicate(expr: &Expr) -> bool {
93    if matches!(
94        expr,
95        Expr::Exists {
96            from_pattern_predicate: true,
97            ..
98        }
99    ) {
100        return true;
101    }
102    let mut found = false;
103    expr.for_each_child(&mut |child| {
104        if !found {
105            found = contains_pattern_predicate(child);
106        }
107    });
108    found
109}
110
111/// Add a variable to scope with type conflict validation.
112/// Returns an error if the variable already exists with a different type.
113fn add_var_to_scope(
114    vars: &mut Vec<VariableInfo>,
115    name: &str,
116    var_type: VariableType,
117) -> Result<()> {
118    if name.is_empty() {
119        return Ok(());
120    }
121
122    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
123        if existing.var_type == VariableType::Imported {
124            // Imported vars upgrade to the concrete type
125            existing.var_type = var_type;
126        } else if var_type == VariableType::Imported || existing.var_type == var_type {
127            // New type is Imported (keep existing) or same type — no conflict
128        } else if matches!(
129            existing.var_type,
130            VariableType::Scalar | VariableType::ScalarLiteral
131        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
132        {
133            // Scalar can be used as Node/Edge in CREATE context — a scalar
134            // holding a node/edge reference is valid for pattern use
135            existing.var_type = var_type;
136        } else {
137            return Err(anyhow!(
138                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
139                name,
140                existing.var_type,
141                var_type
142            ));
143        }
144    } else {
145        vars.push(VariableInfo::new(name.to_string(), var_type));
146    }
147    Ok(())
148}
149
150/// Convert VariableInfo vec to String vec for backward compatibility
151fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
152    vars.iter().map(|v| v.name.clone()).collect()
153}
154
155fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
156    match expr {
157        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
158            .map(|info| info.var_type)
159            .unwrap_or(VariableType::Scalar),
160        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
161        // Known non-graph literals: property access is NOT valid on these.
162        Expr::Literal(CypherLiteral::Integer(_))
163        | Expr::Literal(CypherLiteral::Float(_))
164        | Expr::Literal(CypherLiteral::String(_))
165        | Expr::Literal(CypherLiteral::Bool(_))
166        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
167        Expr::FunctionCall { name, args, .. } => {
168            let lower = name.to_lowercase();
169            if lower == "coalesce" {
170                infer_coalesce_type(args, vars_in_scope)
171            } else if lower == "collect" && !args.is_empty() {
172                let collected = infer_with_output_type(&args[0], vars_in_scope);
173                if matches!(
174                    collected,
175                    VariableType::Node
176                        | VariableType::Edge
177                        | VariableType::Path
178                        | VariableType::Imported
179                ) {
180                    collected
181                } else {
182                    VariableType::Scalar
183                }
184            } else {
185                VariableType::Scalar
186            }
187        }
188        // WITH list literals/expressions produce scalar list values. Preserving
189        // entity typing here causes invalid node/edge reuse in later MATCH clauses
190        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
191        // Lists are ScalarLiteral since property access is not valid on them.
192        Expr::List(_) => VariableType::ScalarLiteral,
193        _ => VariableType::Scalar,
194    }
195}
196
197fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
198    let mut resolved: Option<VariableType> = None;
199    let mut saw_imported = false;
200    for arg in args {
201        let t = infer_with_output_type(arg, vars_in_scope);
202        match t {
203            VariableType::Node | VariableType::Edge | VariableType::Path => {
204                if let Some(existing) = resolved {
205                    if existing != t {
206                        return VariableType::Scalar;
207                    }
208                } else {
209                    resolved = Some(t);
210                }
211            }
212            VariableType::Imported => saw_imported = true,
213            VariableType::Scalar | VariableType::ScalarLiteral => {}
214        }
215    }
216    if let Some(t) = resolved {
217        t
218    } else if saw_imported {
219        VariableType::Imported
220    } else {
221        VariableType::Scalar
222    }
223}
224
225fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
226    match expr {
227        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
228            .map(|info| info.var_type)
229            .unwrap_or(VariableType::Scalar),
230        Expr::FunctionCall { name, args, .. }
231            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
232        {
233            infer_with_output_type(&args[0], vars_in_scope)
234        }
235        Expr::List(items) => {
236            let mut inferred: Option<VariableType> = None;
237            for item in items {
238                let t = infer_with_output_type(item, vars_in_scope);
239                if !matches!(
240                    t,
241                    VariableType::Node
242                        | VariableType::Edge
243                        | VariableType::Path
244                        | VariableType::Imported
245                ) {
246                    return VariableType::Scalar;
247                }
248                if let Some(existing) = inferred {
249                    if existing != t
250                        && t != VariableType::Imported
251                        && existing != VariableType::Imported
252                    {
253                        return VariableType::Scalar;
254                    }
255                    if existing == VariableType::Imported && t != VariableType::Imported {
256                        inferred = Some(t);
257                    }
258                } else {
259                    inferred = Some(t);
260                }
261            }
262            inferred.unwrap_or(VariableType::Scalar)
263        }
264        _ => VariableType::Scalar,
265    }
266}
267
268/// Collect all variable names referenced in an expression
269fn collect_expr_variables(expr: &Expr) -> Vec<String> {
270    let mut vars = Vec::new();
271    collect_expr_variables_inner(expr, &mut vars);
272    vars
273}
274
275fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
276    let mut add_var = |name: &String| {
277        if !vars.contains(name) {
278            vars.push(name.clone());
279        }
280    };
281
282    match expr {
283        Expr::Variable(name) => add_var(name),
284        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
285        Expr::BinaryOp { left, right, .. } => {
286            collect_expr_variables_inner(left, vars);
287            collect_expr_variables_inner(right, vars);
288        }
289        Expr::UnaryOp { expr: e, .. }
290        | Expr::IsNull(e)
291        | Expr::IsNotNull(e)
292        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
293        Expr::FunctionCall { args, .. } => {
294            for a in args {
295                collect_expr_variables_inner(a, vars);
296            }
297        }
298        Expr::List(items) => {
299            for item in items {
300                collect_expr_variables_inner(item, vars);
301            }
302        }
303        Expr::In { expr: e, list } => {
304            collect_expr_variables_inner(e, vars);
305            collect_expr_variables_inner(list, vars);
306        }
307        Expr::Case {
308            expr: case_expr,
309            when_then,
310            else_expr,
311        } => {
312            if let Some(e) = case_expr {
313                collect_expr_variables_inner(e, vars);
314            }
315            for (w, t) in when_then {
316                collect_expr_variables_inner(w, vars);
317                collect_expr_variables_inner(t, vars);
318            }
319            if let Some(e) = else_expr {
320                collect_expr_variables_inner(e, vars);
321            }
322        }
323        Expr::Map(entries) => {
324            for (_, v) in entries {
325                collect_expr_variables_inner(v, vars);
326            }
327        }
328        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
329        Expr::ArrayIndex { array, index } => {
330            collect_expr_variables_inner(array, vars);
331            collect_expr_variables_inner(index, vars);
332        }
333        Expr::ArraySlice { array, start, end } => {
334            collect_expr_variables_inner(array, vars);
335            if let Some(s) = start {
336                collect_expr_variables_inner(s, vars);
337            }
338            if let Some(e) = end {
339                collect_expr_variables_inner(e, vars);
340            }
341        }
342        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
343        // they introduce local variable bindings not in outer scope.
344        _ => {}
345    }
346}
347
348/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
349///
350/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
351/// before the final RETURN projection without losing alias semantics.
352fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
353    let repr = expr.to_string_repr();
354    if let Some(rewritten) = aliases.get(&repr) {
355        return rewritten.clone();
356    }
357
358    match expr {
359        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
360        Expr::Property(base, prop) => Expr::Property(
361            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
362            prop.clone(),
363        ),
364        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
365            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
366            op: *op,
367            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
368        },
369        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
370            op: *op,
371            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
372        },
373        Expr::FunctionCall {
374            name,
375            args,
376            distinct,
377            window_spec,
378        } => Expr::FunctionCall {
379            name: name.clone(),
380            args: args
381                .iter()
382                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
383                .collect(),
384            distinct: *distinct,
385            window_spec: window_spec.clone(),
386        },
387        Expr::List(items) => Expr::List(
388            items
389                .iter()
390                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
391                .collect(),
392        ),
393        Expr::Map(entries) => Expr::Map(
394            entries
395                .iter()
396                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
397                .collect(),
398        ),
399        Expr::Case {
400            expr: case_expr,
401            when_then,
402            else_expr,
403        } => Expr::Case {
404            expr: case_expr
405                .as_ref()
406                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
407            when_then: when_then
408                .iter()
409                .map(|(w, t)| {
410                    (
411                        rewrite_order_by_expr_with_aliases(w, aliases),
412                        rewrite_order_by_expr_with_aliases(t, aliases),
413                    )
414                })
415                .collect(),
416            else_expr: else_expr
417                .as_ref()
418                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
419        },
420        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
421        // they introduce local variable bindings that could shadow aliases.
422        _ => expr.clone(),
423    }
424}
425
426/// Validate function call argument types.
427/// Returns error if type constraints are violated.
428fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
429    let name_lower = name.to_lowercase();
430
431    // labels() requires Node
432    if name_lower == "labels"
433        && let Some(Expr::Variable(var_name)) = args.first()
434        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
435        && !info.var_type.is_compatible_with(VariableType::Node)
436    {
437        return Err(anyhow!(
438            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
439        ));
440    }
441
442    // type() requires Edge
443    if name_lower == "type"
444        && let Some(Expr::Variable(var_name)) = args.first()
445        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
446        && !info.var_type.is_compatible_with(VariableType::Edge)
447    {
448        return Err(anyhow!(
449            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
450        ));
451    }
452
453    // properties() requires Node/Edge/Map (not scalar literals)
454    if name_lower == "properties"
455        && let Some(arg) = args.first()
456    {
457        match arg {
458            Expr::Literal(CypherLiteral::Integer(_))
459            | Expr::Literal(CypherLiteral::Float(_))
460            | Expr::Literal(CypherLiteral::String(_))
461            | Expr::Literal(CypherLiteral::Bool(_))
462            | Expr::List(_) => {
463                return Err(anyhow!(
464                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
465                ));
466            }
467            Expr::Variable(var_name) => {
468                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
469                    && matches!(
470                        info.var_type,
471                        VariableType::Scalar | VariableType::ScalarLiteral
472                    )
473                {
474                    return Err(anyhow!(
475                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
476                    ));
477                }
478            }
479            _ => {}
480        }
481    }
482
483    // nodes()/relationships() require Path
484    if (name_lower == "nodes" || name_lower == "relationships")
485        && let Some(Expr::Variable(var_name)) = args.first()
486        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
487        && !info.var_type.is_compatible_with(VariableType::Path)
488    {
489        return Err(anyhow!(
490            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
491            name_lower
492        ));
493    }
494
495    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
496    if name_lower == "size"
497        && let Some(Expr::Variable(var_name)) = args.first()
498        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
499        && info.var_type == VariableType::Path
500    {
501        return Err(anyhow!(
502            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
503        ));
504    }
505
506    // length()/size() do NOT accept Node or single-Edge arguments.
507    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
508    // but are actually edge lists — size()/length() is valid on those.
509    if (name_lower == "length" || name_lower == "size")
510        && let Some(Expr::Variable(var_name)) = args.first()
511        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
512        && (info.var_type == VariableType::Node
513            || (info.var_type == VariableType::Edge && !info.is_vlp))
514    {
515        return Err(anyhow!(
516            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
517            name_lower
518        ));
519    }
520
521    Ok(())
522}
523
524/// Check if an expression is a non-boolean literal.
525fn is_non_boolean_literal(expr: &Expr) -> bool {
526    matches!(
527        expr,
528        Expr::Literal(CypherLiteral::Integer(_))
529            | Expr::Literal(CypherLiteral::Float(_))
530            | Expr::Literal(CypherLiteral::String(_))
531            | Expr::List(_)
532            | Expr::Map(_)
533    )
534}
535
536/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
537fn validate_boolean_expression(expr: &Expr) -> Result<()> {
538    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
539    if let Expr::BinaryOp { left, op, right } = expr
540        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
541    {
542        let op_name = format!("{op:?}").to_uppercase();
543        for operand in [left.as_ref(), right.as_ref()] {
544            if is_non_boolean_literal(operand) {
545                return Err(anyhow!(
546                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
547                    op_name
548                ));
549            }
550        }
551    }
552    if let Expr::UnaryOp {
553        op: uni_cypher::ast::UnaryOp::Not,
554        expr: inner,
555    } = expr
556        && is_non_boolean_literal(inner)
557    {
558        return Err(anyhow!(
559            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
560        ));
561    }
562    let mut result = Ok(());
563    expr.for_each_child(&mut |child| {
564        if result.is_ok() {
565            result = validate_boolean_expression(child);
566        }
567    });
568    result
569}
570
571/// Validate that all variables used in an expression are in scope.
572fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
573    let used_vars = collect_expr_variables(expr);
574    for var in used_vars {
575        if !is_var_in_scope(vars_in_scope, &var) {
576            return Err(anyhow!(
577                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
578                var
579            ));
580        }
581    }
582    Ok(())
583}
584
/// Check whether `name` is one of the known aggregate functions.
///
/// The comparison is case-insensitive: `name` is lower-cased before it is
/// looked up, so callers may pass it in any case.
fn is_aggregate_function_name(name: &str) -> bool {
    const AGGREGATE_NAMES: [&str; 10] = [
        "count",
        "sum",
        "avg",
        "min",
        "max",
        "collect",
        "stdev",
        "stdevp",
        "percentiledisc",
        "percentilecont",
    ];

    let lowered = name.to_lowercase();
    AGGREGATE_NAMES.contains(&lowered.as_str())
}
601
602/// Returns true if the expression is a window function (FunctionCall with window_spec).
603fn is_window_function(expr: &Expr) -> bool {
604    matches!(
605        expr,
606        Expr::FunctionCall {
607            window_spec: Some(_),
608            ..
609        }
610    )
611}
612
613/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
614/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
615/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
616/// size() call, BinaryOp, etc.).
617fn is_compound_aggregate(expr: &Expr) -> bool {
618    if !expr.is_aggregate() {
619        return false;
620    }
621    match expr {
622        Expr::FunctionCall {
623            name, window_spec, ..
624        } => {
625            // A bare aggregate FunctionCall is NOT compound
626            if window_spec.is_some() {
627                return true; // window wrapping an aggregate — treat as compound
628            }
629            !is_aggregate_function_name(name)
630        }
631        // Subquery aggregates are "bare" (not compound)
632        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
633        // Everything else (ListComprehension, BinaryOp, etc.) is compound
634        _ => true,
635    }
636}
637
638/// Recursively collect all bare aggregate FunctionCall sub-expressions from
639/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
640/// the outermost aggregate boundaries).
641///
642/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
643/// is searched because the body (`map_expr`, `predicate`, `expr`) references
644/// the loop variable, not outer-scope aggregates.
645fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
646    let mut out = Vec::new();
647    extract_inner_aggregates_rec(expr, &mut out);
648    out
649}
650
651fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
652    match expr {
653        Expr::FunctionCall {
654            name, window_spec, ..
655        } if window_spec.is_none() && is_aggregate_function_name(name) => {
656            // Found a bare aggregate — collect it and stop recursing
657            out.push(expr.clone());
658        }
659        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
660            out.push(expr.clone());
661        }
662        // For list comprehension, only search the `list` source for aggregates
663        Expr::ListComprehension { list, .. } => {
664            extract_inner_aggregates_rec(list, out);
665        }
666        // For quantifier, only search the `list` source
667        Expr::Quantifier { list, .. } => {
668            extract_inner_aggregates_rec(list, out);
669        }
670        // For reduce, search `init` and `list` (not the body `expr`)
671        Expr::Reduce { init, list, .. } => {
672            extract_inner_aggregates_rec(init, out);
673            extract_inner_aggregates_rec(list, out);
674        }
675        // Standard recursive cases
676        Expr::FunctionCall { args, .. } => {
677            for arg in args {
678                extract_inner_aggregates_rec(arg, out);
679            }
680        }
681        Expr::BinaryOp { left, right, .. } => {
682            extract_inner_aggregates_rec(left, out);
683            extract_inner_aggregates_rec(right, out);
684        }
685        Expr::UnaryOp { expr: e, .. }
686        | Expr::IsNull(e)
687        | Expr::IsNotNull(e)
688        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
689        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
690        Expr::List(items) => {
691            for item in items {
692                extract_inner_aggregates_rec(item, out);
693            }
694        }
695        Expr::Case {
696            expr: case_expr,
697            when_then,
698            else_expr,
699        } => {
700            if let Some(e) = case_expr {
701                extract_inner_aggregates_rec(e, out);
702            }
703            for (w, t) in when_then {
704                extract_inner_aggregates_rec(w, out);
705                extract_inner_aggregates_rec(t, out);
706            }
707            if let Some(e) = else_expr {
708                extract_inner_aggregates_rec(e, out);
709            }
710        }
711        Expr::In {
712            expr: in_expr,
713            list,
714        } => {
715            extract_inner_aggregates_rec(in_expr, out);
716            extract_inner_aggregates_rec(list, out);
717        }
718        Expr::ArrayIndex { array, index } => {
719            extract_inner_aggregates_rec(array, out);
720            extract_inner_aggregates_rec(index, out);
721        }
722        Expr::ArraySlice { array, start, end } => {
723            extract_inner_aggregates_rec(array, out);
724            if let Some(s) = start {
725                extract_inner_aggregates_rec(s, out);
726            }
727            if let Some(e) = end {
728                extract_inner_aggregates_rec(e, out);
729            }
730        }
731        Expr::Map(entries) => {
732            for (_, v) in entries {
733                extract_inner_aggregates_rec(v, out);
734            }
735        }
736        _ => {}
737    }
738}
739
740/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
741/// `Expr::Variable(aggregate_column_name(agg))`.
742///
743/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
744/// rewritten (the body references the loop variable, not outer-scope columns).
745fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
746    match expr {
747        Expr::FunctionCall {
748            name, window_spec, ..
749        } if window_spec.is_none() && is_aggregate_function_name(name) => {
750            // Replace bare aggregate with column reference
751            Expr::Variable(aggregate_column_name(expr))
752        }
753        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
754            Expr::Variable(aggregate_column_name(expr))
755        }
756        Expr::ListComprehension {
757            variable,
758            list,
759            where_clause,
760            map_expr,
761        } => Expr::ListComprehension {
762            variable: variable.clone(),
763            list: Box::new(replace_aggregates_with_columns(list)),
764            where_clause: where_clause.clone(), // don't touch — references loop var
765            map_expr: map_expr.clone(),         // don't touch — references loop var
766        },
767        Expr::Quantifier {
768            quantifier,
769            variable,
770            list,
771            predicate,
772        } => Expr::Quantifier {
773            quantifier: *quantifier,
774            variable: variable.clone(),
775            list: Box::new(replace_aggregates_with_columns(list)),
776            predicate: predicate.clone(), // don't touch — references loop var
777        },
778        Expr::Reduce {
779            accumulator,
780            init,
781            variable,
782            list,
783            expr: body,
784        } => Expr::Reduce {
785            accumulator: accumulator.clone(),
786            init: Box::new(replace_aggregates_with_columns(init)),
787            variable: variable.clone(),
788            list: Box::new(replace_aggregates_with_columns(list)),
789            expr: body.clone(), // don't touch — references loop var
790        },
791        Expr::FunctionCall {
792            name,
793            args,
794            distinct,
795            window_spec,
796        } => Expr::FunctionCall {
797            name: name.clone(),
798            args: args.iter().map(replace_aggregates_with_columns).collect(),
799            distinct: *distinct,
800            window_spec: window_spec.clone(),
801        },
802        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
803            left: Box::new(replace_aggregates_with_columns(left)),
804            op: *op,
805            right: Box::new(replace_aggregates_with_columns(right)),
806        },
807        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
808            op: *op,
809            expr: Box::new(replace_aggregates_with_columns(e)),
810        },
811        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
812        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
813        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
814        Expr::Property(base, prop) => Expr::Property(
815            Box::new(replace_aggregates_with_columns(base)),
816            prop.clone(),
817        ),
818        Expr::List(items) => {
819            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
820        }
821        Expr::Case {
822            expr: case_expr,
823            when_then,
824            else_expr,
825        } => Expr::Case {
826            expr: case_expr
827                .as_ref()
828                .map(|e| Box::new(replace_aggregates_with_columns(e))),
829            when_then: when_then
830                .iter()
831                .map(|(w, t)| {
832                    (
833                        replace_aggregates_with_columns(w),
834                        replace_aggregates_with_columns(t),
835                    )
836                })
837                .collect(),
838            else_expr: else_expr
839                .as_ref()
840                .map(|e| Box::new(replace_aggregates_with_columns(e))),
841        },
842        Expr::In {
843            expr: in_expr,
844            list,
845        } => Expr::In {
846            expr: Box::new(replace_aggregates_with_columns(in_expr)),
847            list: Box::new(replace_aggregates_with_columns(list)),
848        },
849        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
850            array: Box::new(replace_aggregates_with_columns(array)),
851            index: Box::new(replace_aggregates_with_columns(index)),
852        },
853        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
854            array: Box::new(replace_aggregates_with_columns(array)),
855            start: start
856                .as_ref()
857                .map(|e| Box::new(replace_aggregates_with_columns(e))),
858            end: end
859                .as_ref()
860                .map(|e| Box::new(replace_aggregates_with_columns(e))),
861        },
862        Expr::Map(entries) => Expr::Map(
863            entries
864                .iter()
865                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
866                .collect(),
867        ),
868        // Leaf expressions — return as-is
869        other => other.clone(),
870    }
871}
872
873/// Check if an expression contains any aggregate function (recursively).
874fn contains_aggregate_recursive(expr: &Expr) -> bool {
875    match expr {
876        Expr::FunctionCall { name, args, .. } => {
877            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
878        }
879        Expr::BinaryOp { left, right, .. } => {
880            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
881        }
882        Expr::UnaryOp { expr: e, .. }
883        | Expr::IsNull(e)
884        | Expr::IsNotNull(e)
885        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
886        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
887        Expr::Case {
888            expr,
889            when_then,
890            else_expr,
891        } => {
892            expr.as_deref().is_some_and(contains_aggregate_recursive)
893                || when_then.iter().any(|(w, t)| {
894                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
895                })
896                || else_expr
897                    .as_deref()
898                    .is_some_and(contains_aggregate_recursive)
899        }
900        Expr::In { expr, list } => {
901            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
902        }
903        Expr::Property(base, _) => contains_aggregate_recursive(base),
904        Expr::ListComprehension { list, .. } => {
905            // Only check the list source — where_clause/map_expr reference the loop variable
906            contains_aggregate_recursive(list)
907        }
908        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
909        Expr::Reduce { init, list, .. } => {
910            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
911        }
912        Expr::ArrayIndex { array, index } => {
913            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
914        }
915        Expr::ArraySlice { array, start, end } => {
916            contains_aggregate_recursive(array)
917                || start.as_deref().is_some_and(contains_aggregate_recursive)
918                || end.as_deref().is_some_and(contains_aggregate_recursive)
919        }
920        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
921        _ => false,
922    }
923}
924
925/// Check if an expression contains a non-deterministic function (e.g. rand()).
926fn contains_non_deterministic(expr: &Expr) -> bool {
927    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
928        return true;
929    }
930    let mut found = false;
931    expr.for_each_child(&mut |child| {
932        if !found {
933            found = contains_non_deterministic(child);
934        }
935    });
936    found
937}
938
939fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
940    match expr {
941        Expr::FunctionCall { name, args, .. } => {
942            if is_aggregate_function_name(name) {
943                out.insert(expr.to_string_repr());
944                return;
945            }
946            for arg in args {
947                collect_aggregate_reprs(arg, out);
948            }
949        }
950        Expr::BinaryOp { left, right, .. } => {
951            collect_aggregate_reprs(left, out);
952            collect_aggregate_reprs(right, out);
953        }
954        Expr::UnaryOp { expr, .. }
955        | Expr::IsNull(expr)
956        | Expr::IsNotNull(expr)
957        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
958        Expr::List(items) => {
959            for item in items {
960                collect_aggregate_reprs(item, out);
961            }
962        }
963        Expr::Case {
964            expr,
965            when_then,
966            else_expr,
967        } => {
968            if let Some(e) = expr {
969                collect_aggregate_reprs(e, out);
970            }
971            for (w, t) in when_then {
972                collect_aggregate_reprs(w, out);
973                collect_aggregate_reprs(t, out);
974            }
975            if let Some(e) = else_expr {
976                collect_aggregate_reprs(e, out);
977            }
978        }
979        Expr::In { expr, list } => {
980            collect_aggregate_reprs(expr, out);
981            collect_aggregate_reprs(list, out);
982        }
983        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
984        Expr::ListComprehension { list, .. } => {
985            collect_aggregate_reprs(list, out);
986        }
987        Expr::Quantifier { list, .. } => {
988            collect_aggregate_reprs(list, out);
989        }
990        Expr::Reduce { init, list, .. } => {
991            collect_aggregate_reprs(init, out);
992            collect_aggregate_reprs(list, out);
993        }
994        Expr::ArrayIndex { array, index } => {
995            collect_aggregate_reprs(array, out);
996            collect_aggregate_reprs(index, out);
997        }
998        Expr::ArraySlice { array, start, end } => {
999            collect_aggregate_reprs(array, out);
1000            if let Some(s) = start {
1001                collect_aggregate_reprs(s, out);
1002            }
1003            if let Some(e) = end {
1004                collect_aggregate_reprs(e, out);
1005            }
1006        }
1007        _ => {}
1008    }
1009}
1010
/// A variable or property reference recorded by `collect_non_aggregate_refs`,
/// i.e. one that was found outside of any aggregate function call.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference; holds the variable name.
    Var(String),
    /// A property access expression.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the variable the property is read from when the base is a
        /// plain variable; `None` for any other base expression.
        base_var: Option<String>,
    },
}
1019
1020fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1021    match expr {
1022        Expr::FunctionCall { name, args, .. } => {
1023            if is_aggregate_function_name(name) {
1024                return;
1025            }
1026            for arg in args {
1027                collect_non_aggregate_refs(arg, inside_agg, out);
1028            }
1029        }
1030        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1031        Expr::Property(base, _) if !inside_agg => {
1032            let base_var = if let Expr::Variable(v) = base.as_ref() {
1033                Some(v.clone())
1034            } else {
1035                None
1036            };
1037            out.push(NonAggregateRef::Property {
1038                repr: expr.to_string_repr(),
1039                base_var,
1040            });
1041        }
1042        Expr::BinaryOp { left, right, .. } => {
1043            collect_non_aggregate_refs(left, inside_agg, out);
1044            collect_non_aggregate_refs(right, inside_agg, out);
1045        }
1046        Expr::UnaryOp { expr, .. }
1047        | Expr::IsNull(expr)
1048        | Expr::IsNotNull(expr)
1049        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1050        Expr::List(items) => {
1051            for item in items {
1052                collect_non_aggregate_refs(item, inside_agg, out);
1053            }
1054        }
1055        Expr::Case {
1056            expr,
1057            when_then,
1058            else_expr,
1059        } => {
1060            if let Some(e) = expr {
1061                collect_non_aggregate_refs(e, inside_agg, out);
1062            }
1063            for (w, t) in when_then {
1064                collect_non_aggregate_refs(w, inside_agg, out);
1065                collect_non_aggregate_refs(t, inside_agg, out);
1066            }
1067            if let Some(e) = else_expr {
1068                collect_non_aggregate_refs(e, inside_agg, out);
1069            }
1070        }
1071        Expr::In { expr, list } => {
1072            collect_non_aggregate_refs(expr, inside_agg, out);
1073            collect_non_aggregate_refs(list, inside_agg, out);
1074        }
1075        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1076        // source. The body references the loop variable, not outer-scope vars.
1077        Expr::ListComprehension { list, .. } => {
1078            collect_non_aggregate_refs(list, inside_agg, out);
1079        }
1080        Expr::Quantifier { list, .. } => {
1081            collect_non_aggregate_refs(list, inside_agg, out);
1082        }
1083        Expr::Reduce { init, list, .. } => {
1084            collect_non_aggregate_refs(init, inside_agg, out);
1085            collect_non_aggregate_refs(list, inside_agg, out);
1086        }
1087        _ => {}
1088    }
1089}
1090
1091fn validate_with_order_by_aggregate_item(
1092    expr: &Expr,
1093    projected_aggregate_reprs: &HashSet<String>,
1094    projected_simple_reprs: &HashSet<String>,
1095    projected_aliases: &HashSet<String>,
1096) -> Result<()> {
1097    let mut aggregate_reprs = HashSet::new();
1098    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1099    for agg in aggregate_reprs {
1100        if !projected_aggregate_reprs.contains(&agg) {
1101            return Err(anyhow!(
1102                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1103                agg
1104            ));
1105        }
1106    }
1107
1108    let mut refs = Vec::new();
1109    collect_non_aggregate_refs(expr, false, &mut refs);
1110    refs.retain(|r| match r {
1111        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1112        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1113    });
1114
1115    let mut dedup = HashSet::new();
1116    refs.retain(|r| {
1117        let key = match r {
1118            NonAggregateRef::Var(v) => format!("v:{v}"),
1119            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1120        };
1121        dedup.insert(key)
1122    });
1123
1124    if refs.len() > 1 {
1125        return Err(anyhow!(
1126            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1127        ));
1128    }
1129
1130    if let Some(r) = refs.first() {
1131        return match r {
1132            NonAggregateRef::Var(v) => Err(anyhow!(
1133                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1134                v
1135            )),
1136            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1137                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1138                base_var
1139                    .clone()
1140                    .unwrap_or_else(|| "<property-base>".to_string())
1141            )),
1142        };
1143    }
1144
1145    Ok(())
1146}
1147
1148/// Validate that no aggregation functions appear in WHERE clause.
1149fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1150    if contains_aggregate_recursive(predicate) {
1151        return Err(anyhow!(
1152            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1153        ));
1154    }
1155    Ok(())
1156}
1157
/// Numeric value produced while evaluating a constant expression.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// Signed 64-bit integer value.
    Int(i64),
    /// 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as an `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        let whole = match self {
            Self::Float(value) => return value,
            Self::Int(whole) => whole,
        };
        whole as f64
    }
}
1172
1173fn eval_const_numeric_expr(
1174    expr: &Expr,
1175    params: &HashMap<String, uni_common::Value>,
1176) -> Result<ConstNumber> {
1177    match expr {
1178        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1179        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1180        Expr::Parameter(name) => match params.get(name) {
1181            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1182            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1183            Some(uni_common::Value::Null) => Err(anyhow!(
1184                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1185                name
1186            )),
1187            Some(other) => Err(anyhow!(
1188                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1189                name,
1190                other
1191            )),
1192            None => Err(anyhow!(
1193                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1194            )),
1195        },
1196        Expr::UnaryOp {
1197            op: uni_cypher::ast::UnaryOp::Neg,
1198            expr,
1199        } => match eval_const_numeric_expr(expr, params)? {
1200            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1201            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1202        },
1203        Expr::BinaryOp { left, op, right } => {
1204            let l = eval_const_numeric_expr(left, params)?;
1205            let r = eval_const_numeric_expr(right, params)?;
1206            match op {
1207                BinaryOp::Add => match (l, r) {
1208                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1209                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1210                },
1211                BinaryOp::Sub => match (l, r) {
1212                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1213                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1214                },
1215                BinaryOp::Mul => match (l, r) {
1216                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1217                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1218                },
1219                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1220                BinaryOp::Mod => match (l, r) {
1221                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1222                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1223                },
1224                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1225                _ => Err(anyhow!(
1226                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1227                )),
1228            }
1229        }
1230        Expr::FunctionCall { name, args, .. } => {
1231            let lower = name.to_lowercase();
1232            match lower.as_str() {
1233                "rand" if args.is_empty() => {
1234                    use rand::Rng;
1235                    let mut rng = rand::thread_rng();
1236                    Ok(ConstNumber::Float(rng.r#gen::<f64>()))
1237                }
1238                "tointeger" | "toint" if args.len() == 1 => {
1239                    match eval_const_numeric_expr(&args[0], params)? {
1240                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1241                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1242                    }
1243                }
1244                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1245                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1246                )),
1247                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1248                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1249                )),
1250                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1251                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1252                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1253                },
1254                _ => Err(anyhow!(
1255                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1256                )),
1257            }
1258        }
1259        _ => Err(anyhow!(
1260            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1261        )),
1262    }
1263}
1264
1265/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1266/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1267fn parse_non_negative_integer(
1268    expr: &Expr,
1269    clause_name: &str,
1270    params: &HashMap<String, uni_common::Value>,
1271) -> Result<Option<usize>> {
1272    let referenced_vars = collect_expr_variables(expr);
1273    if !referenced_vars.is_empty() {
1274        return Err(anyhow!(
1275            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1276            clause_name
1277        ));
1278    }
1279
1280    let value = eval_const_numeric_expr(expr, params)?;
1281    let as_int = match value {
1282        ConstNumber::Int(v) => v,
1283        ConstNumber::Float(v) => {
1284            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1285                return Err(anyhow!(
1286                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1287                    clause_name
1288                ));
1289            }
1290            v as i64
1291        }
1292    };
1293    if as_int < 0 {
1294        return Err(anyhow!(
1295            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1296            clause_name
1297        ));
1298    }
1299    Ok(Some(as_int as usize))
1300}
1301
1302/// Validate that aggregation functions are not nested.
1303fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1304    if let Expr::FunctionCall { name, args, .. } = expr
1305        && is_aggregate_function_name(name)
1306    {
1307        for arg in args {
1308            if contains_aggregate_recursive(arg) {
1309                return Err(anyhow!(
1310                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1311                ));
1312            }
1313            if contains_non_deterministic(arg) {
1314                return Err(anyhow!(
1315                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1316                ));
1317            }
1318        }
1319    }
1320    let mut result = Ok(());
1321    expr.for_each_child(&mut |child| {
1322        if result.is_ok() {
1323            result = validate_no_nested_aggregation(child);
1324        }
1325    });
1326    result
1327}
1328
1329/// Validate that an expression does not access properties or labels of
1330/// deleted entities. `type(r)` on a deleted relationship is allowed per
1331/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1332fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1333    // Check n.prop on a deleted variable
1334    if let Expr::Property(inner, _) = expr
1335        && let Expr::Variable(name) = inner.as_ref()
1336        && deleted_vars.contains(name)
1337    {
1338        return Err(anyhow!(
1339            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1340            name
1341        ));
1342    }
1343    // Check labels(n) or keys(n) on a deleted variable
1344    if let Expr::FunctionCall { name, args, .. } = expr
1345        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1346        && args.len() == 1
1347        && let Expr::Variable(var) = &args[0]
1348        && deleted_vars.contains(var)
1349    {
1350        return Err(anyhow!(
1351            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1352            name.to_lowercase(),
1353            var
1354        ));
1355    }
1356    let mut result = Ok(());
1357    expr.for_each_child(&mut |child| {
1358        if result.is_ok() {
1359            result = validate_no_deleted_entity_access(child, deleted_vars);
1360        }
1361    });
1362    result
1363}
1364
1365/// Validate that all variables referenced in properties are defined,
1366/// either in scope or in the local CREATE variable list.
1367fn validate_property_variables(
1368    properties: &Option<Expr>,
1369    vars_in_scope: &[VariableInfo],
1370    create_vars: &[&str],
1371) -> Result<()> {
1372    if let Some(props) = properties {
1373        for var in collect_expr_variables(props) {
1374            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1375                return Err(anyhow!(
1376                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1377                    var
1378                ));
1379            }
1380        }
1381    }
1382    Ok(())
1383}
1384
1385/// Check that a variable name is not already bound in scope or in the local CREATE list.
1386/// Used to prevent rebinding in CREATE clauses.
1387fn check_not_already_bound(
1388    name: &str,
1389    vars_in_scope: &[VariableInfo],
1390    create_vars: &[&str],
1391) -> Result<()> {
1392    if is_var_in_scope(vars_in_scope, name) {
1393        return Err(anyhow!(
1394            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1395            name
1396        ));
1397    }
1398    if create_vars.contains(&name) {
1399        return Err(anyhow!(
1400            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1401            name
1402        ));
1403    }
1404    Ok(())
1405}
1406
1407fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1408    let mut scope = vars_in_scope.to_vec();
1409
1410    for path in &pattern.paths {
1411        if let Some(path_var) = &path.variable
1412            && !path_var.is_empty()
1413            && !is_var_in_scope(&scope, path_var)
1414        {
1415            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1416        }
1417        for element in &path.elements {
1418            match element {
1419                PatternElement::Node(n) => {
1420                    if let Some(v) = &n.variable
1421                        && !v.is_empty()
1422                        && !is_var_in_scope(&scope, v)
1423                    {
1424                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1425                    }
1426                }
1427                PatternElement::Relationship(r) => {
1428                    if let Some(v) = &r.variable
1429                        && !v.is_empty()
1430                        && !is_var_in_scope(&scope, v)
1431                    {
1432                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1433                    }
1434                }
1435                PatternElement::Parenthesized { .. } => {}
1436            }
1437        }
1438    }
1439
1440    scope
1441}
1442
1443fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1444    match item {
1445        SetItem::Property { expr, value } => {
1446            validate_expression_variables(expr, vars_in_scope)?;
1447            validate_expression(expr, vars_in_scope)?;
1448            validate_expression_variables(value, vars_in_scope)?;
1449            validate_expression(value, vars_in_scope)?;
1450            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1451                return Err(anyhow!(
1452                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1453                ));
1454            }
1455        }
1456        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1457            if !is_var_in_scope(vars_in_scope, variable) {
1458                return Err(anyhow!(
1459                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1460                    variable
1461                ));
1462            }
1463            validate_expression_variables(value, vars_in_scope)?;
1464            validate_expression(value, vars_in_scope)?;
1465            if contains_pattern_predicate(value) {
1466                return Err(anyhow!(
1467                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1468                ));
1469            }
1470        }
1471        SetItem::Labels { variable, .. } => {
1472            if !is_var_in_scope(vars_in_scope, variable) {
1473                return Err(anyhow!(
1474                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1475                    variable
1476                ));
1477            }
1478        }
1479    }
1480
1481    Ok(())
1482}
1483
1484/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1485/// The OpenCypher spec requires all property values in MERGE to be non-null.
1486fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1487    if let Some(Expr::Map(entries)) = properties {
1488        for (key, value) in entries {
1489            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1490                return Err(anyhow!(
1491                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1492                    key
1493                ));
1494            }
1495        }
1496    }
1497    Ok(())
1498}
1499
1500fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1501    for path in &merge_clause.pattern.paths {
1502        for element in &path.elements {
1503            match element {
1504                PatternElement::Node(n) => {
1505                    if let Some(Expr::Parameter(_)) = &n.properties {
1506                        return Err(anyhow!(
1507                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1508                        ));
1509                    }
1510                    reject_null_merge_properties(&n.properties)?;
1511                    // VariableAlreadyBound: reject if a bound variable is used
1512                    // as a standalone MERGE node or introduces new labels/properties.
1513                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1514                    if let Some(variable) = &n.variable
1515                        && !variable.is_empty()
1516                        && is_var_in_scope(vars_in_scope, variable)
1517                    {
1518                        let is_standalone = path.elements.len() == 1;
1519                        let has_new_labels = !n.labels.is_empty();
1520                        let has_new_properties = n.properties.is_some();
1521                        if is_standalone || has_new_labels || has_new_properties {
1522                            return Err(anyhow!(
1523                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1524                                variable
1525                            ));
1526                        }
1527                    }
1528                }
1529                PatternElement::Relationship(r) => {
1530                    if let Some(variable) = &r.variable
1531                        && !variable.is_empty()
1532                        && is_var_in_scope(vars_in_scope, variable)
1533                    {
1534                        return Err(anyhow!(
1535                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1536                            variable
1537                        ));
1538                    }
1539                    if r.types.len() != 1 {
1540                        return Err(anyhow!(
1541                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1542                        ));
1543                    }
1544                    if r.range.is_some() {
1545                        return Err(anyhow!(
1546                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1547                        ));
1548                    }
1549                    if let Some(Expr::Parameter(_)) = &r.properties {
1550                        return Err(anyhow!(
1551                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1552                        ));
1553                    }
1554                    reject_null_merge_properties(&r.properties)?;
1555                }
1556                PatternElement::Parenthesized { .. } => {}
1557            }
1558        }
1559    }
1560
1561    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1562    for item in &merge_clause.on_create {
1563        validate_merge_set_item(item, &merge_scope)?;
1564    }
1565    for item in &merge_clause.on_match {
1566        validate_merge_set_item(item, &merge_scope)?;
1567    }
1568
1569    Ok(())
1570}
1571
1572/// Recursively validate an expression for type errors, undefined variables, etc.
1573fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1574    // Validate boolean operators and nested aggregation first
1575    validate_boolean_expression(expr)?;
1576    validate_no_nested_aggregation(expr)?;
1577
1578    // Helper to validate multiple expressions
1579    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1580        for e in exprs {
1581            validate_expression(e, vars)?;
1582        }
1583        Ok(())
1584    }
1585
1586    match expr {
1587        Expr::FunctionCall { name, args, .. } => {
1588            validate_function_call(name, args, vars_in_scope)?;
1589            validate_all(args, vars_in_scope)
1590        }
1591        Expr::BinaryOp { left, right, .. } => {
1592            validate_expression(left, vars_in_scope)?;
1593            validate_expression(right, vars_in_scope)
1594        }
1595        Expr::UnaryOp { expr: e, .. }
1596        | Expr::IsNull(e)
1597        | Expr::IsNotNull(e)
1598        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1599        Expr::Property(base, prop) => {
1600            if let Expr::Variable(var_name) = base.as_ref()
1601                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1602            {
1603                // Paths don't have properties
1604                if var_info.var_type == VariableType::Path {
1605                    return Err(anyhow!(
1606                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1607                        var_name,
1608                        prop
1609                    ));
1610                }
1611                // Known non-graph literals (int, float, bool, string, list) don't have properties
1612                if var_info.var_type == VariableType::ScalarLiteral {
1613                    return Err(anyhow!(
1614                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1615                    ));
1616                }
1617            }
1618            validate_expression(base, vars_in_scope)
1619        }
1620        Expr::List(items) => validate_all(items, vars_in_scope),
1621        Expr::Case {
1622            expr: case_expr,
1623            when_then,
1624            else_expr,
1625        } => {
1626            if let Some(e) = case_expr {
1627                validate_expression(e, vars_in_scope)?;
1628            }
1629            for (w, t) in when_then {
1630                validate_expression(w, vars_in_scope)?;
1631                validate_expression(t, vars_in_scope)?;
1632            }
1633            if let Some(e) = else_expr {
1634                validate_expression(e, vars_in_scope)?;
1635            }
1636            Ok(())
1637        }
1638        Expr::In { expr: e, list } => {
1639            validate_expression(e, vars_in_scope)?;
1640            validate_expression(list, vars_in_scope)
1641        }
1642        Expr::Exists {
1643            query,
1644            from_pattern_predicate: true,
1645        } => {
1646            // Pattern predicates cannot introduce new named variables.
1647            // Extract named vars from inner MATCH pattern, check each is in scope.
1648            if let Query::Single(stmt) = query.as_ref() {
1649                for clause in &stmt.clauses {
1650                    if let Clause::Match(m) = clause {
1651                        for path in &m.pattern.paths {
1652                            for elem in &path.elements {
1653                                match elem {
1654                                    PatternElement::Node(n) => {
1655                                        if let Some(var) = &n.variable
1656                                            && !is_var_in_scope(vars_in_scope, var)
1657                                        {
1658                                            return Err(anyhow!(
1659                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1660                                                var
1661                                            ));
1662                                        }
1663                                    }
1664                                    PatternElement::Relationship(r) => {
1665                                        if let Some(var) = &r.variable
1666                                            && !is_var_in_scope(vars_in_scope, var)
1667                                        {
1668                                            return Err(anyhow!(
1669                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1670                                                var
1671                                            ));
1672                                        }
1673                                    }
1674                                    _ => {}
1675                                }
1676                            }
1677                        }
1678                    }
1679                }
1680            }
1681            Ok(())
1682        }
1683        _ => Ok(()),
1684    }
1685}
1686
/// One step (hop) in a Quantified Path Pattern sub-pattern.
///
/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`; a QPP is
/// described there as a `Vec<QppStepInfo>`, one entry per hop.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node
    /// (`None` when the step places no label constraint on its target).
    pub target_label: Option<String>,
}
1699
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan main vertices table filtering by label name(s) (`MATCH (n:Unknown)`).
    /// Used for labels not defined in schema (schemaless support).
    /// When `labels` has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Traverse edges selected by edge type ID(s) from `source_variable` to `target_variable`.
    /// NOTE(review): presumably the schema-backed counterpart of `TraverseMainByType`
    /// (which selects edges by type *name*) — confirm against the planner's traversal code.
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Apply `predicate` to the rows of `input` (see `optional_variables` for the
    /// OPTIONAL MATCH exception).
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// A single CREATE pattern applied on top of `input`
    /// (see `CreateBatch` for the multi-pattern form).
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with optional `ON MATCH` / `ON CREATE` SET clauses.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    Distinct {
        input: Box<LogicalPlan>,
    },
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection: each entry is an expression with an optional alias.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// shortestPath() between `source_variable` and `target_variable`.
    /// NOTE(review): presumably yields a single minimum-length path, in contrast
    /// to `AllShortestPaths` — confirm against the executor.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Repeated application of a sub-plan (`pattern_plan`) between
    /// `min_iterations` and `max_iterations` times.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    DropIndex {
        name: String,
        if_exists: bool,
    },
    ShowIndexes {
        filter: Option<String>,
    },
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around the plan of the inner query.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    // Transaction Plans
    Begin,
    Commit,
    Rollback,
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
}
2101
/// Extracted vector similarity predicate info for optimization.
///
/// Built by `extract_simple_vector_similarity` from either a
/// `vector_similarity(n.prop, query)` comparison or an `n.prop ~= query` expression.
struct VectorSimilarityPredicate {
    /// Name of the variable whose property is compared (the `n` in `n.embedding`).
    variable: String,
    /// Name of the property accessed on that variable (the `embedding` in `n.embedding`).
    property: String,
    /// Query expression: second argument of `vector_similarity(...)`, or the
    /// right-hand side of `~=`.
    query: Expr,
    /// Threshold taken from the numeric literal of the comparison;
    /// `None` for the `~=` form, which carries no threshold.
    threshold: Option<f32>,
}
2109
/// Result of extracting vector_similarity from a predicate.
///
/// Produced by `extract_vector_similarity`, which splits an expression into
/// the vector-similarity part and whatever was AND-ed with it.
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole expression was the vector-similarity comparison.
    residual: Option<Expr>,
}
2117
2118/// Try to extract a vector_similarity predicate from an expression.
2119/// Matches patterns like:
2120/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2121/// - n.embedding ~= $query
2122///
2123/// Also handles AND predicates.
2124fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2125    match expr {
2126        Expr::BinaryOp { left, op, right } => {
2127            // Handle AND: check both sides for vector_similarity
2128            if matches!(op, BinaryOp::And) {
2129                // Try left side first
2130                if let Some(vs) = extract_simple_vector_similarity(left) {
2131                    return Some(VectorSimilarityExtraction {
2132                        predicate: vs,
2133                        residual: Some(right.as_ref().clone()),
2134                    });
2135                }
2136                // Try right side
2137                if let Some(vs) = extract_simple_vector_similarity(right) {
2138                    return Some(VectorSimilarityExtraction {
2139                        predicate: vs,
2140                        residual: Some(left.as_ref().clone()),
2141                    });
2142                }
2143                // Recursively check within left/right for nested ANDs
2144                if let Some(mut extraction) = extract_vector_similarity(left) {
2145                    extraction.residual = Some(combine_with_and(
2146                        extraction.residual,
2147                        right.as_ref().clone(),
2148                    ));
2149                    return Some(extraction);
2150                }
2151                if let Some(mut extraction) = extract_vector_similarity(right) {
2152                    extraction.residual =
2153                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2154                    return Some(extraction);
2155                }
2156                return None;
2157            }
2158
2159            // Simple case: direct vector_similarity comparison
2160            if let Some(vs) = extract_simple_vector_similarity(expr) {
2161                return Some(VectorSimilarityExtraction {
2162                    predicate: vs,
2163                    residual: None,
2164                });
2165            }
2166            None
2167        }
2168        _ => None,
2169    }
2170}
2171
2172/// Helper to combine an optional expression with another using AND
2173fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2174    match opt_expr {
2175        Some(e) => Expr::BinaryOp {
2176            left: Box::new(e),
2177            op: BinaryOp::And,
2178            right: Box::new(other),
2179        },
2180        None => other,
2181    }
2182}
2183
2184/// Extract a simple vector_similarity comparison (no AND)
2185fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2186    match expr {
2187        Expr::BinaryOp { left, op, right } => {
2188            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2189            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2190                && let (Some(vs), Some(thresh)) = (
2191                    extract_vector_similarity_call(left),
2192                    extract_float_literal(right),
2193                )
2194            {
2195                return Some(VectorSimilarityPredicate {
2196                    variable: vs.0,
2197                    property: vs.1,
2198                    query: vs.2,
2199                    threshold: Some(thresh),
2200                });
2201            }
2202            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2203            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2204                && let (Some(thresh), Some(vs)) = (
2205                    extract_float_literal(left),
2206                    extract_vector_similarity_call(right),
2207                )
2208            {
2209                return Some(VectorSimilarityPredicate {
2210                    variable: vs.0,
2211                    property: vs.1,
2212                    query: vs.2,
2213                    threshold: Some(thresh),
2214                });
2215            }
2216            // Pattern: n.embedding ~= query
2217            if matches!(op, BinaryOp::ApproxEq)
2218                && let Expr::Property(var_expr, prop) = left.as_ref()
2219                && let Expr::Variable(var) = var_expr.as_ref()
2220            {
2221                return Some(VectorSimilarityPredicate {
2222                    variable: var.clone(),
2223                    property: prop.clone(),
2224                    query: right.as_ref().clone(),
2225                    threshold: None,
2226                });
2227            }
2228            None
2229        }
2230        _ => None,
2231    }
2232}
2233
2234/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2235fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2236    if let Expr::FunctionCall { name, args, .. } = expr
2237        && name.eq_ignore_ascii_case("vector_similarity")
2238        && args.len() == 2
2239    {
2240        // First arg should be Property(Identifier(var), prop)
2241        if let Expr::Property(var_expr, prop) = &args[0]
2242            && let Expr::Variable(var) = var_expr.as_ref()
2243        {
2244            // Second arg is query
2245            return Some((var.clone(), prop.clone(), args[1].clone()));
2246        }
2247    }
2248    None
2249}
2250
2251/// Extract a float value from a literal expression
2252fn extract_float_literal(expr: &Expr) -> Option<f32> {
2253    match expr {
2254        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2255        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2256        _ => None,
2257    }
2258}
2259
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Schema the planner was constructed with (see `new`).
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are simply absent.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    /// A `Cell` so that `next_anon_var` can advance it through `&self`.
    anon_counter: std::cell::Cell<usize>,
    /// Optional query parameters for resolving $param in SKIP/LIMIT.
    /// Empty unless supplied through `with_params`.
    params: HashMap<String, uni_common::Value>,
}
2275
/// Borrowed pattern pieces plus flags describing one relationship hop.
///
/// NOTE(review): the consumer of this struct is outside the visible part of
/// the file; presumably it is the argument bundle for planning a single
/// traversal step — confirm against its use site.
struct TraverseParams<'a> {
    /// Relationship pattern of the hop.
    rel: &'a RelationshipPattern,
    /// Node pattern at the far end of the hop.
    target_node: &'a NodePattern,
    /// NOTE(review): presumably `true` when the hop comes from an OPTIONAL MATCH — confirm.
    optional: bool,
    /// Path variable associated with the hop, if any.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2285
2286impl QueryPlanner {
2287    /// Create a new planner for the given schema.
2288    ///
2289    /// Pre-parses all generation expressions defined in the schema so that
2290    /// repeated plan calls avoid redundant parsing.
2291    pub fn new(schema: Arc<Schema>) -> Self {
2292        // Pre-parse all generation expressions for caching
2293        let mut gen_expr_cache = HashMap::new();
2294        for (label, props) in &schema.properties {
2295            for (gen_col, meta) in props {
2296                if let Some(expr_str) = &meta.generation_expression
2297                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2298                {
2299                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2300                }
2301            }
2302        }
2303        Self {
2304            schema,
2305            gen_expr_cache,
2306            anon_counter: std::cell::Cell::new(0),
2307            params: HashMap::new(),
2308        }
2309    }
2310
2311    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2312    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2313        self.params = params;
2314        self
2315    }
2316
2317    /// Plan a Cypher query with no pre-bound variables.
2318    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2319        self.plan_with_scope(query, Vec::new())
2320    }
2321
2322    /// Plan a Cypher query with a set of externally pre-bound variable names.
2323    ///
2324    /// `vars` lists variable names already in scope before this query executes
2325    /// (e.g., from an enclosing Locy rule body).
2326    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2327        // Apply query rewrites before planning
2328        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2329        if Self::has_mixed_union_modes(&rewritten_query) {
2330            return Err(anyhow!(
2331                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2332            ));
2333        }
2334
2335        match rewritten_query {
2336            Query::Single(stmt) => self.plan_single(stmt, vars),
2337            Query::Union { left, right, all } => {
2338                let l = self.plan_with_scope(*left, vars.clone())?;
2339                let r = self.plan_with_scope(*right, vars)?;
2340
2341                // Validate that both sides have the same column names
2342                let left_cols = Self::extract_projection_columns(&l);
2343                let right_cols = Self::extract_projection_columns(&r);
2344
2345                if left_cols != right_cols {
2346                    return Err(anyhow!(
2347                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2348                    ));
2349                }
2350
2351                Ok(LogicalPlan::Union {
2352                    left: Box::new(l),
2353                    right: Box::new(r),
2354                    all,
2355                })
2356            }
2357            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2358            Query::Transaction(cmd) => self.plan_transaction_command(cmd),
2359            Query::Explain(inner) => {
2360                let inner_plan = self.plan_with_scope(*inner, vars)?;
2361                Ok(LogicalPlan::Explain {
2362                    plan: Box::new(inner_plan),
2363                })
2364            }
2365            Query::TimeTravel { .. } => {
2366                unreachable!("TimeTravel should be resolved at API layer before planning")
2367            }
2368        }
2369    }
2370
2371    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2372        match query {
2373            Query::Union { left, right, all } => {
2374                out.insert(*all);
2375                Self::collect_union_modes(left, out);
2376                Self::collect_union_modes(right, out);
2377            }
2378            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2379            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2380            Query::Single(_) | Query::Schema(_) | Query::Transaction(_) => {}
2381        }
2382    }
2383
2384    fn has_mixed_union_modes(query: &Query) -> bool {
2385        let mut modes = HashSet::new();
2386        Self::collect_union_modes(query, &mut modes);
2387        modes.len() > 1
2388    }
2389
2390    fn next_anon_var(&self) -> String {
2391        let id = self.anon_counter.get();
2392        self.anon_counter.set(id + 1);
2393        format!("_anon_{}", id)
2394    }
2395
2396    /// Extract projection column names from a logical plan.
2397    /// Used for UNION column validation.
2398    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2399        match plan {
2400            LogicalPlan::Project { projections, .. } => projections
2401                .iter()
2402                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2403                .collect(),
2404            LogicalPlan::Limit { input, .. }
2405            | LogicalPlan::Sort { input, .. }
2406            | LogicalPlan::Distinct { input, .. }
2407            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2408            LogicalPlan::Union { left, right, .. } => {
2409                let left_cols = Self::extract_projection_columns(left);
2410                if left_cols.is_empty() {
2411                    Self::extract_projection_columns(right)
2412                } else {
2413                    left_cols
2414                }
2415            }
2416            LogicalPlan::Aggregate {
2417                group_by,
2418                aggregates,
2419                ..
2420            } => {
2421                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2422                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2423                cols
2424            }
2425            _ => Vec::new(),
2426        }
2427    }
2428
2429    fn plan_return_clause(
2430        &self,
2431        return_clause: &ReturnClause,
2432        plan: LogicalPlan,
2433        vars_in_scope: &[VariableInfo],
2434    ) -> Result<LogicalPlan> {
2435        let mut plan = plan;
2436        let mut group_by = Vec::new();
2437        let mut aggregates = Vec::new();
2438        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2439        let mut has_agg = false;
2440        let mut projections = Vec::new();
2441        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2442        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2443        let mut projected_aliases: HashSet<String> = HashSet::new();
2444
2445        for item in &return_clause.items {
2446            match item {
2447                ReturnItem::All => {
2448                    // RETURN * - add all user-named variables in scope
2449                    // (anonymous variables like _anon_0 are excluded)
2450                    let user_vars: Vec<_> = vars_in_scope
2451                        .iter()
2452                        .filter(|v| !v.name.starts_with("_anon_"))
2453                        .collect();
2454                    if user_vars.is_empty() {
2455                        return Err(anyhow!(
2456                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2457                        ));
2458                    }
2459                    for v in user_vars {
2460                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2461                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2462                            group_by.push(Expr::Variable(v.name.clone()));
2463                        }
2464                        projected_aliases.insert(v.name.clone());
2465                        projected_simple_reprs.insert(v.name.clone());
2466                    }
2467                }
2468                ReturnItem::Expr {
2469                    expr,
2470                    alias,
2471                    source_text,
2472                } => {
2473                    if matches!(expr, Expr::Wildcard) {
2474                        for v in vars_in_scope {
2475                            projections
2476                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2477                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2478                                group_by.push(Expr::Variable(v.name.clone()));
2479                            }
2480                            projected_aliases.insert(v.name.clone());
2481                            projected_simple_reprs.insert(v.name.clone());
2482                        }
2483                    } else {
2484                        // Validate expression variables are defined
2485                        validate_expression_variables(expr, vars_in_scope)?;
2486                        // Validate function argument types and boolean operators
2487                        validate_expression(expr, vars_in_scope)?;
2488                        // Pattern predicates are not allowed in RETURN
2489                        if contains_pattern_predicate(expr) {
2490                            return Err(anyhow!(
2491                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2492                            ));
2493                        }
2494
2495                        // Use source text as column name when no explicit alias
2496                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2497                        projections.push((expr.clone(), effective_alias));
2498                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2499                            // Bare aggregate — push directly
2500                            has_agg = true;
2501                            aggregates.push(expr.clone());
2502                            projected_aggregate_reprs.insert(expr.to_string_repr());
2503                        } else if !is_window_function(expr)
2504                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2505                        {
2506                            // Compound aggregate or expression containing aggregates —
2507                            // extract the inner bare aggregates for the Aggregate node
2508                            has_agg = true;
2509                            compound_agg_exprs.push(expr.clone());
2510                            for inner in extract_inner_aggregates(expr) {
2511                                let repr = inner.to_string_repr();
2512                                if !projected_aggregate_reprs.contains(&repr) {
2513                                    aggregates.push(inner);
2514                                    projected_aggregate_reprs.insert(repr);
2515                                }
2516                            }
2517                        } else if !group_by.contains(expr) {
2518                            group_by.push(expr.clone());
2519                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2520                                projected_simple_reprs.insert(expr.to_string_repr());
2521                            }
2522                        }
2523
2524                        if let Some(a) = alias {
2525                            if projected_aliases.contains(a) {
2526                                return Err(anyhow!(
2527                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2528                                    a
2529                                ));
2530                            }
2531                            projected_aliases.insert(a.clone());
2532                        } else if let Expr::Variable(v) = expr {
2533                            if projected_aliases.contains(v) {
2534                                return Err(anyhow!(
2535                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2536                                    v
2537                                ));
2538                            }
2539                            projected_aliases.insert(v.clone());
2540                        }
2541                    }
2542                }
2543            }
2544        }
2545
2546        // Validate compound aggregate expressions: non-aggregate refs must be
2547        // individually present in the group_by as simple variables or properties.
2548        if has_agg {
2549            let group_by_reprs: HashSet<String> =
2550                group_by.iter().map(|e| e.to_string_repr()).collect();
2551            for expr in &compound_agg_exprs {
2552                let mut refs = Vec::new();
2553                collect_non_aggregate_refs(expr, false, &mut refs);
2554                for r in &refs {
2555                    let is_covered = match r {
2556                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
2557                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
2558                    };
2559                    if !is_covered {
2560                        return Err(anyhow!(
2561                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
2562                        ));
2563                    }
2564                }
2565            }
2566        }
2567
2568        if has_agg {
2569            plan = LogicalPlan::Aggregate {
2570                input: Box::new(plan),
2571                group_by,
2572                aggregates,
2573            };
2574        }
2575
2576        let mut window_exprs = Vec::new();
2577        for (expr, _) in &projections {
2578            Self::collect_window_functions(expr, &mut window_exprs);
2579        }
2580
2581        if let Some(order_by) = &return_clause.order_by {
2582            for item in order_by {
2583                Self::collect_window_functions(&item.expr, &mut window_exprs);
2584            }
2585        }
2586
2587        let has_window_exprs = !window_exprs.is_empty();
2588
2589        if has_window_exprs {
2590            // Before creating the Window node, we need to ensure all properties
2591            // referenced by window functions are available. Create a Project node
2592            // that loads these properties.
2593            let mut props_needed_for_window: Vec<Expr> = Vec::new();
2594            for window_expr in &window_exprs {
2595                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
2596            }
2597
2598            // Also include non-window expressions from projections that might be needed
2599            // Preserve qualified names (e.g., "e.salary") as aliases for properties
2600            let non_window_projections: Vec<_> = projections
2601                .iter()
2602                .filter_map(|(expr, alias)| {
2603                    // Keep expressions that don't have window_spec
2604                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
2605                        window_spec.is_none()
2606                    } else {
2607                        true
2608                    };
2609
2610                    if keep {
2611                        // For property references, use the qualified name as alias
2612                        let new_alias = if matches!(expr, Expr::Property(..)) {
2613                            Some(expr.to_string_repr())
2614                        } else {
2615                            alias.clone()
2616                        };
2617                        Some((expr.clone(), new_alias))
2618                    } else {
2619                        None
2620                    }
2621                })
2622                .collect();
2623
2624            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
2625                let mut intermediate_projections = non_window_projections;
2626                // Add any additional property references needed by window functions
2627                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
2628                for prop in &props_needed_for_window {
2629                    if !intermediate_projections
2630                        .iter()
2631                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
2632                    {
2633                        let qualified_name = prop.to_string_repr();
2634                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
2635                    }
2636                }
2637
2638                if !intermediate_projections.is_empty() {
2639                    plan = LogicalPlan::Project {
2640                        input: Box::new(plan),
2641                        projections: intermediate_projections,
2642                    };
2643                }
2644            }
2645
2646            // Transform property expressions in window functions to use qualified variable names
2647            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
2648            let transformed_window_exprs: Vec<Expr> = window_exprs
2649                .into_iter()
2650                .map(Self::transform_window_expr_properties)
2651                .collect();
2652
2653            plan = LogicalPlan::Window {
2654                input: Box::new(plan),
2655                window_exprs: transformed_window_exprs,
2656            };
2657        }
2658
2659        if let Some(order_by) = &return_clause.order_by {
2660            let alias_exprs: HashMap<String, Expr> = projections
2661                .iter()
2662                .filter_map(|(expr, alias)| {
2663                    alias.as_ref().map(|a| {
2664                        // ORDER BY is planned before the final RETURN projection.
2665                        // In aggregate contexts, aliases must resolve to the
2666                        // post-aggregate output columns, not raw aggregate calls.
2667                        let rewritten = if has_agg && !has_window_exprs {
2668                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
2669                                Expr::Variable(aggregate_column_name(expr))
2670                            } else if is_compound_aggregate(expr)
2671                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
2672                            {
2673                                replace_aggregates_with_columns(expr)
2674                            } else {
2675                                Expr::Variable(expr.to_string_repr())
2676                            }
2677                        } else {
2678                            expr.clone()
2679                        };
2680                        (a.clone(), rewritten)
2681                    })
2682                })
2683                .collect();
2684
2685            // Build an extended scope that includes RETURN aliases so ORDER BY
2686            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
2687            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
2688                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
2689                // Keep aliases and directly returned variables in scope.
2690                let mut scope = Vec::new();
2691                for (expr, alias) in &projections {
2692                    if let Some(a) = alias
2693                        && !is_var_in_scope(&scope, a)
2694                    {
2695                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2696                    }
2697                    if let Expr::Variable(v) = expr
2698                        && !is_var_in_scope(&scope, v)
2699                    {
2700                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2701                    }
2702                }
2703                scope
2704            } else {
2705                let mut scope = vars_in_scope.to_vec();
2706                for (expr, alias) in &projections {
2707                    if let Some(a) = alias
2708                        && !is_var_in_scope(&scope, a)
2709                    {
2710                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2711                    } else if let Expr::Variable(v) = expr
2712                        && !is_var_in_scope(&scope, v)
2713                    {
2714                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2715                    }
2716                }
2717                scope
2718            };
2719            // Validate ORDER BY expressions against the extended scope
2720            for item in order_by {
2721                // DISTINCT allows ORDER BY on the same projected expression
2722                // even when underlying variables are not otherwise visible.
2723                let matches_projected_expr = return_clause.distinct
2724                    && projections
2725                        .iter()
2726                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
2727                if !matches_projected_expr {
2728                    validate_expression_variables(&item.expr, &order_by_scope)?;
2729                    validate_expression(&item.expr, &order_by_scope)?;
2730                }
2731                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
2732                if has_aggregate_in_item && !has_agg {
2733                    return Err(anyhow!(
2734                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
2735                    ));
2736                }
2737                if has_agg && has_aggregate_in_item {
2738                    validate_with_order_by_aggregate_item(
2739                        &item.expr,
2740                        &projected_aggregate_reprs,
2741                        &projected_simple_reprs,
2742                        &projected_aliases,
2743                    )?;
2744                }
2745            }
2746            let rewritten_order_by: Vec<SortItem> = order_by
2747                .iter()
2748                .map(|item| SortItem {
2749                    expr: {
2750                        let mut rewritten =
2751                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
2752                        if has_agg && !has_window_exprs {
2753                            rewritten = replace_aggregates_with_columns(&rewritten);
2754                        }
2755                        rewritten
2756                    },
2757                    ascending: item.ascending,
2758                })
2759                .collect();
2760            plan = LogicalPlan::Sort {
2761                input: Box::new(plan),
2762                order_by: rewritten_order_by,
2763            };
2764        }
2765
2766        if return_clause.skip.is_some() || return_clause.limit.is_some() {
2767            let skip = return_clause
2768                .skip
2769                .as_ref()
2770                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
2771                .transpose()?
2772                .flatten();
2773            let fetch = return_clause
2774                .limit
2775                .as_ref()
2776                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
2777                .transpose()?
2778                .flatten();
2779
2780            plan = LogicalPlan::Limit {
2781                input: Box::new(plan),
2782                skip,
2783                fetch,
2784            };
2785        }
2786
2787        if !projections.is_empty() {
2788            // If we created an Aggregate or Window node, we need to adjust the final projections
2789            // to reference aggregate/window function results as columns instead of re-evaluating them
2790            let final_projections = if has_agg || has_window_exprs {
2791                projections
2792                    .into_iter()
2793                    .map(|(expr, alias)| {
2794                        // Check if this expression is an aggregate function
2795                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
2796                        {
2797                            // Bare aggregate — replace with column reference
2798                            let col_name = aggregate_column_name(&expr);
2799                            (Expr::Variable(col_name), alias)
2800                        } else if !has_window_exprs
2801                            && (is_compound_aggregate(&expr)
2802                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
2803                        {
2804                            // Compound aggregate — replace inner aggregates with
2805                            // column references, keep outer expression for Project
2806                            (replace_aggregates_with_columns(&expr), alias)
2807                        }
2808                        // For grouped RETURN projections, reference the pre-computed
2809                        // group-by output column instead of re-evaluating the expression
2810                        // against the aggregate schema (which no longer has original vars).
2811                        else if has_agg
2812                            && !has_window_exprs
2813                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
2814                        {
2815                            (Expr::Variable(expr.to_string_repr()), alias)
2816                        }
2817                        // Check if this expression is a window function
2818                        else if let Expr::FunctionCall {
2819                            window_spec: Some(_),
2820                            ..
2821                        } = &expr
2822                        {
2823                            // Replace window function with a column reference to its result
2824                            // The column name in the Window output is the full expression string
2825                            let window_col_name = expr.to_string_repr();
2826                            // Keep the original alias for the final output
2827                            (Expr::Variable(window_col_name), alias)
2828                        } else {
2829                            (expr, alias)
2830                        }
2831                    })
2832                    .collect()
2833            } else {
2834                projections
2835            };
2836
2837            plan = LogicalPlan::Project {
2838                input: Box::new(plan),
2839                projections: final_projections,
2840            };
2841        }
2842
2843        if return_clause.distinct {
2844            plan = LogicalPlan::Distinct {
2845                input: Box::new(plan),
2846            };
2847        }
2848
2849        Ok(plan)
2850    }
2851
2852    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
2853        let typed_vars: Vec<VariableInfo> = initial_vars
2854            .into_iter()
2855            .map(|name| VariableInfo::new(name, VariableType::Imported))
2856            .collect();
2857        self.plan_single_typed(query, typed_vars)
2858    }
2859
2860    /// Rewrite a query then plan it, preserving typed variable scope when possible.
2861    ///
2862    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
2863    /// variable info through and avoid false type-conflict errors in subqueries.
2864    /// For unions and other compound queries, falls back to `plan_with_scope`.
2865    fn rewrite_and_plan_typed(
2866        &self,
2867        query: Query,
2868        typed_vars: &[VariableInfo],
2869    ) -> Result<LogicalPlan> {
2870        let rewritten = crate::query::rewrite::rewrite_query(query)?;
2871        match rewritten {
2872            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
2873            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
2874        }
2875    }
2876
2877    fn plan_single_typed(
2878        &self,
2879        query: Statement,
2880        initial_vars: Vec<VariableInfo>,
2881    ) -> Result<LogicalPlan> {
2882        let mut plan = LogicalPlan::Empty;
2883
2884        if !initial_vars.is_empty() {
2885            // Project bound variables from outer scope as parameters.
2886            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
2887            // Use Parameter expressions to read from params, not Variable which would read from input row.
2888            let projections = initial_vars
2889                .iter()
2890                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
2891                .collect();
2892            plan = LogicalPlan::Project {
2893                input: Box::new(plan),
2894                projections,
2895            };
2896        }
2897
2898        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
2899        // Track variables introduced by CREATE clauses so we can distinguish
2900        // MATCH-introduced variables (which cannot be re-created as bare nodes)
2901        // from CREATE-introduced variables (which can be referenced as bare nodes).
2902        let mut create_introduced_vars: HashSet<String> = HashSet::new();
2903        // Track variables targeted by DELETE so we can reject property/label
2904        // access on deleted entities in subsequent RETURN clauses.
2905        let mut deleted_vars: HashSet<String> = HashSet::new();
2906
2907        let clause_count = query.clauses.len();
2908        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
2909            match clause {
2910                Clause::Match(match_clause) => {
2911                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
2912                }
2913                Clause::Unwind(unwind) => {
2914                    plan = LogicalPlan::Unwind {
2915                        input: Box::new(plan),
2916                        expr: unwind.expr.clone(),
2917                        variable: unwind.variable.clone(),
2918                    };
2919                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
2920                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
2921                }
2922                Clause::Call(call_clause) => {
2923                    match &call_clause.kind {
2924                        CallKind::Procedure {
2925                            procedure,
2926                            arguments,
2927                        } => {
2928                            // Validate that procedure arguments don't contain aggregation functions
2929                            for arg in arguments {
2930                                if contains_aggregate_recursive(arg) {
2931                                    return Err(anyhow!(
2932                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
2933                                    ));
2934                                }
2935                            }
2936
2937                            let has_yield_star = call_clause.yield_items.len() == 1
2938                                && call_clause.yield_items[0].name == "*"
2939                                && call_clause.yield_items[0].alias.is_none();
2940                            if has_yield_star && clause_idx + 1 < clause_count {
2941                                return Err(anyhow!(
2942                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
2943                                ));
2944                            }
2945
2946                            // Validate for duplicate yield names (VariableAlreadyBound)
2947                            let mut yield_names = Vec::new();
2948                            for item in &call_clause.yield_items {
2949                                if item.name == "*" {
2950                                    continue;
2951                                }
2952                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
2953                                if yield_names.contains(output_name) {
2954                                    return Err(anyhow!(
2955                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
2956                                        output_name
2957                                    ));
2958                                }
2959                                // Check against existing scope (in-query CALL must not shadow)
2960                                if clause_idx > 0
2961                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
2962                                {
2963                                    return Err(anyhow!(
2964                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
2965                                        output_name
2966                                    ));
2967                                }
2968                                yield_names.push(output_name.clone());
2969                            }
2970
2971                            let mut yields = Vec::new();
2972                            for item in &call_clause.yield_items {
2973                                if item.name == "*" {
2974                                    continue;
2975                                }
2976                                yields.push((item.name.clone(), item.alias.clone()));
2977                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
2978                                // Use Imported because procedure return types are unknown
2979                                // at plan time (could be nodes, edges, or scalars)
2980                                add_var_to_scope(
2981                                    &mut vars_in_scope,
2982                                    var_name,
2983                                    VariableType::Imported,
2984                                )?;
2985                            }
2986                            let proc_plan = LogicalPlan::ProcedureCall {
2987                                procedure_name: procedure.clone(),
2988                                arguments: arguments.clone(),
2989                                yield_items: yields.clone(),
2990                            };
2991
2992                            if matches!(plan, LogicalPlan::Empty) {
2993                                // Standalone CALL (first clause) — use directly
2994                                plan = proc_plan;
2995                            } else if yields.is_empty() {
2996                                // In-query CALL with no YIELD (void procedure):
2997                                // preserve the input rows unchanged
2998                            } else {
2999                                // In-query CALL with YIELD: cross-join input × procedure output
3000                                plan = LogicalPlan::Apply {
3001                                    input: Box::new(plan),
3002                                    subquery: Box::new(proc_plan),
3003                                    input_filter: None,
3004                                };
3005                            }
3006                        }
3007                        CallKind::Subquery(query) => {
3008                            let subquery_plan =
3009                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3010
3011                            // Extract variables from subquery RETURN clause
3012                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3013
3014                            // Add new variables to scope (as Scalar since they come from subquery projection)
3015                            for var in subquery_vars {
3016                                if !is_var_in_scope(&vars_in_scope, &var) {
3017                                    add_var_to_scope(
3018                                        &mut vars_in_scope,
3019                                        &var,
3020                                        VariableType::Scalar,
3021                                    )?;
3022                                }
3023                            }
3024
3025                            plan = LogicalPlan::SubqueryCall {
3026                                input: Box::new(plan),
3027                                subquery: Box::new(subquery_plan),
3028                            };
3029                        }
3030                    }
3031                }
3032                Clause::Merge(merge_clause) => {
3033                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3034
3035                    plan = LogicalPlan::Merge {
3036                        input: Box::new(plan),
3037                        pattern: merge_clause.pattern.clone(),
3038                        on_match: Some(SetClause {
3039                            items: merge_clause.on_match.clone(),
3040                        }),
3041                        on_create: Some(SetClause {
3042                            items: merge_clause.on_create.clone(),
3043                        }),
3044                    };
3045
3046                    for path in &merge_clause.pattern.paths {
3047                        if let Some(path_var) = &path.variable
3048                            && !path_var.is_empty()
3049                            && !is_var_in_scope(&vars_in_scope, path_var)
3050                        {
3051                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3052                        }
3053                        for element in &path.elements {
3054                            if let PatternElement::Node(n) = element {
3055                                if let Some(v) = &n.variable
3056                                    && !is_var_in_scope(&vars_in_scope, v)
3057                                {
3058                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3059                                }
3060                            } else if let PatternElement::Relationship(r) = element
3061                                && let Some(v) = &r.variable
3062                                && !is_var_in_scope(&vars_in_scope, v)
3063                            {
3064                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3065                            }
3066                        }
3067                    }
3068                }
3069                Clause::Create(create_clause) => {
3070                    // Validate CREATE patterns:
3071                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3072                    // - Bare nodes (v) are "references" if bound, "creations" if not
3073                    // - Relationships are always creations - can't rebind
3074                    // - Within CREATE, each new variable can only be defined once
3075                    // - Variables used in properties must be defined
3076                    let mut create_vars: Vec<&str> = Vec::new();
3077                    for path in &create_clause.pattern.paths {
3078                        let is_standalone_node = path.elements.len() == 1;
3079                        for element in &path.elements {
3080                            match element {
3081                                PatternElement::Node(n) => {
3082                                    validate_property_variables(
3083                                        &n.properties,
3084                                        &vars_in_scope,
3085                                        &create_vars,
3086                                    )?;
3087
3088                                    if let Some(v) = n.variable.as_deref()
3089                                        && !v.is_empty()
3090                                    {
3091                                        // A node is a "creation" if it has labels or properties
3092                                        let is_creation =
3093                                            !n.labels.is_empty() || n.properties.is_some();
3094
3095                                        if is_creation {
3096                                            check_not_already_bound(
3097                                                v,
3098                                                &vars_in_scope,
3099                                                &create_vars,
3100                                            )?;
3101                                            create_vars.push(v);
3102                                        } else if is_standalone_node
3103                                            && is_var_in_scope(&vars_in_scope, v)
3104                                            && !create_introduced_vars.contains(v)
3105                                        {
3106                                            // Standalone bare node referencing a variable from a
3107                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3108                                            // Bare nodes used as relationship endpoints
3109                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3110                                            return Err(anyhow!(
3111                                                "SyntaxError: VariableAlreadyBound - '{}'",
3112                                                v
3113                                            ));
3114                                        } else if !create_vars.contains(&v) {
3115                                            // New bare variable — register it
3116                                            create_vars.push(v);
3117                                        }
3118                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3119                                    }
3120                                }
3121                                PatternElement::Relationship(r) => {
3122                                    validate_property_variables(
3123                                        &r.properties,
3124                                        &vars_in_scope,
3125                                        &create_vars,
3126                                    )?;
3127
3128                                    if let Some(v) = r.variable.as_deref()
3129                                        && !v.is_empty()
3130                                    {
3131                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3132                                        create_vars.push(v);
3133                                    }
3134
3135                                    // Validate relationship constraints for CREATE
3136                                    if r.types.len() != 1 {
3137                                        return Err(anyhow!(
3138                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3139                                        ));
3140                                    }
3141                                    if r.direction == Direction::Both {
3142                                        return Err(anyhow!(
3143                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3144                                        ));
3145                                    }
3146                                    if r.range.is_some() {
3147                                        return Err(anyhow!(
3148                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3149                                        ));
3150                                    }
3151                                }
3152                                PatternElement::Parenthesized { .. } => {}
3153                            }
3154                        }
3155                    }
3156
3157                    // Batch consecutive CREATEs to avoid deep recursion
3158                    match &mut plan {
3159                        LogicalPlan::CreateBatch { patterns, .. } => {
3160                            // Append to existing batch
3161                            patterns.push(create_clause.pattern.clone());
3162                        }
3163                        LogicalPlan::Create { input, pattern } => {
3164                            // Convert single Create to CreateBatch with both patterns
3165                            let first_pattern = pattern.clone();
3166                            plan = LogicalPlan::CreateBatch {
3167                                input: input.clone(),
3168                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3169                            };
3170                        }
3171                        _ => {
3172                            // Start new Create (may become batch if more CREATEs follow)
3173                            plan = LogicalPlan::Create {
3174                                input: Box::new(plan),
3175                                pattern: create_clause.pattern.clone(),
3176                            };
3177                        }
3178                    }
3179                    // Add variables from created nodes and relationships to scope
3180                    for path in &create_clause.pattern.paths {
3181                        for element in &path.elements {
3182                            match element {
3183                                PatternElement::Node(n) => {
3184                                    if let Some(var) = &n.variable
3185                                        && !var.is_empty()
3186                                    {
3187                                        create_introduced_vars.insert(var.clone());
3188                                        add_var_to_scope(
3189                                            &mut vars_in_scope,
3190                                            var,
3191                                            VariableType::Node,
3192                                        )?;
3193                                    }
3194                                }
3195                                PatternElement::Relationship(r) => {
3196                                    if let Some(var) = &r.variable
3197                                        && !var.is_empty()
3198                                    {
3199                                        create_introduced_vars.insert(var.clone());
3200                                        add_var_to_scope(
3201                                            &mut vars_in_scope,
3202                                            var,
3203                                            VariableType::Edge,
3204                                        )?;
3205                                    }
3206                                }
3207                                PatternElement::Parenthesized { .. } => {
3208                                    // Skip for now - not commonly used in CREATE
3209                                }
3210                            }
3211                        }
3212                    }
3213                }
3214                Clause::Set(set_clause) => {
3215                    // Validate SET value expressions
3216                    for item in &set_clause.items {
3217                        match item {
3218                            SetItem::Property { value, .. }
3219                            | SetItem::Variable { value, .. }
3220                            | SetItem::VariablePlus { value, .. } => {
3221                                validate_expression_variables(value, &vars_in_scope)?;
3222                                validate_expression(value, &vars_in_scope)?;
3223                                if contains_pattern_predicate(value) {
3224                                    return Err(anyhow!(
3225                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3226                                    ));
3227                                }
3228                            }
3229                            SetItem::Labels { .. } => {}
3230                        }
3231                    }
3232                    plan = LogicalPlan::Set {
3233                        input: Box::new(plan),
3234                        items: set_clause.items.clone(),
3235                    };
3236                }
3237                Clause::Remove(remove_clause) => {
3238                    plan = LogicalPlan::Remove {
3239                        input: Box::new(plan),
3240                        items: remove_clause.items.clone(),
3241                    };
3242                }
3243                Clause::Delete(delete_clause) => {
3244                    // Validate DELETE targets
3245                    for item in &delete_clause.items {
3246                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3247                        if matches!(item, Expr::LabelCheck { .. }) {
3248                            return Err(anyhow!(
3249                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3250                            ));
3251                        }
3252                        let vars_used = collect_expr_variables(item);
3253                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3254                        if vars_used.is_empty() {
3255                            return Err(anyhow!(
3256                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3257                            ));
3258                        }
3259                        for var in &vars_used {
3260                            // Check if variable is defined
3261                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3262                                return Err(anyhow!(
3263                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3264                                    var
3265                                ));
3266                            }
3267                        }
3268                        // Strict type check only for simple variable references —
3269                        // complex expressions (property access, array index, etc.)
3270                        // may resolve to a node/edge at runtime even if the base
3271                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3272                        if let Expr::Variable(name) = item
3273                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3274                            && matches!(
3275                                info.var_type,
3276                                VariableType::Scalar | VariableType::ScalarLiteral
3277                            )
3278                        {
3279                            return Err(anyhow!(
3280                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3281                                name
3282                            ));
3283                        }
3284                    }
3285                    // Track deleted variables for later validation
3286                    for item in &delete_clause.items {
3287                        if let Expr::Variable(name) = item {
3288                            deleted_vars.insert(name.clone());
3289                        }
3290                    }
3291                    plan = LogicalPlan::Delete {
3292                        input: Box::new(plan),
3293                        items: delete_clause.items.clone(),
3294                        detach: delete_clause.detach,
3295                    };
3296                }
3297                Clause::With(with_clause) => {
3298                    let (new_plan, new_vars) =
3299                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3300                    plan = new_plan;
3301                    vars_in_scope = new_vars;
3302                }
3303                Clause::WithRecursive(with_recursive) => {
3304                    // Plan the recursive CTE
3305                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3306                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3307                    add_var_to_scope(
3308                        &mut vars_in_scope,
3309                        &with_recursive.name,
3310                        VariableType::Scalar,
3311                    )?;
3312                }
3313                Clause::Return(return_clause) => {
3314                    // Check for property/label access on deleted entities
3315                    if !deleted_vars.is_empty() {
3316                        for item in &return_clause.items {
3317                            if let ReturnItem::Expr { expr, .. } = item {
3318                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3319                            }
3320                        }
3321                    }
3322                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3323                } // All Clause variants are handled above - no catch-all needed
3324            }
3325        }
3326
3327        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3328        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3329        // If RETURN was used, the plan will have been wrapped in Project, so we only
3330        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3331        let plan = match &plan {
3332            LogicalPlan::Create { .. }
3333            | LogicalPlan::CreateBatch { .. }
3334            | LogicalPlan::Delete { .. }
3335            | LogicalPlan::Set { .. }
3336            | LogicalPlan::Remove { .. }
3337            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3338                input: Box::new(plan),
3339                skip: None,
3340                fetch: Some(0),
3341            },
3342            _ => plan,
3343        };
3344
3345        Ok(plan)
3346    }
3347
3348    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3349        match expr {
3350            Expr::Property(_, _) => {
3351                if !collected
3352                    .iter()
3353                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3354                {
3355                    collected.push(expr.clone());
3356                }
3357            }
3358            Expr::Variable(_) => {
3359                // Variables are already available, don't need to project them
3360            }
3361            Expr::BinaryOp { left, right, .. } => {
3362                Self::collect_properties_from_expr(left, collected);
3363                Self::collect_properties_from_expr(right, collected);
3364            }
3365            Expr::FunctionCall {
3366                args, window_spec, ..
3367            } => {
3368                for arg in args {
3369                    Self::collect_properties_from_expr(arg, collected);
3370                }
3371                if let Some(spec) = window_spec {
3372                    for partition_expr in &spec.partition_by {
3373                        Self::collect_properties_from_expr(partition_expr, collected);
3374                    }
3375                    for sort_item in &spec.order_by {
3376                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3377                    }
3378                }
3379            }
3380            Expr::List(items) => {
3381                for item in items {
3382                    Self::collect_properties_from_expr(item, collected);
3383                }
3384            }
3385            Expr::UnaryOp { expr: e, .. }
3386            | Expr::IsNull(e)
3387            | Expr::IsNotNull(e)
3388            | Expr::IsUnique(e) => {
3389                Self::collect_properties_from_expr(e, collected);
3390            }
3391            Expr::Case {
3392                expr,
3393                when_then,
3394                else_expr,
3395            } => {
3396                if let Some(e) = expr {
3397                    Self::collect_properties_from_expr(e, collected);
3398                }
3399                for (w, t) in when_then {
3400                    Self::collect_properties_from_expr(w, collected);
3401                    Self::collect_properties_from_expr(t, collected);
3402                }
3403                if let Some(e) = else_expr {
3404                    Self::collect_properties_from_expr(e, collected);
3405                }
3406            }
3407            Expr::In { expr, list } => {
3408                Self::collect_properties_from_expr(expr, collected);
3409                Self::collect_properties_from_expr(list, collected);
3410            }
3411            Expr::ArrayIndex { array, index } => {
3412                Self::collect_properties_from_expr(array, collected);
3413                Self::collect_properties_from_expr(index, collected);
3414            }
3415            Expr::ArraySlice { array, start, end } => {
3416                Self::collect_properties_from_expr(array, collected);
3417                if let Some(s) = start {
3418                    Self::collect_properties_from_expr(s, collected);
3419                }
3420                if let Some(e) = end {
3421                    Self::collect_properties_from_expr(e, collected);
3422                }
3423            }
3424            _ => {}
3425        }
3426    }
3427
3428    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3429        if let Expr::FunctionCall { window_spec, .. } = expr {
3430            // Collect any function with a window spec (OVER clause)
3431            if window_spec.is_some() {
3432                if !collected
3433                    .iter()
3434                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3435                {
3436                    collected.push(expr.clone());
3437                }
3438                return;
3439            }
3440        }
3441
3442        match expr {
3443            Expr::BinaryOp { left, right, .. } => {
3444                Self::collect_window_functions(left, collected);
3445                Self::collect_window_functions(right, collected);
3446            }
3447            Expr::FunctionCall { args, .. } => {
3448                for arg in args {
3449                    Self::collect_window_functions(arg, collected);
3450                }
3451            }
3452            Expr::List(items) => {
3453                for i in items {
3454                    Self::collect_window_functions(i, collected);
3455                }
3456            }
3457            Expr::Map(items) => {
3458                for (_, i) in items {
3459                    Self::collect_window_functions(i, collected);
3460                }
3461            }
3462            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3463                Self::collect_window_functions(e, collected);
3464            }
3465            Expr::Case {
3466                expr,
3467                when_then,
3468                else_expr,
3469            } => {
3470                if let Some(e) = expr {
3471                    Self::collect_window_functions(e, collected);
3472                }
3473                for (w, t) in when_then {
3474                    Self::collect_window_functions(w, collected);
3475                    Self::collect_window_functions(t, collected);
3476                }
3477                if let Some(e) = else_expr {
3478                    Self::collect_window_functions(e, collected);
3479                }
3480            }
3481            Expr::Reduce {
3482                init, list, expr, ..
3483            } => {
3484                Self::collect_window_functions(init, collected);
3485                Self::collect_window_functions(list, collected);
3486                Self::collect_window_functions(expr, collected);
3487            }
3488            Expr::Quantifier {
3489                list, predicate, ..
3490            } => {
3491                Self::collect_window_functions(list, collected);
3492                Self::collect_window_functions(predicate, collected);
3493            }
3494            Expr::In { expr, list } => {
3495                Self::collect_window_functions(expr, collected);
3496                Self::collect_window_functions(list, collected);
3497            }
3498            Expr::ArrayIndex { array, index } => {
3499                Self::collect_window_functions(array, collected);
3500                Self::collect_window_functions(index, collected);
3501            }
3502            Expr::ArraySlice { array, start, end } => {
3503                Self::collect_window_functions(array, collected);
3504                if let Some(s) = start {
3505                    Self::collect_window_functions(s, collected);
3506                }
3507                if let Some(e) = end {
3508                    Self::collect_window_functions(e, collected);
3509                }
3510            }
3511            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3512            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3513            _ => {}
3514        }
3515    }
3516
3517    /// Transform property expressions in manual window functions to use qualified variable names.
3518    ///
3519    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3520    /// so the executor can look up values directly from the row HashMap after the
3521    /// intermediate projection has materialized these properties with qualified names.
3522    ///
3523    /// Transforms ALL window functions (both manual and aggregate).
3524    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3525    fn transform_window_expr_properties(expr: Expr) -> Expr {
3526        let Expr::FunctionCall {
3527            name,
3528            args,
3529            window_spec: Some(spec),
3530            distinct,
3531        } = expr
3532        else {
3533            return expr;
3534        };
3535
3536        // Transform arguments for ALL window functions
3537        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3538        let transformed_args = args
3539            .into_iter()
3540            .map(Self::transform_property_to_variable)
3541            .collect();
3542
3543        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3544        let transformed_partition_by = spec
3545            .partition_by
3546            .into_iter()
3547            .map(Self::transform_property_to_variable)
3548            .collect();
3549
3550        let transformed_order_by = spec
3551            .order_by
3552            .into_iter()
3553            .map(|item| SortItem {
3554                expr: Self::transform_property_to_variable(item.expr),
3555                ascending: item.ascending,
3556            })
3557            .collect();
3558
3559        Expr::FunctionCall {
3560            name,
3561            args: transformed_args,
3562            window_spec: Some(WindowSpec {
3563                partition_by: transformed_partition_by,
3564                order_by: transformed_order_by,
3565            }),
3566            distinct,
3567        }
3568    }
3569
3570    /// Transform a property expression to a variable expression with qualified name.
3571    ///
3572    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
3573    fn transform_property_to_variable(expr: Expr) -> Expr {
3574        let Expr::Property(base, prop) = expr else {
3575            return expr;
3576        };
3577
3578        match *base {
3579            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
3580            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
3581        }
3582    }
3583
3584    /// Transform VALID_AT macro into function call
3585    ///
3586    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
3587    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
3588    fn transform_valid_at_to_function(expr: Expr) -> Expr {
3589        match expr {
3590            Expr::ValidAt {
3591                entity,
3592                timestamp,
3593                start_prop,
3594                end_prop,
3595            } => {
3596                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
3597                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
3598
3599                Expr::FunctionCall {
3600                    name: "uni.temporal.validAt".to_string(),
3601                    args: vec![
3602                        Self::transform_valid_at_to_function(*entity),
3603                        Expr::Literal(CypherLiteral::String(start)),
3604                        Expr::Literal(CypherLiteral::String(end)),
3605                        Self::transform_valid_at_to_function(*timestamp),
3606                    ],
3607                    distinct: false,
3608                    window_spec: None,
3609                }
3610            }
3611            // Recursively transform nested expressions
3612            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
3613                left: Box::new(Self::transform_valid_at_to_function(*left)),
3614                op,
3615                right: Box::new(Self::transform_valid_at_to_function(*right)),
3616            },
3617            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
3618                op,
3619                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3620            },
3621            Expr::FunctionCall {
3622                name,
3623                args,
3624                distinct,
3625                window_spec,
3626            } => Expr::FunctionCall {
3627                name,
3628                args: args
3629                    .into_iter()
3630                    .map(Self::transform_valid_at_to_function)
3631                    .collect(),
3632                distinct,
3633                window_spec,
3634            },
3635            Expr::Property(base, prop) => {
3636                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
3637            }
3638            Expr::List(items) => Expr::List(
3639                items
3640                    .into_iter()
3641                    .map(Self::transform_valid_at_to_function)
3642                    .collect(),
3643            ),
3644            Expr::In { expr, list } => Expr::In {
3645                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3646                list: Box::new(Self::transform_valid_at_to_function(*list)),
3647            },
3648            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
3649            Expr::IsNotNull(e) => {
3650                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
3651            }
3652            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
3653            // Other cases: return as-is
3654            other => other,
3655        }
3656    }
3657
3658    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
3659    fn plan_match_clause(
3660        &self,
3661        match_clause: &MatchClause,
3662        plan: LogicalPlan,
3663        vars_in_scope: &mut Vec<VariableInfo>,
3664    ) -> Result<LogicalPlan> {
3665        let mut plan = plan;
3666
3667        if match_clause.pattern.paths.is_empty() {
3668            return Err(anyhow!("Empty pattern"));
3669        }
3670
3671        // Track variables introduced by this OPTIONAL MATCH
3672        let vars_before_pattern = vars_in_scope.len();
3673
3674        for path in &match_clause.pattern.paths {
3675            if let Some(mode) = &path.shortest_path_mode {
3676                plan =
3677                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
3678            } else {
3679                plan = self.plan_path(
3680                    path,
3681                    plan,
3682                    vars_in_scope,
3683                    match_clause.optional,
3684                    vars_before_pattern,
3685                )?;
3686            }
3687        }
3688
3689        // Collect variables introduced by this OPTIONAL MATCH pattern
3690        let optional_vars: HashSet<String> = if match_clause.optional {
3691            vars_in_scope[vars_before_pattern..]
3692                .iter()
3693                .map(|v| v.name.clone())
3694                .collect()
3695        } else {
3696            HashSet::new()
3697        };
3698
3699        // Handle WHERE clause with vector_similarity and predicate pushdown
3700        if let Some(predicate) = &match_clause.where_clause {
3701            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
3702        }
3703
3704        Ok(plan)
3705    }
3706
3707    /// Plan a shortestPath pattern.
3708    fn plan_shortest_path(
3709        &self,
3710        path: &PathPattern,
3711        plan: LogicalPlan,
3712        vars_in_scope: &mut Vec<VariableInfo>,
3713        mode: &ShortestPathMode,
3714        _vars_before_pattern: usize,
3715    ) -> Result<LogicalPlan> {
3716        let mut plan = plan;
3717        let elements = &path.elements;
3718
3719        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
3720        if elements.len() < 3 || elements.len().is_multiple_of(2) {
3721            return Err(anyhow!(
3722                "shortestPath requires at least one relationship: (a)-[*]->(b)"
3723            ));
3724        }
3725
3726        let source_node = match &elements[0] {
3727            PatternElement::Node(n) => n,
3728            _ => return Err(anyhow!("ShortestPath must start with a node")),
3729        };
3730        let rel = match &elements[1] {
3731            PatternElement::Relationship(r) => r,
3732            _ => {
3733                return Err(anyhow!(
3734                    "ShortestPath middle element must be a relationship"
3735                ));
3736            }
3737        };
3738        let target_node = match &elements[2] {
3739            PatternElement::Node(n) => n,
3740            _ => return Err(anyhow!("ShortestPath must end with a node")),
3741        };
3742
3743        let source_var = source_node
3744            .variable
3745            .clone()
3746            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
3747        let target_var = target_node
3748            .variable
3749            .clone()
3750            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
3751        let path_var = path
3752            .variable
3753            .clone()
3754            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
3755
3756        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
3757        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
3758
3759        // Plan source node if not bound
3760        if !source_bound {
3761            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
3762        } else if let Some(prop_filter) =
3763            self.properties_to_expr(&source_var, &source_node.properties)
3764        {
3765            plan = LogicalPlan::Filter {
3766                input: Box::new(plan),
3767                predicate: prop_filter,
3768                optional_variables: HashSet::new(),
3769            };
3770        }
3771
3772        // Plan target node if not bound
3773        let target_label_id = if !target_bound {
3774            // Use first label for target_label_id
3775            let target_label_name = target_node
3776                .labels
3777                .first()
3778                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
3779            let target_label_meta = self
3780                .schema
3781                .get_label_case_insensitive(target_label_name)
3782                .ok_or_else(|| anyhow!("Label {} not found", target_label_name))?;
3783
3784            let target_scan = LogicalPlan::Scan {
3785                label_id: target_label_meta.id,
3786                labels: target_node.labels.clone(),
3787                variable: target_var.clone(),
3788                filter: self.properties_to_expr(&target_var, &target_node.properties),
3789                optional: false,
3790            };
3791
3792            plan = Self::join_with_plan(plan, target_scan);
3793            target_label_meta.id
3794        } else {
3795            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
3796            {
3797                plan = LogicalPlan::Filter {
3798                    input: Box::new(plan),
3799                    predicate: prop_filter,
3800                    optional_variables: HashSet::new(),
3801                };
3802            }
3803            0 // Wildcard for already-bound target
3804        };
3805
3806        // Add ShortestPath operator
3807        let edge_type_ids = if rel.types.is_empty() {
3808            // If no type specified, fetch all edge types (both schema and schemaless)
3809            self.schema.all_edge_type_ids()
3810        } else {
3811            let mut ids = Vec::new();
3812            for type_name in &rel.types {
3813                let edge_meta = self
3814                    .schema
3815                    .edge_types
3816                    .get(type_name)
3817                    .ok_or_else(|| anyhow!("Edge type {} not found", type_name))?;
3818                ids.push(edge_meta.id);
3819            }
3820            ids
3821        };
3822
3823        // Extract hop constraints from relationship pattern
3824        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
3825        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
3826
3827        let sp_plan = match mode {
3828            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
3829                input: Box::new(plan),
3830                edge_type_ids,
3831                direction: rel.direction.clone(),
3832                source_variable: source_var.clone(),
3833                target_variable: target_var.clone(),
3834                target_label_id,
3835                path_variable: path_var.clone(),
3836                min_hops,
3837                max_hops,
3838            },
3839            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
3840                input: Box::new(plan),
3841                edge_type_ids,
3842                direction: rel.direction.clone(),
3843                source_variable: source_var.clone(),
3844                target_variable: target_var.clone(),
3845                target_label_id,
3846                path_variable: path_var.clone(),
3847                min_hops,
3848                max_hops,
3849            },
3850        };
3851
3852        if !source_bound {
3853            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
3854        }
3855        if !target_bound {
3856            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
3857        }
3858        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
3859
3860        Ok(sp_plan)
3861    }
3862    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
3863    ///
3864    /// This is a public entry point for the Locy plan builder to reuse the
3865    /// existing pattern-planning logic for clause bodies.
3866    pub fn plan_pattern(
3867        &self,
3868        pattern: &Pattern,
3869        initial_vars: &[VariableInfo],
3870    ) -> Result<LogicalPlan> {
3871        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
3872        let vars_before_pattern = vars_in_scope.len();
3873        let mut plan = LogicalPlan::Empty;
3874        for path in &pattern.paths {
3875            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
3876        }
3877        Ok(plan)
3878    }
3879
3880    /// Plan a regular MATCH path (not shortestPath).
3881    fn plan_path(
3882        &self,
3883        path: &PathPattern,
3884        plan: LogicalPlan,
3885        vars_in_scope: &mut Vec<VariableInfo>,
3886        optional: bool,
3887        vars_before_pattern: usize,
3888    ) -> Result<LogicalPlan> {
3889        let mut plan = plan;
3890        let elements = &path.elements;
3891        let mut i = 0;
3892
3893        let path_variable = path.variable.clone();
3894
3895        // Check for VariableAlreadyBound: path variable already in scope
3896        if let Some(pv) = &path_variable
3897            && !pv.is_empty()
3898            && is_var_in_scope(vars_in_scope, pv)
3899        {
3900            return Err(anyhow!(
3901                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3902                pv
3903            ));
3904        }
3905
3906        // Check for VariableAlreadyBound: path variable conflicts with element variables
3907        if let Some(pv) = &path_variable
3908            && !pv.is_empty()
3909        {
3910            for element in elements {
3911                match element {
3912                    PatternElement::Node(n) => {
3913                        if let Some(v) = &n.variable
3914                            && v == pv
3915                        {
3916                            return Err(anyhow!(
3917                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3918                                pv
3919                            ));
3920                        }
3921                    }
3922                    PatternElement::Relationship(r) => {
3923                        if let Some(v) = &r.variable
3924                            && v == pv
3925                        {
3926                            return Err(anyhow!(
3927                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3928                                pv
3929                            ));
3930                        }
3931                    }
3932                    PatternElement::Parenthesized { .. } => {}
3933                }
3934            }
3935        }
3936
3937        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
3938        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
3939        let mut optional_pattern_vars: HashSet<String> = if optional {
3940            let mut vars = HashSet::new();
3941            for element in elements {
3942                match element {
3943                    PatternElement::Node(n) => {
3944                        if let Some(v) = &n.variable
3945                            && !v.is_empty()
3946                            && !is_var_in_scope(vars_in_scope, v)
3947                        {
3948                            vars.insert(v.clone());
3949                        }
3950                    }
3951                    PatternElement::Relationship(r) => {
3952                        if let Some(v) = &r.variable
3953                            && !v.is_empty()
3954                            && !is_var_in_scope(vars_in_scope, v)
3955                        {
3956                            vars.insert(v.clone());
3957                        }
3958                    }
3959                    PatternElement::Parenthesized { pattern, .. } => {
3960                        // Also check nested patterns
3961                        for nested_elem in &pattern.elements {
3962                            match nested_elem {
3963                                PatternElement::Node(n) => {
3964                                    if let Some(v) = &n.variable
3965                                        && !v.is_empty()
3966                                        && !is_var_in_scope(vars_in_scope, v)
3967                                    {
3968                                        vars.insert(v.clone());
3969                                    }
3970                                }
3971                                PatternElement::Relationship(r) => {
3972                                    if let Some(v) = &r.variable
3973                                        && !v.is_empty()
3974                                        && !is_var_in_scope(vars_in_scope, v)
3975                                    {
3976                                        vars.insert(v.clone());
3977                                    }
3978                                }
3979                                _ => {}
3980                            }
3981                        }
3982                    }
3983                }
3984            }
3985            // Include path variable if present
3986            if let Some(pv) = &path_variable
3987                && !pv.is_empty()
3988            {
3989                vars.insert(pv.clone());
3990            }
3991            vars
3992        } else {
3993            HashSet::new()
3994        };
3995
3996        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
3997        // These must participate in Trail mode (relationship uniqueness) enforcement
3998        // across ALL segments in this path, so that VLP segments like [*0..1] don't
3999        // traverse through edges already claimed by a bound relationship [r].
4000        let path_bound_edge_vars: HashSet<String> = {
4001            let mut bound = HashSet::new();
4002            for element in elements {
4003                if let PatternElement::Relationship(rel) = element
4004                    && let Some(ref var_name) = rel.variable
4005                    && !var_name.is_empty()
4006                    && vars_in_scope[..vars_before_pattern]
4007                        .iter()
4008                        .any(|v| v.name == *var_name)
4009                {
4010                    bound.insert(var_name.clone());
4011                }
4012            }
4013            bound
4014        };
4015
4016        // Track if any traverses were added (for zero-length path detection)
4017        let mut had_traverses = false;
4018        // Track the node variable for zero-length path binding
4019        let mut single_node_variable: Option<String> = None;
4020        // Collect node/edge variables for BindPath (fixed-length path binding)
4021        let mut path_node_vars: Vec<String> = Vec::new();
4022        let mut path_edge_vars: Vec<String> = Vec::new();
4023        // Track the last processed outer node variable for QPP source binding.
4024        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4025        let mut last_outer_node_var: Option<String> = None;
4026
4027        // Multi-hop path variables are now supported - path is accumulated across hops
4028        while i < elements.len() {
4029            let element = &elements[i];
4030            match element {
4031                PatternElement::Node(n) => {
4032                    let mut variable = n.variable.clone().unwrap_or_default();
4033                    if variable.is_empty() {
4034                        variable = self.next_anon_var();
4035                    }
4036                    // Track first node variable for zero-length path
4037                    if single_node_variable.is_none() {
4038                        single_node_variable = Some(variable.clone());
4039                    }
4040                    let is_bound =
4041                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4042                    if optional && !is_bound {
4043                        optional_pattern_vars.insert(variable.clone());
4044                    }
4045
4046                    if is_bound {
4047                        // Check for type conflict - can't use an Edge/Path as a Node
4048                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4049                            && !info.var_type.is_compatible_with(VariableType::Node)
4050                        {
4051                            return Err(anyhow!(
4052                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4053                                variable,
4054                                info.var_type
4055                            ));
4056                        }
4057                        if let Some(node_filter) =
4058                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4059                        {
4060                            plan = LogicalPlan::Filter {
4061                                input: Box::new(plan),
4062                                predicate: node_filter,
4063                                optional_variables: HashSet::new(),
4064                            };
4065                        }
4066                    } else {
4067                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4068                        if !variable.is_empty() {
4069                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4070                        }
4071                    }
4072
4073                    // Track source node for BindPath
4074                    if path_variable.is_some() && path_node_vars.is_empty() {
4075                        path_node_vars.push(variable.clone());
4076                    }
4077
4078                    // Look ahead for relationships
4079                    let mut current_source_var = variable;
4080                    last_outer_node_var = Some(current_source_var.clone());
4081                    i += 1;
4082                    while i < elements.len() {
4083                        if let PatternElement::Relationship(r) = &elements[i] {
4084                            if i + 1 < elements.len() {
4085                                let target_node_part = &elements[i + 1];
4086                                if let PatternElement::Node(n_target) = target_node_part {
4087                                    // For VLP traversals, pass path_variable through
4088                                    // For fixed-length, we use BindPath instead
4089                                    let is_vlp = r.range.is_some();
4090                                    let traverse_path_var =
4091                                        if is_vlp { path_variable.clone() } else { None };
4092
4093                                    // If we're about to start a VLP segment and there are
4094                                    // collected fixed-hop path vars, create an intermediate
4095                                    // BindPath for the fixed prefix first. The VLP will then
4096                                    // extend this existing path.
4097                                    if is_vlp
4098                                        && let Some(pv) = path_variable.as_ref()
4099                                        && !path_node_vars.is_empty()
4100                                    {
4101                                        plan = LogicalPlan::BindPath {
4102                                            input: Box::new(plan),
4103                                            node_variables: std::mem::take(&mut path_node_vars),
4104                                            edge_variables: std::mem::take(&mut path_edge_vars),
4105                                            path_variable: pv.clone(),
4106                                        };
4107                                        if !is_var_in_scope(vars_in_scope, pv) {
4108                                            add_var_to_scope(
4109                                                vars_in_scope,
4110                                                pv,
4111                                                VariableType::Path,
4112                                            )?;
4113                                        }
4114                                    }
4115
4116                                    // Plan the traverse from the current source node
4117                                    let target_was_bound =
4118                                        n_target.variable.as_ref().is_some_and(|v| {
4119                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4120                                        });
4121                                    let (new_plan, target_var, effective_target) = self
4122                                        .plan_traverse_with_source(
4123                                            plan,
4124                                            vars_in_scope,
4125                                            TraverseParams {
4126                                                rel: r,
4127                                                target_node: n_target,
4128                                                optional,
4129                                                path_variable: traverse_path_var,
4130                                                optional_pattern_vars: optional_pattern_vars
4131                                                    .clone(),
4132                                            },
4133                                            &current_source_var,
4134                                            vars_before_pattern,
4135                                            &path_bound_edge_vars,
4136                                        )?;
4137                                    plan = new_plan;
4138                                    if optional && !target_was_bound {
4139                                        optional_pattern_vars.insert(target_var.clone());
4140                                    }
4141
4142                                    // Track edge/target node for BindPath
4143                                    if path_variable.is_some() && !is_vlp {
4144                                        // Use the edge variable if given, otherwise use
4145                                        // the internal tracking column pattern.
4146                                        // Use effective_target (which may be __rebound_x
4147                                        // for bound-target traversals) to match the actual
4148                                        // column name produced by GraphTraverseExec.
4149                                        if let Some(ev) = &r.variable {
4150                                            path_edge_vars.push(ev.clone());
4151                                        } else {
4152                                            path_edge_vars
4153                                                .push(format!("__eid_to_{}", effective_target));
4154                                        }
4155                                        path_node_vars.push(target_var.clone());
4156                                    }
4157
4158                                    current_source_var = target_var;
4159                                    last_outer_node_var = Some(current_source_var.clone());
4160                                    had_traverses = true;
4161                                    i += 2;
4162                                } else {
4163                                    return Err(anyhow!("Relationship must be followed by a node"));
4164                                }
4165                            } else {
4166                                return Err(anyhow!("Relationship cannot be the last element"));
4167                            }
4168                        } else {
4169                            break;
4170                        }
4171                    }
4172                }
4173                PatternElement::Relationship(_) => {
4174                    return Err(anyhow!("Pattern must start with a node"));
4175                }
4176                PatternElement::Parenthesized { pattern, range } => {
4177                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4178                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4179                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4180                        return Err(anyhow!(
4181                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4182                        ));
4183                    }
4184
4185                    let source_node = match &pattern.elements[0] {
4186                        PatternElement::Node(n) => n,
4187                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4188                    };
4189
4190                    // Extract all relationship-node pairs (QPP steps)
4191                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4192                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4193                        let rel = match &pattern.elements[pair_idx] {
4194                            PatternElement::Relationship(r) => r,
4195                            _ => {
4196                                return Err(anyhow!(
4197                                    "Quantified pattern element at position {} must be a relationship",
4198                                    pair_idx
4199                                ));
4200                            }
4201                        };
4202                        let node = match &pattern.elements[pair_idx + 1] {
4203                            PatternElement::Node(n) => n,
4204                            _ => {
4205                                return Err(anyhow!(
4206                                    "Quantified pattern element at position {} must be a node",
4207                                    pair_idx + 1
4208                                ));
4209                            }
4210                        };
4211                        // Reject nested quantifiers
4212                        if rel.range.is_some() {
4213                            return Err(anyhow!(
4214                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4215                            ));
4216                        }
4217                        qpp_rels.push((rel, node));
4218                    }
4219
4220                    // Check if there's an outer target node after the Parenthesized element.
4221                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4222                    // target that should receive the traversal result.
4223                    let inner_target_node = qpp_rels.last().unwrap().1;
4224                    let outer_target_node = if i + 1 < elements.len() {
4225                        match &elements[i + 1] {
4226                            PatternElement::Node(n) => Some(n),
4227                            _ => None,
4228                        }
4229                    } else {
4230                        None
4231                    };
4232                    // Use the outer target for variable binding and filters; inner target
4233                    // labels are used for state constraints within the NFA.
4234                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4235
4236                    // For simple 3-element single-hop QPP without intermediate label constraints,
4237                    // fall back to existing VLP behavior (copy range to relationship).
4238                    let use_simple_vlp = qpp_rels.len() == 1
4239                        && inner_target_node
4240                            .labels
4241                            .first()
4242                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4243                            .is_none();
4244
4245                    // Plan source node.
4246                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4247                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4248                    // node variable, use it; otherwise fall back to the inner source.
4249                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4250                        // The preceding outer node is already bound and in scope
4251                        // Apply any property filters from the inner source node
4252                        if let Some(prop_filter) =
4253                            self.properties_to_expr(outer_src, &source_node.properties)
4254                        {
4255                            plan = LogicalPlan::Filter {
4256                                input: Box::new(plan),
4257                                predicate: prop_filter,
4258                                optional_variables: HashSet::new(),
4259                            };
4260                        }
4261                        outer_src.clone()
4262                    } else {
4263                        let sv = source_node
4264                            .variable
4265                            .clone()
4266                            .filter(|v| !v.is_empty())
4267                            .unwrap_or_else(|| self.next_anon_var());
4268
4269                        if is_var_in_scope(vars_in_scope, &sv) {
4270                            // Source is already bound, apply property filter if needed
4271                            if let Some(prop_filter) =
4272                                self.properties_to_expr(&sv, &source_node.properties)
4273                            {
4274                                plan = LogicalPlan::Filter {
4275                                    input: Box::new(plan),
4276                                    predicate: prop_filter,
4277                                    optional_variables: HashSet::new(),
4278                                };
4279                            }
4280                        } else {
4281                            // Source is unbound, scan it
4282                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4283                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4284                            if optional {
4285                                optional_pattern_vars.insert(sv.clone());
4286                            }
4287                        }
4288                        sv
4289                    };
4290
4291                    if use_simple_vlp {
4292                        // Simple single-hop QPP: apply range to relationship and use VLP path
4293                        let mut relationship = qpp_rels[0].0.clone();
4294                        relationship.range = range.clone();
4295
4296                        let target_was_bound = target_node
4297                            .variable
4298                            .as_ref()
4299                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4300                        let (new_plan, target_var, _effective_target) = self
4301                            .plan_traverse_with_source(
4302                                plan,
4303                                vars_in_scope,
4304                                TraverseParams {
4305                                    rel: &relationship,
4306                                    target_node,
4307                                    optional,
4308                                    path_variable: path_variable.clone(),
4309                                    optional_pattern_vars: optional_pattern_vars.clone(),
4310                                },
4311                                &source_variable,
4312                                vars_before_pattern,
4313                                &path_bound_edge_vars,
4314                            )?;
4315                        plan = new_plan;
4316                        if optional && !target_was_bound {
4317                            optional_pattern_vars.insert(target_var);
4318                        }
4319                    } else {
4320                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4321                        let mut qpp_step_infos = Vec::new();
4322                        let mut all_edge_type_ids = Vec::new();
4323
4324                        for (rel, node) in &qpp_rels {
4325                            let mut step_edge_type_ids = Vec::new();
4326                            if rel.types.is_empty() {
4327                                step_edge_type_ids = self.schema.all_edge_type_ids();
4328                            } else {
4329                                for type_name in &rel.types {
4330                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4331                                        step_edge_type_ids.push(edge_meta.id);
4332                                    }
4333                                }
4334                            }
4335                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4336
4337                            let target_label = node.labels.first().and_then(|l| {
4338                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4339                            });
4340
4341                            qpp_step_infos.push(QppStepInfo {
4342                                edge_type_ids: step_edge_type_ids,
4343                                direction: rel.direction.clone(),
4344                                target_label,
4345                            });
4346                        }
4347
4348                        // Deduplicate edge type IDs for adjacency warming
4349                        all_edge_type_ids.sort_unstable();
4350                        all_edge_type_ids.dedup();
4351
4352                        // Compute iteration bounds from range
4353                        let hops_per_iter = qpp_step_infos.len();
4354                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4355                        let (min_iter, max_iter) = if let Some(range) = range {
4356                            let min = range.min.unwrap_or(1) as usize;
4357                            let max = range
4358                                .max
4359                                .map(|m| m as usize)
4360                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4361                            (min, max)
4362                        } else {
4363                            (1, 1)
4364                        };
4365                        let min_hops = min_iter * hops_per_iter;
4366                        let max_hops = max_iter * hops_per_iter;
4367
4368                        // Target variable from the last node in the QPP sub-pattern
4369                        let target_variable = target_node
4370                            .variable
4371                            .clone()
4372                            .filter(|v| !v.is_empty())
4373                            .unwrap_or_else(|| self.next_anon_var());
4374
4375                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4376
4377                        // Determine target label for the final node
4378                        let target_label_meta = target_node
4379                            .labels
4380                            .first()
4381                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4382
4383                        // Collect scope match variables
4384                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4385                            [vars_before_pattern..]
4386                            .iter()
4387                            .map(|v| v.name.clone())
4388                            .collect();
4389                        scope_match_variables.insert(target_variable.clone());
4390
4391                        // Handle bound target: use rebound variable for traverse
4392                        let rebound_target_var = if target_is_bound {
4393                            Some(target_variable.clone())
4394                        } else {
4395                            None
4396                        };
4397                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4398                            format!("__rebound_{}", bv)
4399                        } else {
4400                            target_variable.clone()
4401                        };
4402
4403                        plan = LogicalPlan::Traverse {
4404                            input: Box::new(plan),
4405                            edge_type_ids: all_edge_type_ids,
4406                            direction: qpp_rels[0].0.direction.clone(),
4407                            source_variable: source_variable.to_string(),
4408                            target_variable: effective_target_var.clone(),
4409                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4410                            step_variable: None, // QPP doesn't expose intermediate edges
4411                            min_hops,
4412                            max_hops,
4413                            optional,
4414                            target_filter: self.node_filter_expr(
4415                                &target_variable,
4416                                &target_node.labels,
4417                                &target_node.properties,
4418                            ),
4419                            path_variable: path_variable.clone(),
4420                            edge_properties: HashSet::new(),
4421                            is_variable_length: true,
4422                            optional_pattern_vars: optional_pattern_vars.clone(),
4423                            scope_match_variables,
4424                            edge_filter_expr: None,
4425                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4426                            qpp_steps: Some(qpp_step_infos),
4427                        };
4428
4429                        // Handle bound target: filter rebound results against original variable
4430                        if let Some(ref btv) = rebound_target_var {
4431                            // Filter: __rebound_x._vid = x._vid
4432                            let filter_pred = Expr::BinaryOp {
4433                                left: Box::new(Expr::Property(
4434                                    Box::new(Expr::Variable(effective_target_var.clone())),
4435                                    "_vid".to_string(),
4436                                )),
4437                                op: BinaryOp::Eq,
4438                                right: Box::new(Expr::Property(
4439                                    Box::new(Expr::Variable(btv.clone())),
4440                                    "_vid".to_string(),
4441                                )),
4442                            };
4443                            plan = LogicalPlan::Filter {
4444                                input: Box::new(plan),
4445                                predicate: filter_pred,
4446                                optional_variables: if optional {
4447                                    optional_pattern_vars.clone()
4448                                } else {
4449                                    HashSet::new()
4450                                },
4451                            };
4452                        }
4453
4454                        // Add target variable to scope
4455                        if !target_is_bound {
4456                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4457                        }
4458
4459                        // Add path variable to scope
4460                        if let Some(ref pv) = path_variable
4461                            && !pv.is_empty()
4462                            && !is_var_in_scope(vars_in_scope, pv)
4463                        {
4464                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4465                        }
4466                    }
4467                    had_traverses = true;
4468
4469                    // Skip the outer target node if we consumed it
4470                    if outer_target_node.is_some() {
4471                        i += 2; // skip both Parenthesized and the following Node
4472                    } else {
4473                        i += 1;
4474                    }
4475                }
4476            }
4477        }
4478
4479        // If this is a single-node pattern with a path variable, bind the zero-length path
4480        // E.g., `p = (a)` should create a Path with one node and zero edges
4481        if let Some(ref path_var) = path_variable
4482            && !path_var.is_empty()
4483            && !had_traverses
4484            && let Some(node_var) = single_node_variable
4485        {
4486            plan = LogicalPlan::BindZeroLengthPath {
4487                input: Box::new(plan),
4488                node_variable: node_var,
4489                path_variable: path_var.clone(),
4490            };
4491            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4492        }
4493
4494        // Bind fixed-length path from collected node/edge variables
4495        if let Some(ref path_var) = path_variable
4496            && !path_var.is_empty()
4497            && had_traverses
4498            && !path_node_vars.is_empty()
4499            && !is_var_in_scope(vars_in_scope, path_var)
4500        {
4501            plan = LogicalPlan::BindPath {
4502                input: Box::new(plan),
4503                node_variables: path_node_vars,
4504                edge_variables: path_edge_vars,
4505                path_variable: path_var.clone(),
4506            };
4507            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4508        }
4509
4510        Ok(plan)
4511    }
4512
4513    /// Plan a traverse with an explicit source variable name.
4514    ///
4515    /// Returns `(plan, target_variable, effective_target_variable)` where:
4516    /// - `target_variable` is the semantic variable name for downstream scope
4517    /// - `effective_target_variable` is the actual column-name prefix used by
4518    ///   the traverse (may be `__rebound_x` for bound-target patterns)
4519    fn plan_traverse_with_source(
4520        &self,
4521        plan: LogicalPlan,
4522        vars_in_scope: &mut Vec<VariableInfo>,
4523        params: TraverseParams<'_>,
4524        source_variable: &str,
4525        vars_before_pattern: usize,
4526        path_bound_edge_vars: &HashSet<String>,
4527    ) -> Result<(LogicalPlan, String, String)> {
4528        // Check for parameter used as relationship predicate
4529        if let Some(Expr::Parameter(_)) = &params.rel.properties {
4530            return Err(anyhow!(
4531                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
4532            ));
4533        }
4534
4535        let mut edge_type_ids = Vec::new();
4536        let mut dst_labels = Vec::new();
4537        let mut unknown_types = Vec::new();
4538
4539        if params.rel.types.is_empty() {
4540            // All types - include both schema and schemaless edge types
4541            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
4542            edge_type_ids = self.schema.all_edge_type_ids();
4543            for meta in self.schema.edge_types.values() {
4544                dst_labels.extend(meta.dst_labels.iter().cloned());
4545            }
4546        } else {
4547            for type_name in &params.rel.types {
4548                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4549                    // Known type - use standard Traverse with type_id
4550                    edge_type_ids.push(edge_meta.id);
4551                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
4552                } else {
4553                    // Unknown type - will use TraverseMainByType
4554                    unknown_types.push(type_name.clone());
4555                }
4556            }
4557        }
4558
4559        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
4560        edge_type_ids.sort_unstable();
4561        edge_type_ids.dedup();
4562        unknown_types.sort_unstable();
4563        unknown_types.dedup();
4564
4565        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
4566        if target_variable.is_empty() {
4567            target_variable = self.next_anon_var();
4568        }
4569        let target_is_bound =
4570            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
4571
4572        // Check for VariableTypeConflict: relationship variable used as node
4573        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
4574        if let Some(rel_var) = &params.rel.variable
4575            && !rel_var.is_empty()
4576            && rel_var == &target_variable
4577        {
4578            return Err(anyhow!(
4579                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
4580                rel_var
4581            ));
4582        }
4583
4584        // Check for VariableTypeConflict/RelationshipUniquenessViolation
4585        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
4586        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
4587        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
4588        let mut bound_edge_var: Option<String> = None;
4589        let mut bound_edge_list_var: Option<String> = None;
4590        if let Some(rel_var) = &params.rel.variable
4591            && !rel_var.is_empty()
4592            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
4593        {
4594            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
4595                .iter()
4596                .any(|v| v.name == *rel_var);
4597
4598            if info.var_type == VariableType::Edge {
4599                // Check if this edge variable comes from a previous clause (before this MATCH)
4600                if is_from_previous_clause {
4601                    // Edge variable bound from previous clause - this is allowed
4602                    // We'll filter the traversal to match this specific edge
4603                    bound_edge_var = Some(rel_var.clone());
4604                } else {
4605                    // Same relationship variable used twice in the same MATCH clause
4606                    return Err(anyhow!(
4607                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
4608                        rel_var
4609                    ));
4610                }
4611            } else if params.rel.range.is_some()
4612                && is_from_previous_clause
4613                && matches!(
4614                    info.var_type,
4615                    VariableType::Scalar | VariableType::ScalarLiteral
4616                )
4617            {
4618                // Allow VLP rebound against a previously bound relationship list
4619                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
4620                bound_edge_list_var = Some(rel_var.clone());
4621            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
4622                return Err(anyhow!(
4623                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
4624                    rel_var,
4625                    info.var_type
4626                ));
4627            }
4628        }
4629
4630        // Check for VariableTypeConflict: target node variable already bound as non-Node
4631        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
4632        if target_is_bound
4633            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4634            && !info.var_type.is_compatible_with(VariableType::Node)
4635        {
4636            return Err(anyhow!(
4637                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4638                target_variable,
4639                info.var_type
4640            ));
4641        }
4642
4643        // If all requested types are unknown (schemaless), use TraverseMainByType
4644        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
4645        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
4646        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
4647            // All types are unknown - use schemaless traversal
4648
4649            let is_variable_length = params.rel.range.is_some();
4650
4651            const DEFAULT_MAX_HOPS: usize = 100;
4652            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4653                let min = range.min.unwrap_or(1) as usize;
4654                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4655                (min, max)
4656            } else {
4657                (1, 1)
4658            };
4659
4660            // For both single-hop and variable-length paths:
4661            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4662            //   Single-hop: step_var holds a single edge object
4663            //   VLP: step_var holds a list of edge objects
4664            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4665            let step_var = params.rel.variable.clone();
4666            let path_var = params.path_variable.clone();
4667
4668            // Compute scope_match_variables for relationship uniqueness scoping.
4669            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4670                .iter()
4671                .map(|v| v.name.clone())
4672                .collect();
4673            if let Some(ref sv) = step_var {
4674                // Only add the step variable to scope if it's NOT rebound from a previous clause.
4675                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
4676                // filtering because the second MATCH intentionally reuses the same edge.
4677                if bound_edge_var.is_none() {
4678                    scope_match_variables.insert(sv.clone());
4679                }
4680            }
4681            scope_match_variables.insert(target_variable.clone());
4682            // Include bound edge variables from this path for cross-segment Trail mode
4683            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
4684            // edges already claimed by a bound relationship [r] in the same path.
4685            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
4686            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
4687            scope_match_variables.extend(
4688                path_bound_edge_vars
4689                    .iter()
4690                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
4691                    .cloned(),
4692            );
4693
4694            let mut plan = LogicalPlan::TraverseMainByType {
4695                type_names: unknown_types,
4696                input: Box::new(plan),
4697                direction: params.rel.direction.clone(),
4698                source_variable: source_variable.to_string(),
4699                target_variable: target_variable.clone(),
4700                step_variable: step_var.clone(),
4701                min_hops,
4702                max_hops,
4703                optional: params.optional,
4704                target_filter: self.node_filter_expr(
4705                    &target_variable,
4706                    &params.target_node.labels,
4707                    &params.target_node.properties,
4708                ),
4709                path_variable: path_var.clone(),
4710                is_variable_length,
4711                optional_pattern_vars: params.optional_pattern_vars.clone(),
4712                scope_match_variables,
4713                edge_filter_expr: if is_variable_length {
4714                    let filter_var = step_var
4715                        .clone()
4716                        .unwrap_or_else(|| "__anon_edge".to_string());
4717                    self.properties_to_expr(&filter_var, &params.rel.properties)
4718                } else {
4719                    None
4720                },
4721                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4722            };
4723
4724            // Only apply bound target filter for Imported variables (from outer scope/subquery).
4725            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4726            // uses Parameter which requires the value to be in params (subquery context).
4727            if target_is_bound
4728                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4729                && info.var_type == VariableType::Imported
4730            {
4731                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4732            }
4733
4734            // Apply relationship property predicates for fixed-length schemaless
4735            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
4736            // For VLP, predicates are stored inline in edge_filter_expr (above).
4737            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4738            if !is_variable_length
4739                && let Some(edge_var_name) = step_var.as_ref()
4740                && let Some(edge_prop_filter) =
4741                    self.properties_to_expr(edge_var_name, &params.rel.properties)
4742            {
4743                let filter_optional_vars = if params.optional {
4744                    params.optional_pattern_vars.clone()
4745                } else {
4746                    HashSet::new()
4747                };
4748                plan = LogicalPlan::Filter {
4749                    input: Box::new(plan),
4750                    predicate: edge_prop_filter,
4751                    optional_variables: filter_optional_vars,
4752                };
4753            }
4754
4755            // Add the bound variables to scope
4756            if let Some(sv) = &step_var {
4757                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
4758                if is_variable_length
4759                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
4760                {
4761                    info.is_vlp = true;
4762                }
4763            }
4764            if let Some(pv) = &path_var
4765                && !is_var_in_scope(vars_in_scope, pv)
4766            {
4767                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4768            }
4769            if !is_var_in_scope(vars_in_scope, &target_variable) {
4770                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4771            }
4772
4773            return Ok((plan, target_variable.clone(), target_variable));
4774        }
4775
4776        // If we have a mix of known and unknown types, error for now
4777        // (could be extended to Union of Traverse + TraverseMainByType)
4778        if !unknown_types.is_empty() {
4779            return Err(anyhow!(
4780                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
4781                unknown_types
4782            ));
4783        }
4784
4785        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
4786            // Use first label for target_label_id
4787            // For schemaless support, allow unknown target labels
4788            self.schema.get_label_case_insensitive(label_name)
4789        } else if !target_is_bound {
4790            // Infer from edge type(s)
4791            let unique_dsts: Vec<_> = dst_labels
4792                .into_iter()
4793                .collect::<HashSet<_>>()
4794                .into_iter()
4795                .collect();
4796            if unique_dsts.len() == 1 {
4797                let label_name = &unique_dsts[0];
4798                self.schema.get_label_case_insensitive(label_name)
4799            } else {
4800                // Multiple or no destination labels inferred - allow any target
4801                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
4802                // where the edge type can connect to multiple labels
4803                None
4804            }
4805        } else {
4806            None
4807        };
4808
4809        // Check if this is a variable-length pattern (has range specifier like *1..3)
4810        let is_variable_length = params.rel.range.is_some();
4811
4812        // For VLP patterns, default min to 1 and max to a reasonable limit.
4813        // For single-hop patterns (no range), both are 1.
4814        const DEFAULT_MAX_HOPS: usize = 100;
4815        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4816            let min = range.min.unwrap_or(1) as usize;
4817            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4818            (min, max)
4819        } else {
4820            (1, 1)
4821        };
4822
4823        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4824        //   Single-hop: step_var holds a single edge object
4825        //   VLP: step_var holds a list of edge objects
4826        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4827        let step_var = params.rel.variable.clone();
4828        let path_var = params.path_variable.clone();
4829
4830        // If we have a bound edge variable from a previous clause, use a temp variable
4831        // for the Traverse step, then filter to match the bound edge
4832        let rebound_var = bound_edge_var
4833            .as_ref()
4834            .or(bound_edge_list_var.as_ref())
4835            .cloned();
4836        let effective_step_var = if let Some(ref bv) = rebound_var {
4837            Some(format!("__rebound_{}", bv))
4838        } else {
4839            step_var.clone()
4840        };
4841
4842        // If we have a bound target variable from a previous clause (e.g. WITH),
4843        // use a temp variable for the Traverse step, then filter to match the bound
4844        // target — mirroring the bound edge pattern above.
4845        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
4846            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
4847                .map(|info| info.var_type == VariableType::Imported)
4848                .unwrap_or(false);
4849            if !is_imported {
4850                Some(target_variable.clone())
4851            } else {
4852                None
4853            }
4854        } else {
4855            None
4856        };
4857
4858        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4859            format!("__rebound_{}", bv)
4860        } else {
4861            target_variable.clone()
4862        };
4863
4864        // Collect all variables (node + edge) from the current MATCH clause scope
4865        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
4866        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
4867        // if their associated variable is in this set. This prevents relationship
4868        // uniqueness from being enforced across disconnected MATCH clauses.
4869        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4870            .iter()
4871            .map(|v| v.name.clone())
4872            .collect();
4873        // Include the current traverse's edge variable (not yet added to vars_in_scope)
4874        if let Some(ref sv) = effective_step_var {
4875            scope_match_variables.insert(sv.clone());
4876        }
4877        // Include the target variable (not yet added to vars_in_scope)
4878        scope_match_variables.insert(effective_target_var.clone());
4879        // Include bound edge variables from this path for cross-segment Trail mode
4880        // enforcement (same as the schemaless path above).
4881        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
4882
4883        let mut plan = LogicalPlan::Traverse {
4884            input: Box::new(plan),
4885            edge_type_ids,
4886            direction: params.rel.direction.clone(),
4887            source_variable: source_variable.to_string(),
4888            target_variable: effective_target_var.clone(),
4889            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4890            step_variable: effective_step_var.clone(),
4891            min_hops,
4892            max_hops,
4893            optional: params.optional,
4894            target_filter: self.node_filter_expr(
4895                &target_variable,
4896                &params.target_node.labels,
4897                &params.target_node.properties,
4898            ),
4899            path_variable: path_var.clone(),
4900            edge_properties: HashSet::new(),
4901            is_variable_length,
4902            optional_pattern_vars: params.optional_pattern_vars.clone(),
4903            scope_match_variables,
4904            edge_filter_expr: if is_variable_length {
4905                // Use the step variable name, or a fallback for anonymous edges.
4906                // The variable name is used by properties_to_expr to build
4907                // `var.prop = value` expressions. For BFS property checking,
4908                // only the property name and value matter (the variable name
4909                // is stripped during extraction).
4910                let filter_var = effective_step_var
4911                    .clone()
4912                    .unwrap_or_else(|| "__anon_edge".to_string());
4913                self.properties_to_expr(&filter_var, &params.rel.properties)
4914            } else {
4915                None
4916            },
4917            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4918            qpp_steps: None,
4919        };
4920
4921        // Pre-compute optional variables set for filter nodes in this traverse.
4922        // Used by relationship property filters and bound-edge filters below.
4923        let filter_optional_vars = if params.optional {
4924            params.optional_pattern_vars.clone()
4925        } else {
4926            HashSet::new()
4927        };
4928
4929        // Apply relationship property predicates (e.g. [r {k: v}]).
4930        // For VLP, predicates are stored inline in edge_filter_expr (above).
4931        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4932        if !is_variable_length
4933            && let Some(edge_var_name) = effective_step_var.as_ref()
4934            && let Some(edge_prop_filter) =
4935                self.properties_to_expr(edge_var_name, &params.rel.properties)
4936        {
4937            plan = LogicalPlan::Filter {
4938                input: Box::new(plan),
4939                predicate: edge_prop_filter,
4940                optional_variables: filter_optional_vars.clone(),
4941            };
4942        }
4943
4944        // Only apply bound target filter for Imported variables (from outer scope/subquery).
4945        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4946        // uses Parameter which requires the value to be in params (subquery context).
4947        if target_is_bound
4948            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4949            && info.var_type == VariableType::Imported
4950        {
4951            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4952        }
4953
4954        // If we have a bound edge variable, add a filter to match it
4955        if let Some(ref bv) = bound_edge_var {
4956            let temp_var = format!("__rebound_{}", bv);
4957            let bound_check = Expr::BinaryOp {
4958                left: Box::new(Expr::Property(
4959                    Box::new(Expr::Variable(temp_var)),
4960                    "_eid".to_string(),
4961                )),
4962                op: BinaryOp::Eq,
4963                right: Box::new(Expr::Property(
4964                    Box::new(Expr::Variable(bv.clone())),
4965                    "_eid".to_string(),
4966                )),
4967            };
4968            plan = LogicalPlan::Filter {
4969                input: Box::new(plan),
4970                predicate: bound_check,
4971                optional_variables: filter_optional_vars.clone(),
4972            };
4973        }
4974
4975        // If we have a bound relationship list variable for a VLP pattern,
4976        // add a filter to match the traversed relationship list exactly.
4977        if let Some(ref bv) = bound_edge_list_var {
4978            let temp_var = format!("__rebound_{}", bv);
4979            let temp_eids = Expr::ListComprehension {
4980                variable: "__rebound_edge".to_string(),
4981                list: Box::new(Expr::Variable(temp_var)),
4982                where_clause: None,
4983                map_expr: Box::new(Expr::FunctionCall {
4984                    name: "toInteger".to_string(),
4985                    args: vec![Expr::Property(
4986                        Box::new(Expr::Variable("__rebound_edge".to_string())),
4987                        "_eid".to_string(),
4988                    )],
4989                    distinct: false,
4990                    window_spec: None,
4991                }),
4992            };
4993            let bound_eids = Expr::ListComprehension {
4994                variable: "__bound_edge".to_string(),
4995                list: Box::new(Expr::Variable(bv.clone())),
4996                where_clause: None,
4997                map_expr: Box::new(Expr::FunctionCall {
4998                    name: "toInteger".to_string(),
4999                    args: vec![Expr::Property(
5000                        Box::new(Expr::Variable("__bound_edge".to_string())),
5001                        "_eid".to_string(),
5002                    )],
5003                    distinct: false,
5004                    window_spec: None,
5005                }),
5006            };
5007            let bound_list_check = Expr::BinaryOp {
5008                left: Box::new(temp_eids),
5009                op: BinaryOp::Eq,
5010                right: Box::new(bound_eids),
5011            };
5012            plan = LogicalPlan::Filter {
5013                input: Box::new(plan),
5014                predicate: bound_list_check,
5015                optional_variables: filter_optional_vars.clone(),
5016            };
5017        }
5018
5019        // If we have a bound target variable (non-imported), add a filter to constrain
5020        // the traversal output to match the previously bound target node.
5021        if let Some(ref bv) = rebound_target_var {
5022            let temp_var = format!("__rebound_{}", bv);
5023            let bound_check = Expr::BinaryOp {
5024                left: Box::new(Expr::Property(
5025                    Box::new(Expr::Variable(temp_var.clone())),
5026                    "_vid".to_string(),
5027                )),
5028                op: BinaryOp::Eq,
5029                right: Box::new(Expr::Property(
5030                    Box::new(Expr::Variable(bv.clone())),
5031                    "_vid".to_string(),
5032                )),
5033            };
5034            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5035            // so that OptionalFilterExec excludes it from the grouping key and
5036            // properly nullifies it in recovery rows when all matches are filtered out.
5037            // Without this, each traverse result creates its own group (keyed by
5038            // __rebound_c._vid), and null-row recovery emits a spurious null row
5039            // for every non-matching target instead of one per source group.
5040            let mut rebound_filter_vars = filter_optional_vars;
5041            if params.optional {
5042                rebound_filter_vars.insert(temp_var);
5043            }
5044            plan = LogicalPlan::Filter {
5045                input: Box::new(plan),
5046                predicate: bound_check,
5047                optional_variables: rebound_filter_vars,
5048            };
5049        }
5050
5051        // Add the bound variables to scope
5052        // Skip adding the edge variable if it's already bound from a previous clause
5053        if let Some(sv) = &step_var
5054            && bound_edge_var.is_none()
5055            && bound_edge_list_var.is_none()
5056        {
5057            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5058            if is_variable_length
5059                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5060            {
5061                info.is_vlp = true;
5062            }
5063        }
5064        if let Some(pv) = &path_var
5065            && !is_var_in_scope(vars_in_scope, pv)
5066        {
5067            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5068        }
5069        if !is_var_in_scope(vars_in_scope, &target_variable) {
5070            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5071        }
5072
5073        Ok((plan, target_variable, effective_target_var))
5074    }
5075
5076    /// Combine a new scan plan with an existing plan.
5077    ///
5078    /// If the existing plan is `Empty`, returns the new plan directly.
5079    /// Otherwise, wraps them in a `CrossJoin`.
5080    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5081        if matches!(existing, LogicalPlan::Empty) {
5082            new
5083        } else {
5084            LogicalPlan::CrossJoin {
5085                left: Box::new(existing),
5086                right: Box::new(new),
5087            }
5088        }
5089    }
5090
5091    /// Split node map predicates into scan-pushable and residual filters.
5092    ///
5093    /// A predicate is scan-pushable when its value expression references only
5094    /// the node variable itself (or no variables). Predicates referencing other
5095    /// in-scope variables (correlated predicates) are returned as residual so
5096    /// they can be applied after joining with the existing plan.
5097    fn split_node_property_filters_for_scan(
5098        &self,
5099        variable: &str,
5100        properties: &Option<Expr>,
5101    ) -> (Option<Expr>, Option<Expr>) {
5102        let entries = match properties {
5103            Some(Expr::Map(entries)) => entries,
5104            _ => return (None, None),
5105        };
5106
5107        if entries.is_empty() {
5108            return (None, None);
5109        }
5110
5111        let mut pushdown_entries = Vec::new();
5112        let mut residual_entries = Vec::new();
5113
5114        for (prop, val_expr) in entries {
5115            let vars = collect_expr_variables(val_expr);
5116            if vars.iter().all(|v| v == variable) {
5117                pushdown_entries.push((prop.clone(), val_expr.clone()));
5118            } else {
5119                residual_entries.push((prop.clone(), val_expr.clone()));
5120            }
5121        }
5122
5123        let pushdown_map = if pushdown_entries.is_empty() {
5124            None
5125        } else {
5126            Some(Expr::Map(pushdown_entries))
5127        };
5128        let residual_map = if residual_entries.is_empty() {
5129            None
5130        } else {
5131            Some(Expr::Map(residual_entries))
5132        };
5133
5134        (
5135            self.properties_to_expr(variable, &pushdown_map),
5136            self.properties_to_expr(variable, &residual_map),
5137        )
5138    }
5139
5140    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5141    fn plan_unbound_node(
5142        &self,
5143        node: &NodePattern,
5144        variable: &str,
5145        plan: LogicalPlan,
5146        optional: bool,
5147    ) -> Result<LogicalPlan> {
5148        // Properties handling
5149        let properties = match &node.properties {
5150            Some(Expr::Map(entries)) => entries.as_slice(),
5151            Some(Expr::Parameter(_)) => {
5152                return Err(anyhow!(
5153                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5154                ));
5155            }
5156            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5157            None => &[],
5158        };
5159
5160        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5161
5162        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5163            if let Some(predicate) = residual {
5164                LogicalPlan::Filter {
5165                    input: Box::new(input),
5166                    predicate,
5167                    optional_variables: HashSet::new(),
5168                }
5169            } else {
5170                input
5171            }
5172        };
5173
5174        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5175            self.split_node_property_filters_for_scan(variable, &node.properties)
5176        } else {
5177            (self.properties_to_expr(variable, &node.properties), None)
5178        };
5179
5180        // Check for ext_id in properties when no label is specified
5181        if node.labels.is_empty() {
5182            // Try to find ext_id property for main table lookup
5183            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5184                // Extract the ext_id value as a string
5185                let ext_id = match ext_id_value {
5186                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5187                    _ => {
5188                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5189                    }
5190                };
5191
5192                // Build filter for remaining properties (excluding ext_id)
5193                let remaining_props: Vec<_> = properties
5194                    .iter()
5195                    .filter(|(k, _)| k != "ext_id")
5196                    .cloned()
5197                    .collect();
5198
5199                let remaining_expr = if remaining_props.is_empty() {
5200                    None
5201                } else {
5202                    Some(Expr::Map(remaining_props))
5203                };
5204
5205                let (prop_filter, residual_filter) = if has_existing_scope {
5206                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5207                } else {
5208                    (self.properties_to_expr(variable, &remaining_expr), None)
5209                };
5210
5211                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5212                    variable: variable.to_string(),
5213                    ext_id,
5214                    filter: prop_filter,
5215                    optional,
5216                };
5217
5218                let joined = Self::join_with_plan(plan, ext_id_lookup);
5219                return Ok(apply_residual_filter(joined, residual_filter));
5220            }
5221
5222            // No ext_id: create ScanAll for unlabeled node pattern
5223            let scan_all = LogicalPlan::ScanAll {
5224                variable: variable.to_string(),
5225                filter: node_scan_filter,
5226                optional,
5227            };
5228
5229            let joined = Self::join_with_plan(plan, scan_all);
5230            return Ok(apply_residual_filter(joined, node_residual_filter));
5231        }
5232
5233        // Use first label for label_id (primary label for dataset selection)
5234        let label_name = &node.labels[0];
5235
5236        // Check if label exists in schema
5237        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5238            // Known label: use standard Scan
5239            let scan = LogicalPlan::Scan {
5240                label_id: label_meta.id,
5241                labels: node.labels.clone(),
5242                variable: variable.to_string(),
5243                filter: node_scan_filter,
5244                optional,
5245            };
5246
5247            let joined = Self::join_with_plan(plan, scan);
5248            Ok(apply_residual_filter(joined, node_residual_filter))
5249        } else {
5250            // Unknown label: use ScanMainByLabels for schemaless support
5251            let scan_main = LogicalPlan::ScanMainByLabels {
5252                labels: node.labels.clone(),
5253                variable: variable.to_string(),
5254                filter: node_scan_filter,
5255                optional,
5256            };
5257
5258            let joined = Self::join_with_plan(plan, scan_main);
5259            Ok(apply_residual_filter(joined, node_residual_filter))
5260        }
5261    }
5262
5263    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5264    ///
5265    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5266    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5267    fn plan_where_clause(
5268        &self,
5269        predicate: &Expr,
5270        plan: LogicalPlan,
5271        vars_in_scope: &[VariableInfo],
5272        optional_vars: HashSet<String>,
5273    ) -> Result<LogicalPlan> {
5274        // Validate no aggregation functions in WHERE clause
5275        validate_no_aggregation_in_where(predicate)?;
5276
5277        // Validate all variables used are in scope
5278        validate_expression_variables(predicate, vars_in_scope)?;
5279
5280        // Validate expression types (function args, boolean operators)
5281        validate_expression(predicate, vars_in_scope)?;
5282
5283        // Check that WHERE predicate isn't a bare node/edge/path variable
5284        if let Expr::Variable(var_name) = predicate
5285            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5286            && matches!(
5287                info.var_type,
5288                VariableType::Node | VariableType::Edge | VariableType::Path
5289            )
5290        {
5291            return Err(anyhow!(
5292                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5293                info.var_type
5294            ));
5295        }
5296
5297        let mut plan = plan;
5298
5299        // Transform VALID_AT macro to function call
5300        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5301
5302        let mut current_predicate =
5303            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5304
5305        // 1. Try to extract vector_similarity predicate for optimization
5306        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
5307            let vs = &extraction.predicate;
5308            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
5309                plan = Self::replace_scan_with_knn(
5310                    plan,
5311                    &vs.variable,
5312                    &vs.property,
5313                    vs.query.clone(),
5314                    vs.threshold,
5315                );
5316                if let Some(residual) = extraction.residual {
5317                    current_predicate = residual;
5318                } else {
5319                    current_predicate = Expr::TRUE;
5320                }
5321            }
5322        }
5323
5324        // 3. Push eligible predicates to Scan OR Traverse filters
5325        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
5326        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
5327        // rows. Let them stay in the Filter operator which handles NULL preservation.
5328        for var in vars_in_scope {
5329            // Skip pushdown for optional variables - they need NULL preservation in Filter
5330            if optional_vars.contains(&var.name) {
5331                continue;
5332            }
5333
5334            // Check if var is produced by a Scan
5335            if Self::find_scan_label_id(&plan, &var.name).is_some() {
5336                let (pushable, residual) =
5337                    Self::extract_variable_predicates(&current_predicate, &var.name);
5338
5339                for pred in pushable {
5340                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
5341                }
5342
5343                if let Some(r) = residual {
5344                    current_predicate = r;
5345                } else {
5346                    current_predicate = Expr::TRUE;
5347                }
5348            } else if Self::is_traverse_target(&plan, &var.name) {
5349                // Push to Traverse
5350                let (pushable, residual) =
5351                    Self::extract_variable_predicates(&current_predicate, &var.name);
5352
5353                for pred in pushable {
5354                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
5355                }
5356
5357                if let Some(r) = residual {
5358                    current_predicate = r;
5359                } else {
5360                    current_predicate = Expr::TRUE;
5361                }
5362            }
5363        }
5364
5365        // 4. Push predicates to Apply.input_filter
5366        // This filters input rows BEFORE executing correlated subqueries.
5367        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
5368
5369        // 5. Add Filter node for any remaining predicates
5370        if !current_predicate.is_true_literal() {
5371            plan = LogicalPlan::Filter {
5372                input: Box::new(plan),
5373                predicate: current_predicate,
5374                optional_variables: optional_vars,
5375            };
5376        }
5377
5378        Ok(plan)
5379    }
5380
5381    fn rewrite_predicates_using_indexes(
5382        &self,
5383        predicate: &Expr,
5384        plan: &LogicalPlan,
5385        vars_in_scope: &[VariableInfo],
5386    ) -> Result<Expr> {
5387        let mut rewritten = predicate.clone();
5388
5389        for var in vars_in_scope {
5390            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
5391                // Find label name
5392                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
5393
5394                if let Some(label) = label_name
5395                    && let Some(props) = self.schema.properties.get(&label)
5396                {
5397                    for (gen_col, meta) in props {
5398                        if meta.generation_expression.is_some() {
5399                            // Use cached parsed expression
5400                            if let Some(schema_expr) =
5401                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
5402                            {
5403                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
5404                                rewritten = Self::replace_expression(
5405                                    rewritten,
5406                                    schema_expr,
5407                                    &var.name,
5408                                    gen_col,
5409                                );
5410                            }
5411                        }
5412                    }
5413                }
5414            }
5415        }
5416        Ok(rewritten)
5417    }
5418
5419    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
5420        // First, normalize schema_expr to use query_var
5421        let schema_var = schema_expr.extract_variable();
5422
5423        if let Some(s_var) = schema_var {
5424            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
5425
5426            if expr == target_expr {
5427                return Expr::Property(
5428                    Box::new(Expr::Variable(query_var.to_string())),
5429                    gen_col.to_string(),
5430                );
5431            }
5432        }
5433
5434        // Recurse
5435        match expr {
5436            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
5437                left: Box::new(Self::replace_expression(
5438                    *left,
5439                    schema_expr,
5440                    query_var,
5441                    gen_col,
5442                )),
5443                op,
5444                right: Box::new(Self::replace_expression(
5445                    *right,
5446                    schema_expr,
5447                    query_var,
5448                    gen_col,
5449                )),
5450            },
5451            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
5452                op,
5453                expr: Box::new(Self::replace_expression(
5454                    *expr,
5455                    schema_expr,
5456                    query_var,
5457                    gen_col,
5458                )),
5459            },
5460            Expr::FunctionCall {
5461                name,
5462                args,
5463                distinct,
5464                window_spec,
5465            } => Expr::FunctionCall {
5466                name,
5467                args: args
5468                    .into_iter()
5469                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
5470                    .collect(),
5471                distinct,
5472                window_spec,
5473            },
5474            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
5475                *expr,
5476                schema_expr,
5477                query_var,
5478                gen_col,
5479            ))),
5480            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
5481                *expr,
5482                schema_expr,
5483                query_var,
5484                gen_col,
5485            ))),
5486            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
5487                *expr,
5488                schema_expr,
5489                query_var,
5490                gen_col,
5491            ))),
5492            Expr::ArrayIndex {
5493                array: e,
5494                index: idx,
5495            } => Expr::ArrayIndex {
5496                array: Box::new(Self::replace_expression(
5497                    *e,
5498                    schema_expr,
5499                    query_var,
5500                    gen_col,
5501                )),
5502                index: Box::new(Self::replace_expression(
5503                    *idx,
5504                    schema_expr,
5505                    query_var,
5506                    gen_col,
5507                )),
5508            },
5509            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
5510                array: Box::new(Self::replace_expression(
5511                    *array,
5512                    schema_expr,
5513                    query_var,
5514                    gen_col,
5515                )),
5516                start: start.map(|s| {
5517                    Box::new(Self::replace_expression(
5518                        *s,
5519                        schema_expr,
5520                        query_var,
5521                        gen_col,
5522                    ))
5523                }),
5524                end: end.map(|e| {
5525                    Box::new(Self::replace_expression(
5526                        *e,
5527                        schema_expr,
5528                        query_var,
5529                        gen_col,
5530                    ))
5531                }),
5532            },
5533            Expr::List(exprs) => Expr::List(
5534                exprs
5535                    .into_iter()
5536                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
5537                    .collect(),
5538            ),
5539            Expr::Map(entries) => Expr::Map(
5540                entries
5541                    .into_iter()
5542                    .map(|(k, v)| {
5543                        (
5544                            k,
5545                            Self::replace_expression(v, schema_expr, query_var, gen_col),
5546                        )
5547                    })
5548                    .collect(),
5549            ),
5550            Expr::Property(e, prop) => Expr::Property(
5551                Box::new(Self::replace_expression(
5552                    *e,
5553                    schema_expr,
5554                    query_var,
5555                    gen_col,
5556                )),
5557                prop,
5558            ),
5559            Expr::Case {
5560                expr: case_expr,
5561                when_then,
5562                else_expr,
5563            } => Expr::Case {
5564                expr: case_expr.map(|e| {
5565                    Box::new(Self::replace_expression(
5566                        *e,
5567                        schema_expr,
5568                        query_var,
5569                        gen_col,
5570                    ))
5571                }),
5572                when_then: when_then
5573                    .into_iter()
5574                    .map(|(w, t)| {
5575                        (
5576                            Self::replace_expression(w, schema_expr, query_var, gen_col),
5577                            Self::replace_expression(t, schema_expr, query_var, gen_col),
5578                        )
5579                    })
5580                    .collect(),
5581                else_expr: else_expr.map(|e| {
5582                    Box::new(Self::replace_expression(
5583                        *e,
5584                        schema_expr,
5585                        query_var,
5586                        gen_col,
5587                    ))
5588                }),
5589            },
5590            Expr::Reduce {
5591                accumulator,
5592                init,
5593                variable: reduce_var,
5594                list,
5595                expr: reduce_expr,
5596            } => Expr::Reduce {
5597                accumulator,
5598                init: Box::new(Self::replace_expression(
5599                    *init,
5600                    schema_expr,
5601                    query_var,
5602                    gen_col,
5603                )),
5604                variable: reduce_var,
5605                list: Box::new(Self::replace_expression(
5606                    *list,
5607                    schema_expr,
5608                    query_var,
5609                    gen_col,
5610                )),
5611                expr: Box::new(Self::replace_expression(
5612                    *reduce_expr,
5613                    schema_expr,
5614                    query_var,
5615                    gen_col,
5616                )),
5617            },
5618
5619            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
5620            _ => expr,
5621        }
5622    }
5623
5624    /// Check if the variable is the target of a Traverse node
5625    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
5626        match plan {
5627            LogicalPlan::Traverse {
5628                target_variable,
5629                input,
5630                ..
5631            } => target_variable == variable || Self::is_traverse_target(input, variable),
5632            LogicalPlan::Filter { input, .. }
5633            | LogicalPlan::Project { input, .. }
5634            | LogicalPlan::Sort { input, .. }
5635            | LogicalPlan::Limit { input, .. }
5636            | LogicalPlan::Aggregate { input, .. }
5637            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
5638            LogicalPlan::CrossJoin { left, right } => {
5639                Self::is_traverse_target(left, variable)
5640                    || Self::is_traverse_target(right, variable)
5641            }
5642            _ => false,
5643        }
5644    }
5645
5646    /// Push a predicate into a Traverse's target_filter for the specified variable
5647    fn push_predicate_to_traverse(
5648        plan: LogicalPlan,
5649        variable: &str,
5650        predicate: Expr,
5651    ) -> LogicalPlan {
5652        match plan {
5653            LogicalPlan::Traverse {
5654                input,
5655                edge_type_ids,
5656                direction,
5657                source_variable,
5658                target_variable,
5659                target_label_id,
5660                step_variable,
5661                min_hops,
5662                max_hops,
5663                optional,
5664                target_filter,
5665                path_variable,
5666                edge_properties,
5667                is_variable_length,
5668                optional_pattern_vars,
5669                scope_match_variables,
5670                edge_filter_expr,
5671                path_mode,
5672                qpp_steps,
5673            } => {
5674                if target_variable == variable {
5675                    // Found the traverse producing this variable
5676                    let new_filter = match target_filter {
5677                        Some(existing) => Some(Expr::BinaryOp {
5678                            left: Box::new(existing),
5679                            op: BinaryOp::And,
5680                            right: Box::new(predicate),
5681                        }),
5682                        None => Some(predicate),
5683                    };
5684                    LogicalPlan::Traverse {
5685                        input,
5686                        edge_type_ids,
5687                        direction,
5688                        source_variable,
5689                        target_variable,
5690                        target_label_id,
5691                        step_variable,
5692                        min_hops,
5693                        max_hops,
5694                        optional,
5695                        target_filter: new_filter,
5696                        path_variable,
5697                        edge_properties,
5698                        is_variable_length,
5699                        optional_pattern_vars,
5700                        scope_match_variables,
5701                        edge_filter_expr,
5702                        path_mode,
5703                        qpp_steps,
5704                    }
5705                } else {
5706                    // Recurse into input
5707                    LogicalPlan::Traverse {
5708                        input: Box::new(Self::push_predicate_to_traverse(
5709                            *input, variable, predicate,
5710                        )),
5711                        edge_type_ids,
5712                        direction,
5713                        source_variable,
5714                        target_variable,
5715                        target_label_id,
5716                        step_variable,
5717                        min_hops,
5718                        max_hops,
5719                        optional,
5720                        target_filter,
5721                        path_variable,
5722                        edge_properties,
5723                        is_variable_length,
5724                        optional_pattern_vars,
5725                        scope_match_variables,
5726                        edge_filter_expr,
5727                        path_mode,
5728                        qpp_steps,
5729                    }
5730                }
5731            }
5732            LogicalPlan::Filter {
5733                input,
5734                predicate: p,
5735                optional_variables: opt_vars,
5736            } => LogicalPlan::Filter {
5737                input: Box::new(Self::push_predicate_to_traverse(
5738                    *input, variable, predicate,
5739                )),
5740                predicate: p,
5741                optional_variables: opt_vars,
5742            },
5743            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
5744                input: Box::new(Self::push_predicate_to_traverse(
5745                    *input, variable, predicate,
5746                )),
5747                projections,
5748            },
5749            LogicalPlan::CrossJoin { left, right } => {
5750                // Check which side has the variable
5751                if Self::is_traverse_target(&left, variable) {
5752                    LogicalPlan::CrossJoin {
5753                        left: Box::new(Self::push_predicate_to_traverse(
5754                            *left, variable, predicate,
5755                        )),
5756                        right,
5757                    }
5758                } else {
5759                    LogicalPlan::CrossJoin {
5760                        left,
5761                        right: Box::new(Self::push_predicate_to_traverse(
5762                            *right, variable, predicate,
5763                        )),
5764                    }
5765                }
5766            }
5767            other => other,
5768        }
5769    }
5770
5771    /// Plan a WITH clause, handling aggregations and projections.
5772    fn plan_with_clause(
5773        &self,
5774        with_clause: &WithClause,
5775        plan: LogicalPlan,
5776        vars_in_scope: &[VariableInfo],
5777    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
5778        let mut plan = plan;
5779        let mut group_by: Vec<Expr> = Vec::new();
5780        let mut aggregates: Vec<Expr> = Vec::new();
5781        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
5782        let mut has_agg = false;
5783        let mut projections = Vec::new();
5784        let mut new_vars: Vec<VariableInfo> = Vec::new();
5785        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
5786        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
5787        let mut projected_aliases: HashSet<String> = HashSet::new();
5788        let mut has_unaliased_non_variable_expr = false;
5789
5790        for item in &with_clause.items {
5791            match item {
5792                ReturnItem::All => {
5793                    // WITH * - add all variables in scope
5794                    for v in vars_in_scope {
5795                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5796                        projected_aliases.insert(v.name.clone());
5797                        projected_simple_reprs.insert(v.name.clone());
5798                    }
5799                    new_vars.extend(vars_in_scope.iter().cloned());
5800                }
5801                ReturnItem::Expr { expr, alias, .. } => {
5802                    if matches!(expr, Expr::Wildcard) {
5803                        for v in vars_in_scope {
5804                            projections
5805                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5806                            projected_aliases.insert(v.name.clone());
5807                            projected_simple_reprs.insert(v.name.clone());
5808                        }
5809                        new_vars.extend(vars_in_scope.iter().cloned());
5810                    } else {
5811                        // Validate expression variables and syntax
5812                        validate_expression_variables(expr, vars_in_scope)?;
5813                        validate_expression(expr, vars_in_scope)?;
5814                        // Pattern predicates are not allowed in WITH
5815                        if contains_pattern_predicate(expr) {
5816                            return Err(anyhow!(
5817                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
5818                            ));
5819                        }
5820
5821                        projections.push((expr.clone(), alias.clone()));
5822                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
5823                            // Bare aggregate — push directly
5824                            has_agg = true;
5825                            aggregates.push(expr.clone());
5826                            projected_aggregate_reprs.insert(expr.to_string_repr());
5827                        } else if !is_window_function(expr)
5828                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
5829                        {
5830                            // Compound aggregate or expression containing aggregates
5831                            has_agg = true;
5832                            compound_agg_exprs.push(expr.clone());
5833                            for inner in extract_inner_aggregates(expr) {
5834                                let repr = inner.to_string_repr();
5835                                if !projected_aggregate_reprs.contains(&repr) {
5836                                    aggregates.push(inner);
5837                                    projected_aggregate_reprs.insert(repr);
5838                                }
5839                            }
5840                        } else if !group_by.contains(expr) {
5841                            group_by.push(expr.clone());
5842                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
5843                                projected_simple_reprs.insert(expr.to_string_repr());
5844                            }
5845                        }
5846
5847                        // Preserve non-scalar type information when WITH aliases
5848                        // entity/path-capable expressions.
5849                        if let Some(a) = alias {
5850                            if projected_aliases.contains(a) {
5851                                return Err(anyhow!(
5852                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5853                                    a
5854                                ));
5855                            }
5856                            let inferred = infer_with_output_type(expr, vars_in_scope);
5857                            new_vars.push(VariableInfo::new(a.clone(), inferred));
5858                            projected_aliases.insert(a.clone());
5859                        } else if let Expr::Variable(v) = expr {
5860                            if projected_aliases.contains(v) {
5861                                return Err(anyhow!(
5862                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5863                                    v
5864                                ));
5865                            }
5866                            // Preserve the original type if the variable is just passed through
5867                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
5868                                new_vars.push(existing.clone());
5869                            } else {
5870                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
5871                            }
5872                            projected_aliases.insert(v.clone());
5873                        } else {
5874                            has_unaliased_non_variable_expr = true;
5875                        }
5876                    }
5877                }
5878            }
5879        }
5880
5881        // Collect extra variables that need to survive the projection stage
5882        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
5883        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
5884        let mut passthrough_extras: Vec<String> = Vec::new();
5885        let mut seen_passthrough: HashSet<String> = HashSet::new();
5886
5887        if let Some(predicate) = &with_clause.where_clause {
5888            for name in collect_expr_variables(predicate) {
5889                if !projected_names.contains(name.as_str())
5890                    && find_var_in_scope(vars_in_scope, &name).is_some()
5891                    && seen_passthrough.insert(name.clone())
5892                {
5893                    passthrough_extras.push(name);
5894                }
5895            }
5896        }
5897
5898        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
5899        // Carry those variables through the projection so Sort can resolve them.
5900        if !has_agg && let Some(order_by) = &with_clause.order_by {
5901            for item in order_by {
5902                for name in collect_expr_variables(&item.expr) {
5903                    if !projected_names.contains(name.as_str())
5904                        && find_var_in_scope(vars_in_scope, &name).is_some()
5905                        && seen_passthrough.insert(name.clone())
5906                    {
5907                        passthrough_extras.push(name);
5908                    }
5909                }
5910            }
5911        }
5912
5913        let needs_cleanup = !passthrough_extras.is_empty();
5914        for extra in &passthrough_extras {
5915            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
5916        }
5917
5918        // Validate compound aggregate expressions: non-aggregate refs must be
5919        // individually present in the group_by as simple variables or properties.
5920        if has_agg {
5921            let group_by_reprs: HashSet<String> =
5922                group_by.iter().map(|e| e.to_string_repr()).collect();
5923            for expr in &compound_agg_exprs {
5924                let mut refs = Vec::new();
5925                collect_non_aggregate_refs(expr, false, &mut refs);
5926                for r in &refs {
5927                    let is_covered = match r {
5928                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
5929                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
5930                    };
5931                    if !is_covered {
5932                        return Err(anyhow!(
5933                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
5934                        ));
5935                    }
5936                }
5937            }
5938        }
5939
5940        if has_agg {
5941            plan = LogicalPlan::Aggregate {
5942                input: Box::new(plan),
5943                group_by,
5944                aggregates,
5945            };
5946
5947            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
5948            // can reference the WITH aliases instead of raw column names.
5949            let rename_projections: Vec<(Expr, Option<String>)> = projections
5950                .iter()
5951                .map(|(expr, alias)| {
5952                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
5953                        // Bare aggregate — reference by column name
5954                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
5955                    } else if is_compound_aggregate(expr)
5956                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
5957                    {
5958                        // Compound aggregate — replace inner aggregates with
5959                        // column references, keep outer expression
5960                        (replace_aggregates_with_columns(expr), alias.clone())
5961                    } else {
5962                        (Expr::Variable(expr.to_string_repr()), alias.clone())
5963                    }
5964                })
5965                .collect();
5966            plan = LogicalPlan::Project {
5967                input: Box::new(plan),
5968                projections: rename_projections,
5969            };
5970        } else if !projections.is_empty() {
5971            plan = LogicalPlan::Project {
5972                input: Box::new(plan),
5973                projections: projections.clone(),
5974            };
5975        }
5976
5977        // Apply the WHERE filter (post-projection, with extras still visible).
5978        if let Some(predicate) = &with_clause.where_clause {
5979            plan = LogicalPlan::Filter {
5980                input: Box::new(plan),
5981                predicate: predicate.clone(),
5982                optional_variables: HashSet::new(),
5983            };
5984        }
5985
5986        // Validate and apply ORDER BY for WITH clause.
5987        // Keep pre-WITH vars in scope for parser compatibility, then apply
5988        // stricter checks for aggregate-containing ORDER BY items.
5989        if let Some(order_by) = &with_clause.order_by {
5990            // Build a mapping from aliases and projected expression reprs to
5991            // output columns of the preceding Project/Aggregate pipeline.
5992            let with_order_aliases: HashMap<String, Expr> = projections
5993                .iter()
5994                .flat_map(|(expr, alias)| {
5995                    let output_col = if let Some(a) = alias {
5996                        a.clone()
5997                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
5998                        aggregate_column_name(expr)
5999                    } else {
6000                        expr.to_string_repr()
6001                    };
6002
6003                    let mut entries = Vec::new();
6004                    // ORDER BY alias
6005                    if let Some(a) = alias {
6006                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
6007                    }
6008                    // ORDER BY projected expression (e.g. me.age)
6009                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
6010                    entries
6011                })
6012                .collect();
6013
6014            let order_by_scope: Vec<VariableInfo> = {
6015                let mut scope = new_vars.clone();
6016                for v in vars_in_scope {
6017                    if !is_var_in_scope(&scope, &v.name) {
6018                        scope.push(v.clone());
6019                    }
6020                }
6021                scope
6022            };
6023            for item in order_by {
6024                validate_expression_variables(&item.expr, &order_by_scope)?;
6025                validate_expression(&item.expr, &order_by_scope)?;
6026                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
6027                if has_aggregate_in_item && !has_agg {
6028                    return Err(anyhow!(
6029                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
6030                    ));
6031                }
6032                if has_agg && has_aggregate_in_item {
6033                    validate_with_order_by_aggregate_item(
6034                        &item.expr,
6035                        &projected_aggregate_reprs,
6036                        &projected_simple_reprs,
6037                        &projected_aliases,
6038                    )?;
6039                }
6040            }
6041            let rewritten_order_by: Vec<SortItem> = order_by
6042                .iter()
6043                .map(|item| {
6044                    let mut expr =
6045                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
6046                    if has_agg {
6047                        // Rewrite any aggregate calls to the aggregate output
6048                        // columns produced by Aggregate.
6049                        expr = replace_aggregates_with_columns(&expr);
6050                        // Then re-map projected property expressions to aliases
6051                        // from the WITH projection.
6052                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
6053                    }
6054                    SortItem {
6055                        expr,
6056                        ascending: item.ascending,
6057                    }
6058                })
6059                .collect();
6060            plan = LogicalPlan::Sort {
6061                input: Box::new(plan),
6062                order_by: rewritten_order_by,
6063            };
6064        }
6065
6066        // Non-variable expressions in WITH must be aliased.
6067        // This check is intentionally placed after ORDER BY validation so
6068        // higher-priority semantic errors (e.g., ambiguous aggregation in
6069        // ORDER BY) can surface first.
6070        if has_unaliased_non_variable_expr {
6071            return Err(anyhow!(
6072                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
6073            ));
6074        }
6075
6076        // Validate and apply SKIP/LIMIT for WITH clause
6077        let skip = with_clause
6078            .skip
6079            .as_ref()
6080            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
6081            .transpose()?
6082            .flatten();
6083        let fetch = with_clause
6084            .limit
6085            .as_ref()
6086            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
6087            .transpose()?
6088            .flatten();
6089
6090        if skip.is_some() || fetch.is_some() {
6091            plan = LogicalPlan::Limit {
6092                input: Box::new(plan),
6093                skip,
6094                fetch,
6095            };
6096        }
6097
6098        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
6099        if needs_cleanup {
6100            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
6101                .iter()
6102                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
6103                .collect();
6104            plan = LogicalPlan::Project {
6105                input: Box::new(plan),
6106                projections: cleanup_projections,
6107            };
6108        }
6109
6110        if with_clause.distinct {
6111            plan = LogicalPlan::Distinct {
6112                input: Box::new(plan),
6113            };
6114        }
6115
6116        Ok((plan, new_vars))
6117    }
6118
6119    fn plan_with_recursive(
6120        &self,
6121        with_recursive: &WithRecursiveClause,
6122        _prev_plan: LogicalPlan,
6123        vars_in_scope: &[VariableInfo],
6124    ) -> Result<LogicalPlan> {
6125        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
6126        match &*with_recursive.query {
6127            Query::Union { left, right, .. } => {
6128                // Plan the anchor (initial) query with current scope
6129                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
6130
6131                // Plan the recursive query with the CTE name added to scope
6132                // so it can reference itself
6133                let mut recursive_scope = vars_in_scope.to_vec();
6134                recursive_scope.push(VariableInfo::new(
6135                    with_recursive.name.clone(),
6136                    VariableType::Scalar,
6137                ));
6138                let recursive_plan =
6139                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
6140
6141                Ok(LogicalPlan::RecursiveCTE {
6142                    cte_name: with_recursive.name.clone(),
6143                    initial: Box::new(initial_plan),
6144                    recursive: Box::new(recursive_plan),
6145                })
6146            }
6147            _ => Err(anyhow::anyhow!(
6148                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
6149            )),
6150        }
6151    }
6152
6153    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
6154        let entries = match properties {
6155            Some(Expr::Map(entries)) => entries,
6156            _ => return None,
6157        };
6158
6159        if entries.is_empty() {
6160            return None;
6161        }
6162        let mut final_expr = None;
6163        for (prop, val_expr) in entries {
6164            let eq_expr = Expr::BinaryOp {
6165                left: Box::new(Expr::Property(
6166                    Box::new(Expr::Variable(variable.to_string())),
6167                    prop.clone(),
6168                )),
6169                op: BinaryOp::Eq,
6170                right: Box::new(val_expr.clone()),
6171            };
6172
6173            if let Some(e) = final_expr {
6174                final_expr = Some(Expr::BinaryOp {
6175                    left: Box::new(e),
6176                    op: BinaryOp::And,
6177                    right: Box::new(eq_expr),
6178                });
6179            } else {
6180                final_expr = Some(eq_expr);
6181            }
6182        }
6183        final_expr
6184    }
6185
6186    /// Build a filter expression from node properties and labels.
6187    ///
6188    /// This is used for TraverseMainByType where we need to filter target nodes
6189    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
6190    pub fn node_filter_expr(
6191        &self,
6192        variable: &str,
6193        labels: &[String],
6194        properties: &Option<Expr>,
6195    ) -> Option<Expr> {
6196        let mut final_expr = None;
6197
6198        // Add label checks using hasLabel(variable, 'label')
6199        for label in labels {
6200            let label_check = Expr::FunctionCall {
6201                name: "hasLabel".to_string(),
6202                args: vec![
6203                    Expr::Variable(variable.to_string()),
6204                    Expr::Literal(CypherLiteral::String(label.clone())),
6205                ],
6206                distinct: false,
6207                window_spec: None,
6208            };
6209
6210            final_expr = match final_expr {
6211                Some(e) => Some(Expr::BinaryOp {
6212                    left: Box::new(e),
6213                    op: BinaryOp::And,
6214                    right: Box::new(label_check),
6215                }),
6216                None => Some(label_check),
6217            };
6218        }
6219
6220        // Add property checks
6221        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
6222            final_expr = match final_expr {
6223                Some(e) => Some(Expr::BinaryOp {
6224                    left: Box::new(e),
6225                    op: BinaryOp::And,
6226                    right: Box::new(prop_expr),
6227                }),
6228                None => Some(prop_expr),
6229            };
6230        }
6231
6232        final_expr
6233    }
6234
6235    /// Create a filter plan that ensures traversed target matches a bound variable.
6236    ///
6237    /// Used in EXISTS subquery patterns where the target is already bound.
6238    /// Compares the target's VID against the bound variable's VID.
6239    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
6240        // Compare the traverse-discovered target's VID against the bound variable's VID.
6241        // Left side: Property access on the variable from current scope.
6242        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
6243        // We use Variable("{var}._vid") to access the VID column from the traverse output,
6244        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
6245        let bound_check = Expr::BinaryOp {
6246            left: Box::new(Expr::Property(
6247                Box::new(Expr::Variable(target_variable.to_string())),
6248                "_vid".to_string(),
6249            )),
6250            op: BinaryOp::Eq,
6251            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
6252        };
6253        LogicalPlan::Filter {
6254            input: Box::new(plan),
6255            predicate: bound_check,
6256            optional_variables: HashSet::new(),
6257        }
6258    }
6259
6260    /// Replace a Scan node matching the variable with a VectorKnn node
6261    fn replace_scan_with_knn(
6262        plan: LogicalPlan,
6263        variable: &str,
6264        property: &str,
6265        query: Expr,
6266        threshold: Option<f32>,
6267    ) -> LogicalPlan {
6268        match plan {
6269            LogicalPlan::Scan {
6270                label_id,
6271                labels,
6272                variable: scan_var,
6273                filter,
6274                optional,
6275            } => {
6276                if scan_var == variable {
6277                    // Inject any existing scan filter into VectorKnn?
6278                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
6279                    // Typically filter is applied post-Knn or during Knn if supported.
6280                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
6281                    // If `filter` is present on Scan, it must be preserved.
6282                    // We can wrap VectorKnn in Filter if Scan had filter.
6283
6284                    let knn = LogicalPlan::VectorKnn {
6285                        label_id,
6286                        variable: variable.to_string(),
6287                        property: property.to_string(),
6288                        query,
6289                        k: 100, // Default K, should push down LIMIT
6290                        threshold,
6291                    };
6292
6293                    if let Some(f) = filter {
6294                        LogicalPlan::Filter {
6295                            input: Box::new(knn),
6296                            predicate: f,
6297                            optional_variables: HashSet::new(),
6298                        }
6299                    } else {
6300                        knn
6301                    }
6302                } else {
6303                    LogicalPlan::Scan {
6304                        label_id,
6305                        labels,
6306                        variable: scan_var,
6307                        filter,
6308                        optional,
6309                    }
6310                }
6311            }
6312            LogicalPlan::Filter {
6313                input,
6314                predicate,
6315                optional_variables,
6316            } => LogicalPlan::Filter {
6317                input: Box::new(Self::replace_scan_with_knn(
6318                    *input, variable, property, query, threshold,
6319                )),
6320                predicate,
6321                optional_variables,
6322            },
6323            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6324                input: Box::new(Self::replace_scan_with_knn(
6325                    *input, variable, property, query, threshold,
6326                )),
6327                projections,
6328            },
6329            LogicalPlan::Limit { input, skip, fetch } => {
6330                // If we encounter Limit, we should ideally push K down to VectorKnn
6331                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
6332                // Limit comes later.
6333                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
6334                LogicalPlan::Limit {
6335                    input: Box::new(Self::replace_scan_with_knn(
6336                        *input, variable, property, query, threshold,
6337                    )),
6338                    skip,
6339                    fetch,
6340                }
6341            }
6342            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6343                left: Box::new(Self::replace_scan_with_knn(
6344                    *left,
6345                    variable,
6346                    property,
6347                    query.clone(),
6348                    threshold,
6349                )),
6350                right: Box::new(Self::replace_scan_with_knn(
6351                    *right, variable, property, query, threshold,
6352                )),
6353            },
6354            other => other,
6355        }
6356    }
6357
6358    /// Find the label_id for a Scan node matching the given variable
6359    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
6360        match plan {
6361            LogicalPlan::Scan {
6362                label_id,
6363                variable: var,
6364                ..
6365            } if var == variable => Some(*label_id),
6366            LogicalPlan::Filter { input, .. }
6367            | LogicalPlan::Project { input, .. }
6368            | LogicalPlan::Sort { input, .. }
6369            | LogicalPlan::Limit { input, .. }
6370            | LogicalPlan::Aggregate { input, .. }
6371            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
6372            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
6373                .or_else(|| Self::find_scan_label_id(right, variable)),
6374            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
6375            _ => None,
6376        }
6377    }
6378
6379    /// Push a predicate into a Scan's filter for the specified variable
6380    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
6381        match plan {
6382            LogicalPlan::Scan {
6383                label_id,
6384                labels,
6385                variable: var,
6386                filter,
6387                optional,
6388            } if var == variable => {
6389                // Merge the predicate with existing filter
6390                let new_filter = match filter {
6391                    Some(existing) => Some(Expr::BinaryOp {
6392                        left: Box::new(existing),
6393                        op: BinaryOp::And,
6394                        right: Box::new(predicate),
6395                    }),
6396                    None => Some(predicate),
6397                };
6398                LogicalPlan::Scan {
6399                    label_id,
6400                    labels,
6401                    variable: var,
6402                    filter: new_filter,
6403                    optional,
6404                }
6405            }
6406            LogicalPlan::Filter {
6407                input,
6408                predicate: p,
6409                optional_variables: opt_vars,
6410            } => LogicalPlan::Filter {
6411                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6412                predicate: p,
6413                optional_variables: opt_vars,
6414            },
6415            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6416                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6417                projections,
6418            },
6419            LogicalPlan::CrossJoin { left, right } => {
6420                // Check which side has the variable
6421                if Self::find_scan_label_id(&left, variable).is_some() {
6422                    LogicalPlan::CrossJoin {
6423                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
6424                        right,
6425                    }
6426                } else {
6427                    LogicalPlan::CrossJoin {
6428                        left,
6429                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
6430                    }
6431                }
6432            }
6433            LogicalPlan::Traverse {
6434                input,
6435                edge_type_ids,
6436                direction,
6437                source_variable,
6438                target_variable,
6439                target_label_id,
6440                step_variable,
6441                min_hops,
6442                max_hops,
6443                optional,
6444                target_filter,
6445                path_variable,
6446                edge_properties,
6447                is_variable_length,
6448                optional_pattern_vars,
6449                scope_match_variables,
6450                edge_filter_expr,
6451                path_mode,
6452                qpp_steps,
6453            } => LogicalPlan::Traverse {
6454                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6455                edge_type_ids,
6456                direction,
6457                source_variable,
6458                target_variable,
6459                target_label_id,
6460                step_variable,
6461                min_hops,
6462                max_hops,
6463                optional,
6464                target_filter,
6465                path_variable,
6466                edge_properties,
6467                is_variable_length,
6468                optional_pattern_vars,
6469                scope_match_variables,
6470                edge_filter_expr,
6471                path_mode,
6472                qpp_steps,
6473            },
6474            other => other,
6475        }
6476    }
6477
6478    /// Extract predicates that reference only the specified variable
6479    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
6480        let analyzer = PredicateAnalyzer::new();
6481        let analysis = analyzer.analyze(predicate, variable);
6482
6483        // Return pushable predicates and combined residual
6484        let residual = if analysis.residual.is_empty() {
6485            None
6486        } else {
6487            let mut iter = analysis.residual.into_iter();
6488            let first = iter.next().unwrap();
6489            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
6490                left: Box::new(acc),
6491                op: BinaryOp::And,
6492                right: Box::new(e),
6493            }))
6494        };
6495
6496        (analysis.pushable, residual)
6497    }
6498
6499    // =====================================================================
6500    // Apply Predicate Pushdown - Helper Functions
6501    // =====================================================================
6502
6503    /// Split AND-connected predicates into a list.
6504    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
6505        match expr {
6506            Expr::BinaryOp {
6507                left,
6508                op: BinaryOp::And,
6509                right,
6510            } => {
6511                let mut result = Self::split_and_conjuncts(left);
6512                result.extend(Self::split_and_conjuncts(right));
6513                result
6514            }
6515            _ => vec![expr.clone()],
6516        }
6517    }
6518
6519    /// Combine predicates with AND.
6520    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
6521        if predicates.is_empty() {
6522            return None;
6523        }
6524        let mut result = predicates[0].clone();
6525        for pred in predicates.iter().skip(1) {
6526            result = Expr::BinaryOp {
6527                left: Box::new(result),
6528                op: BinaryOp::And,
6529                right: Box::new(pred.clone()),
6530            };
6531        }
6532        Some(result)
6533    }
6534
6535    /// Collect all variable names referenced in an expression.
6536    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
6537        let mut vars = HashSet::new();
6538        Self::collect_expr_variables_impl(expr, &mut vars);
6539        vars
6540    }
6541
6542    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
6543        match expr {
6544            Expr::Variable(name) => {
6545                vars.insert(name.clone());
6546            }
6547            Expr::Property(inner, _) => {
6548                if let Expr::Variable(name) = inner.as_ref() {
6549                    vars.insert(name.clone());
6550                } else {
6551                    Self::collect_expr_variables_impl(inner, vars);
6552                }
6553            }
6554            Expr::BinaryOp { left, right, .. } => {
6555                Self::collect_expr_variables_impl(left, vars);
6556                Self::collect_expr_variables_impl(right, vars);
6557            }
6558            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6559            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
6560            Expr::FunctionCall { args, .. } => {
6561                for arg in args {
6562                    Self::collect_expr_variables_impl(arg, vars);
6563                }
6564            }
6565            Expr::List(items) => {
6566                for item in items {
6567                    Self::collect_expr_variables_impl(item, vars);
6568                }
6569            }
6570            Expr::Case {
6571                expr,
6572                when_then,
6573                else_expr,
6574            } => {
6575                if let Some(e) = expr {
6576                    Self::collect_expr_variables_impl(e, vars);
6577                }
6578                for (w, t) in when_then {
6579                    Self::collect_expr_variables_impl(w, vars);
6580                    Self::collect_expr_variables_impl(t, vars);
6581                }
6582                if let Some(e) = else_expr {
6583                    Self::collect_expr_variables_impl(e, vars);
6584                }
6585            }
6586            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6587            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
6588            // they introduce local variable bindings not in outer scope.
6589            _ => {}
6590        }
6591    }
6592
6593    /// Collect all variables produced by a logical plan.
6594    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
6595        let mut vars = HashSet::new();
6596        Self::collect_plan_variables_impl(plan, &mut vars);
6597        vars
6598    }
6599
6600    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
6601        match plan {
6602            LogicalPlan::Scan { variable, .. } => {
6603                vars.insert(variable.clone());
6604            }
6605            LogicalPlan::Traverse {
6606                target_variable,
6607                step_variable,
6608                input,
6609                path_variable,
6610                ..
6611            } => {
6612                vars.insert(target_variable.clone());
6613                if let Some(sv) = step_variable {
6614                    vars.insert(sv.clone());
6615                }
6616                if let Some(pv) = path_variable {
6617                    vars.insert(pv.clone());
6618                }
6619                Self::collect_plan_variables_impl(input, vars);
6620            }
6621            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
6622            LogicalPlan::Project { input, projections } => {
6623                for (expr, alias) in projections {
6624                    if let Some(a) = alias {
6625                        vars.insert(a.clone());
6626                    } else if let Expr::Variable(v) = expr {
6627                        vars.insert(v.clone());
6628                    }
6629                }
6630                Self::collect_plan_variables_impl(input, vars);
6631            }
6632            LogicalPlan::Apply {
6633                input, subquery, ..
6634            } => {
6635                Self::collect_plan_variables_impl(input, vars);
6636                Self::collect_plan_variables_impl(subquery, vars);
6637            }
6638            LogicalPlan::CrossJoin { left, right } => {
6639                Self::collect_plan_variables_impl(left, vars);
6640                Self::collect_plan_variables_impl(right, vars);
6641            }
6642            LogicalPlan::Unwind {
6643                input, variable, ..
6644            } => {
6645                vars.insert(variable.clone());
6646                Self::collect_plan_variables_impl(input, vars);
6647            }
6648            LogicalPlan::Aggregate { input, .. } => {
6649                Self::collect_plan_variables_impl(input, vars);
6650            }
6651            LogicalPlan::Distinct { input } => {
6652                Self::collect_plan_variables_impl(input, vars);
6653            }
6654            LogicalPlan::Sort { input, .. } => {
6655                Self::collect_plan_variables_impl(input, vars);
6656            }
6657            LogicalPlan::Limit { input, .. } => {
6658                Self::collect_plan_variables_impl(input, vars);
6659            }
6660            LogicalPlan::VectorKnn { variable, .. } => {
6661                vars.insert(variable.clone());
6662            }
6663            LogicalPlan::ProcedureCall { yield_items, .. } => {
6664                for (name, alias) in yield_items {
6665                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
6666                }
6667            }
6668            LogicalPlan::ShortestPath {
6669                input,
6670                path_variable,
6671                ..
6672            } => {
6673                vars.insert(path_variable.clone());
6674                Self::collect_plan_variables_impl(input, vars);
6675            }
6676            LogicalPlan::AllShortestPaths {
6677                input,
6678                path_variable,
6679                ..
6680            } => {
6681                vars.insert(path_variable.clone());
6682                Self::collect_plan_variables_impl(input, vars);
6683            }
6684            LogicalPlan::RecursiveCTE {
6685                initial, recursive, ..
6686            } => {
6687                Self::collect_plan_variables_impl(initial, vars);
6688                Self::collect_plan_variables_impl(recursive, vars);
6689            }
6690            LogicalPlan::SubqueryCall {
6691                input, subquery, ..
6692            } => {
6693                Self::collect_plan_variables_impl(input, vars);
6694                Self::collect_plan_variables_impl(subquery, vars);
6695            }
6696            _ => {}
6697        }
6698    }
6699
6700    /// Extract predicates that only reference variables from Apply's input.
6701    /// Returns (input_only_predicates, remaining_predicates).
6702    fn extract_apply_input_predicates(
6703        predicate: &Expr,
6704        input_variables: &HashSet<String>,
6705        subquery_new_variables: &HashSet<String>,
6706    ) -> (Vec<Expr>, Vec<Expr>) {
6707        let conjuncts = Self::split_and_conjuncts(predicate);
6708        let mut input_preds = Vec::new();
6709        let mut remaining = Vec::new();
6710
6711        for conj in conjuncts {
6712            let vars = Self::collect_expr_variables(&conj);
6713
6714            // Predicate only references input variables (none from subquery)
6715            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
6716            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
6717
6718            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
6719                input_preds.push(conj);
6720            } else {
6721                remaining.push(conj);
6722            }
6723        }
6724
6725        (input_preds, remaining)
6726    }
6727
6728    /// Push eligible predicates into Apply.input_filter.
6729    /// This filters input rows BEFORE executing the correlated subquery.
6730    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
6731        match plan {
6732            LogicalPlan::Apply {
6733                input,
6734                subquery,
6735                input_filter,
6736            } => {
6737                // Collect variables from input plan
6738                let input_vars = Self::collect_plan_variables(&input);
6739
6740                // Collect NEW variables introduced by subquery (not in input)
6741                let subquery_vars = Self::collect_plan_variables(&subquery);
6742                let new_subquery_vars: HashSet<String> =
6743                    subquery_vars.difference(&input_vars).cloned().collect();
6744
6745                // Extract predicates that only reference input variables
6746                let (input_preds, remaining) = Self::extract_apply_input_predicates(
6747                    current_predicate,
6748                    &input_vars,
6749                    &new_subquery_vars,
6750                );
6751
6752                // Update current_predicate to only remaining predicates
6753                *current_predicate = if remaining.is_empty() {
6754                    Expr::TRUE
6755                } else {
6756                    Self::combine_predicates(remaining).unwrap()
6757                };
6758
6759                // Combine extracted predicates with existing input_filter
6760                let new_input_filter = if input_preds.is_empty() {
6761                    input_filter
6762                } else {
6763                    let extracted = Self::combine_predicates(input_preds).unwrap();
6764                    match input_filter {
6765                        Some(existing) => Some(Expr::BinaryOp {
6766                            left: Box::new(existing),
6767                            op: BinaryOp::And,
6768                            right: Box::new(extracted),
6769                        }),
6770                        None => Some(extracted),
6771                    }
6772                };
6773
6774                // Recurse into input plan
6775                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
6776
6777                LogicalPlan::Apply {
6778                    input: Box::new(new_input),
6779                    subquery,
6780                    input_filter: new_input_filter,
6781                }
6782            }
6783            // Recurse into other plan nodes
6784            LogicalPlan::Filter {
6785                input,
6786                predicate,
6787                optional_variables,
6788            } => LogicalPlan::Filter {
6789                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6790                predicate,
6791                optional_variables,
6792            },
6793            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6794                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6795                projections,
6796            },
6797            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
6798                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6799                order_by,
6800            },
6801            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
6802                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6803                skip,
6804                fetch,
6805            },
6806            LogicalPlan::Aggregate {
6807                input,
6808                group_by,
6809                aggregates,
6810            } => LogicalPlan::Aggregate {
6811                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6812                group_by,
6813                aggregates,
6814            },
6815            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6816                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
6817                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
6818            },
6819            LogicalPlan::Traverse {
6820                input,
6821                edge_type_ids,
6822                direction,
6823                source_variable,
6824                target_variable,
6825                target_label_id,
6826                step_variable,
6827                min_hops,
6828                max_hops,
6829                optional,
6830                target_filter,
6831                path_variable,
6832                edge_properties,
6833                is_variable_length,
6834                optional_pattern_vars,
6835                scope_match_variables,
6836                edge_filter_expr,
6837                path_mode,
6838                qpp_steps,
6839            } => LogicalPlan::Traverse {
6840                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6841                edge_type_ids,
6842                direction,
6843                source_variable,
6844                target_variable,
6845                target_label_id,
6846                step_variable,
6847                min_hops,
6848                max_hops,
6849                optional,
6850                target_filter,
6851                path_variable,
6852                edge_properties,
6853                is_variable_length,
6854                optional_pattern_vars,
6855                scope_match_variables,
6856                edge_filter_expr,
6857                path_mode,
6858                qpp_steps,
6859            },
6860            other => other,
6861        }
6862    }
6863}
6864
6865/// Get the expected column name for an aggregate expression.
6866///
6867/// This is the single source of truth for aggregate column naming, used by:
6868/// - Logical planner (to create column references)
6869/// - Physical planner (to rename DataFusion's auto-generated column names)
6870/// - Fallback executor (to name result columns)
6871pub fn aggregate_column_name(expr: &Expr) -> String {
6872    expr.to_string_repr()
6873}
6874
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Assembled by `QueryPlanner::explain_logical_plan`. The serde derives make
/// the report serializable; field names and order are part of that format.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Debug-formatted logical plan tree (pretty-printed with `{:#?}`).
    pub plan_text: String,
    /// Index availability report for each scan in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    /// `explain_logical_plan` currently always leaves this list empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
6889
/// Suggestion for creating an index to improve query performance.
///
/// Carried in [`ExplainOutput::suggestions`]. All fields are plain strings so
/// the suggestion can be serialized and shown to the user as-is.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Ready-to-execute Cypher statement to create the index.
    pub create_statement: String,
}
6904
/// Index availability report for a single scan operator.
///
/// Carried in [`ExplainOutput::index_usage`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label or edge type the index belongs to. The vector-KNN
    /// reporter writes `"?"` here when the label id cannot be resolved.
    pub label_or_type: String,
    /// Name of the indexed property.
    pub property: String,
    /// Kind of index as a string; `"VECTOR"` for entries produced from
    /// `VectorKnn` operators.
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
6916
/// Rough cost and row count estimates for a complete logical plan.
///
/// Carried in [`ExplainOutput::cost_estimates`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
6925
6926impl QueryPlanner {
6927    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
6928    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
6929        let plan = self.plan(ast)?;
6930        self.explain_logical_plan(&plan)
6931    }
6932
6933    /// Produce an EXPLAIN report for an already-planned logical plan.
6934    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
6935        let index_usage = self.analyze_index_usage(plan)?;
6936        let cost_estimates = self.estimate_costs(plan)?;
6937        let suggestions = self.collect_index_suggestions(plan);
6938        let warnings = Vec::new();
6939        let plan_text = format!("{:#?}", plan);
6940
6941        Ok(ExplainOutput {
6942            plan_text,
6943            index_usage,
6944            cost_estimates,
6945            warnings,
6946            suggestions,
6947        })
6948    }
6949
6950    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
6951        let mut usage = Vec::new();
6952        self.collect_index_usage(plan, &mut usage);
6953        Ok(usage)
6954    }
6955
6956    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
6957        match plan {
6958            LogicalPlan::Scan { .. } => {
6959                // Placeholder: Scan might use index if it was optimized
6960                // Ideally LogicalPlan::Scan should store if it uses index.
6961                // But typically Planner converts Scan to specific index scan or we infer it here.
6962            }
6963            LogicalPlan::VectorKnn {
6964                label_id, property, ..
6965            } => {
6966                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
6967                usage.push(IndexUsage {
6968                    label_or_type: label_name.to_string(),
6969                    property: property.clone(),
6970                    index_type: "VECTOR".to_string(),
6971                    used: true,
6972                    reason: None,
6973                });
6974            }
6975            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
6976            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
6977            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
6978            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
6979            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
6980            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
6981            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
6982            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6983                self.collect_index_usage(left, usage);
6984                self.collect_index_usage(right, usage);
6985            }
6986            _ => {}
6987        }
6988    }
6989
6990    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
6991        Ok(CostEstimates {
6992            estimated_rows: 100.0,
6993            estimated_cost: 10.0,
6994        })
6995    }
6996
6997    /// Collect index suggestions based on query patterns.
6998    ///
6999    /// Currently detects:
7000    /// - Temporal predicates from `uni.validAt()` function calls
7001    /// - Temporal predicates from `VALID_AT` macro expansion
7002    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
7003        let mut suggestions = Vec::new();
7004        self.collect_temporal_suggestions(plan, &mut suggestions);
7005        suggestions
7006    }
7007
7008    /// Recursively collect temporal index suggestions from the plan.
7009    fn collect_temporal_suggestions(
7010        &self,
7011        plan: &LogicalPlan,
7012        suggestions: &mut Vec<IndexSuggestion>,
7013    ) {
7014        match plan {
7015            LogicalPlan::Filter {
7016                input, predicate, ..
7017            } => {
7018                // Check for temporal patterns in the predicate
7019                self.detect_temporal_pattern(predicate, suggestions);
7020                // Recurse into input
7021                self.collect_temporal_suggestions(input, suggestions);
7022            }
7023            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
7024            LogicalPlan::Project { input, .. } => {
7025                self.collect_temporal_suggestions(input, suggestions)
7026            }
7027            LogicalPlan::Limit { input, .. } => {
7028                self.collect_temporal_suggestions(input, suggestions)
7029            }
7030            LogicalPlan::Sort { input, .. } => {
7031                self.collect_temporal_suggestions(input, suggestions)
7032            }
7033            LogicalPlan::Aggregate { input, .. } => {
7034                self.collect_temporal_suggestions(input, suggestions)
7035            }
7036            LogicalPlan::Traverse { input, .. } => {
7037                self.collect_temporal_suggestions(input, suggestions)
7038            }
7039            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
7040                self.collect_temporal_suggestions(left, suggestions);
7041                self.collect_temporal_suggestions(right, suggestions);
7042            }
7043            _ => {}
7044        }
7045    }
7046
7047    /// Detect temporal predicate patterns and suggest indexes.
7048    ///
7049    /// Detects two patterns:
7050    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
7051    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
7052    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
7053        match expr {
7054            // Pattern 1: uni.temporal.validAt() function call
7055            Expr::FunctionCall { name, args, .. }
7056                if name.eq_ignore_ascii_case("uni.temporal.validAt")
7057                    || name.eq_ignore_ascii_case("validAt") =>
7058            {
7059                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
7060                if args.len() >= 2 {
7061                    let start_prop =
7062                        if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1) {
7063                            s.clone()
7064                        } else {
7065                            "valid_from".to_string()
7066                        };
7067
7068                    // Try to extract label from the node expression
7069                    if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
7070                        self.suggest_temporal_index(&var, &start_prop, suggestions);
7071                    }
7072                }
7073            }
7074
7075            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
7076            Expr::BinaryOp {
7077                left,
7078                op: BinaryOp::And,
7079                right,
7080            } => {
7081                // Check left side for `prop <= time` pattern (temporal start condition)
7082                if let Expr::BinaryOp {
7083                    left: prop_expr,
7084                    op: BinaryOp::LtEq,
7085                    ..
7086                } = left.as_ref()
7087                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
7088                    && (prop_name == "valid_from"
7089                        || prop_name.contains("start")
7090                        || prop_name.contains("from")
7091                        || prop_name.contains("begin"))
7092                    && let Some(var) = base.extract_variable()
7093                {
7094                    self.suggest_temporal_index(&var, prop_name, suggestions);
7095                }
7096
7097                // Recurse into both sides of AND
7098                self.detect_temporal_pattern(left.as_ref(), suggestions);
7099                self.detect_temporal_pattern(right.as_ref(), suggestions);
7100            }
7101
7102            // Recurse into other binary ops
7103            Expr::BinaryOp { left, right, .. } => {
7104                self.detect_temporal_pattern(left.as_ref(), suggestions);
7105                self.detect_temporal_pattern(right.as_ref(), suggestions);
7106            }
7107
7108            _ => {}
7109        }
7110    }
7111
7112    /// Suggest a scalar index for a temporal property if one doesn't already exist.
7113    fn suggest_temporal_index(
7114        &self,
7115        _variable: &str,
7116        property: &str,
7117        suggestions: &mut Vec<IndexSuggestion>,
7118    ) {
7119        // Check if a scalar index already exists for this property
7120        // We need to check all labels since we may not know the exact label from the variable
7121        let mut has_index = false;
7122
7123        for index in &self.schema.indexes {
7124            if let IndexDefinition::Scalar(config) = index
7125                && config.properties.contains(&property.to_string())
7126            {
7127                has_index = true;
7128                break;
7129            }
7130        }
7131
7132        if !has_index {
7133            // Avoid duplicate suggestions
7134            let already_suggested = suggestions.iter().any(|s| s.property == property);
7135            if !already_suggested {
7136                suggestions.push(IndexSuggestion {
7137                    label_or_type: "(detected from temporal query)".to_string(),
7138                    property: property.to_string(),
7139                    index_type: "SCALAR (BTree)".to_string(),
7140                    reason: format!(
7141                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
7142                        property
7143                    ),
7144                    create_statement: format!(
7145                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
7146                        property, property
7147                    ),
7148                });
7149            }
7150        }
7151    }
7152
7153    /// Helper functions for expression normalization
7154    /// Normalize an expression for storage: strip variable prefixes
7155    /// For simple property: u.email -> "email"
7156    /// For expressions: lower(u.email) -> "lower(email)"
7157    fn normalize_expression_for_storage(expr: &Expr) -> String {
7158        match expr {
7159            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
7160            _ => {
7161                // Serialize expression and strip variable prefix
7162                let expr_str = expr.to_string_repr();
7163                Self::strip_variable_prefix(&expr_str)
7164            }
7165        }
7166    }
7167
7168    /// Strip variable references like "u.prop" from expression strings
7169    /// Converts "lower(u.email)" to "lower(email)"
7170    fn strip_variable_prefix(expr_str: &str) -> String {
7171        use regex::Regex;
7172        // Match patterns like "word.property" and replace with just "property"
7173        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
7174        re.replace_all(expr_str, "$1").to_string()
7175    }
7176
7177    /// Plan a schema command from the new AST
7178    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
7179        match cmd {
7180            SchemaCommand::CreateVectorIndex(c) => {
7181                // Parse index type from options (default: IvfPq)
7182                let index_type = if let Some(type_val) = c.options.get("type") {
7183                    match type_val.as_str() {
7184                        Some("hnsw") => VectorIndexType::Hnsw {
7185                            m: 16,
7186                            ef_construction: 200,
7187                            ef_search: 100,
7188                        },
7189                        Some("flat") => VectorIndexType::Flat,
7190                        _ => VectorIndexType::IvfPq {
7191                            num_partitions: 256,
7192                            num_sub_vectors: 16,
7193                            bits_per_subvector: 8,
7194                        },
7195                    }
7196                } else {
7197                    VectorIndexType::IvfPq {
7198                        num_partitions: 256,
7199                        num_sub_vectors: 16,
7200                        bits_per_subvector: 8,
7201                    }
7202                };
7203
7204                // Parse embedding config from options
7205                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
7206                    Self::parse_embedding_config(emb_val)?
7207                } else {
7208                    None
7209                };
7210
7211                let config = VectorIndexConfig {
7212                    name: c.name,
7213                    label: c.label,
7214                    property: c.property,
7215                    metric: DistanceMetric::Cosine,
7216                    index_type,
7217                    embedding_config,
7218                    metadata: Default::default(),
7219                };
7220                Ok(LogicalPlan::CreateVectorIndex {
7221                    config,
7222                    if_not_exists: c.if_not_exists,
7223                })
7224            }
7225            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
7226                config: FullTextIndexConfig {
7227                    name: cfg.name,
7228                    label: cfg.label,
7229                    properties: cfg.properties,
7230                    tokenizer: TokenizerConfig::Standard,
7231                    with_positions: true,
7232                    metadata: Default::default(),
7233                },
7234                if_not_exists: cfg.if_not_exists,
7235            }),
7236            SchemaCommand::CreateScalarIndex(cfg) => {
7237                // Convert expressions to storage strings (strip variable prefix)
7238                let properties: Vec<String> = cfg
7239                    .expressions
7240                    .iter()
7241                    .map(Self::normalize_expression_for_storage)
7242                    .collect();
7243
7244                Ok(LogicalPlan::CreateScalarIndex {
7245                    config: ScalarIndexConfig {
7246                        name: cfg.name,
7247                        label: cfg.label,
7248                        properties,
7249                        index_type: ScalarIndexType::BTree,
7250                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
7251                        metadata: Default::default(),
7252                    },
7253                    if_not_exists: cfg.if_not_exists,
7254                })
7255            }
7256            SchemaCommand::CreateJsonFtsIndex(cfg) => {
7257                let with_positions = cfg
7258                    .options
7259                    .get("with_positions")
7260                    .and_then(|v| v.as_bool())
7261                    .unwrap_or(false);
7262                Ok(LogicalPlan::CreateJsonFtsIndex {
7263                    config: JsonFtsIndexConfig {
7264                        name: cfg.name,
7265                        label: cfg.label,
7266                        column: cfg.column,
7267                        paths: Vec::new(),
7268                        with_positions,
7269                        metadata: Default::default(),
7270                    },
7271                    if_not_exists: cfg.if_not_exists,
7272                })
7273            }
7274            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
7275                name: drop.name,
7276                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
7277            }),
7278            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
7279            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
7280            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
7281            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
7282            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
7283            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
7284            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
7285            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
7286            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
7287            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
7288            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
7289            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
7290            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
7291            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
7292            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
7293            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
7294                destination: path,
7295                options: HashMap::new(),
7296            }),
7297            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
7298                label: cmd.label,
7299                path: cmd.path,
7300                format: cmd.format,
7301                options: cmd.options,
7302            }),
7303            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
7304                label: cmd.label,
7305                path: cmd.path,
7306                format: cmd.format,
7307                options: cmd.options,
7308            }),
7309        }
7310    }
7311
7312    fn plan_transaction_command(
7313        &self,
7314        cmd: uni_cypher::ast::TransactionCommand,
7315    ) -> Result<LogicalPlan> {
7316        use uni_cypher::ast::TransactionCommand;
7317        match cmd {
7318            TransactionCommand::Begin => Ok(LogicalPlan::Begin),
7319            TransactionCommand::Commit => Ok(LogicalPlan::Commit),
7320            TransactionCommand::Rollback => Ok(LogicalPlan::Rollback),
7321        }
7322    }
7323
7324    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
7325        let obj = emb_val
7326            .as_object()
7327            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
7328
7329        // Parse alias (required)
7330        let alias = obj
7331            .get("alias")
7332            .and_then(|v| v.as_str())
7333            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
7334
7335        // Parse source properties (required)
7336        let source_properties = obj
7337            .get("source")
7338            .and_then(|v| v.as_array())
7339            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
7340            .iter()
7341            .filter_map(|v| v.as_str().map(|s| s.to_string()))
7342            .collect::<Vec<_>>();
7343
7344        if source_properties.is_empty() {
7345            return Err(anyhow!(
7346                "embedding.source must contain at least one property"
7347            ));
7348        }
7349
7350        let batch_size = obj
7351            .get("batch_size")
7352            .and_then(|v| v.as_u64())
7353            .map(|v| v as usize)
7354            .unwrap_or(32);
7355
7356        Ok(Some(EmbeddingConfig {
7357            alias: alias.to_string(),
7358            source_properties,
7359            batch_size,
7360        }))
7361    }
7362}
7363
7364/// Collect all properties referenced anywhere in the LogicalPlan tree.
7365///
7366/// This is critical for window functions: properties must be materialized
7367/// at the Scan node so they're available for window operations later.
7368///
7369/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
7370pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
7371    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
7372    collect_properties_recursive(plan, &mut properties);
7373    properties
7374}
7375
7376/// Recursively walk the LogicalPlan tree and collect all property references.
7377fn collect_properties_recursive(
7378    plan: &LogicalPlan,
7379    properties: &mut HashMap<String, HashSet<String>>,
7380) {
7381    match plan {
7382        LogicalPlan::Window {
7383            input,
7384            window_exprs,
7385        } => {
7386            // Collect from window expressions
7387            for expr in window_exprs {
7388                collect_properties_from_expr_into(expr, properties);
7389            }
7390            collect_properties_recursive(input, properties);
7391        }
7392        LogicalPlan::Project { input, projections } => {
7393            for (expr, _alias) in projections {
7394                collect_properties_from_expr_into(expr, properties);
7395            }
7396            collect_properties_recursive(input, properties);
7397        }
7398        LogicalPlan::Sort { input, order_by } => {
7399            for sort_item in order_by {
7400                collect_properties_from_expr_into(&sort_item.expr, properties);
7401            }
7402            collect_properties_recursive(input, properties);
7403        }
7404        LogicalPlan::Filter {
7405            input, predicate, ..
7406        } => {
7407            collect_properties_from_expr_into(predicate, properties);
7408            collect_properties_recursive(input, properties);
7409        }
7410        LogicalPlan::Aggregate {
7411            input,
7412            group_by,
7413            aggregates,
7414        } => {
7415            for expr in group_by {
7416                collect_properties_from_expr_into(expr, properties);
7417            }
7418            for expr in aggregates {
7419                collect_properties_from_expr_into(expr, properties);
7420            }
7421            collect_properties_recursive(input, properties);
7422        }
7423        LogicalPlan::Scan {
7424            filter: Some(expr), ..
7425        } => {
7426            collect_properties_from_expr_into(expr, properties);
7427        }
7428        LogicalPlan::Scan { filter: None, .. } => {}
7429        LogicalPlan::ExtIdLookup {
7430            filter: Some(expr), ..
7431        } => {
7432            collect_properties_from_expr_into(expr, properties);
7433        }
7434        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
7435        LogicalPlan::ScanAll {
7436            filter: Some(expr), ..
7437        } => {
7438            collect_properties_from_expr_into(expr, properties);
7439        }
7440        LogicalPlan::ScanAll { filter: None, .. } => {}
7441        LogicalPlan::ScanMainByLabels {
7442            filter: Some(expr), ..
7443        } => {
7444            collect_properties_from_expr_into(expr, properties);
7445        }
7446        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
7447        LogicalPlan::TraverseMainByType {
7448            input,
7449            target_filter,
7450            ..
7451        } => {
7452            if let Some(expr) = target_filter {
7453                collect_properties_from_expr_into(expr, properties);
7454            }
7455            collect_properties_recursive(input, properties);
7456        }
7457        LogicalPlan::Traverse {
7458            input,
7459            target_filter,
7460            step_variable: _,
7461            ..
7462        } => {
7463            if let Some(expr) = target_filter {
7464                collect_properties_from_expr_into(expr, properties);
7465            }
7466            // Note: Edge properties (step_variable) will be collected from expressions
7467            // that reference them. The edge_properties field in LogicalPlan is populated
7468            // later during physical planning based on this collected map.
7469            collect_properties_recursive(input, properties);
7470        }
7471        LogicalPlan::Unwind { input, expr, .. } => {
7472            collect_properties_from_expr_into(expr, properties);
7473            collect_properties_recursive(input, properties);
7474        }
7475        LogicalPlan::Create { input, pattern } => {
7476            // Mark variables referenced in CREATE patterns with "*" so plan_scan
7477            // adds structural projections (bare entity columns). Without this,
7478            // execute_create_pattern() can't find bound variables and creates
7479            // spurious new nodes instead of using existing MATCH'd ones.
7480            mark_pattern_variables(pattern, properties);
7481            collect_properties_recursive(input, properties);
7482        }
7483        LogicalPlan::CreateBatch { input, patterns } => {
7484            for pattern in patterns {
7485                mark_pattern_variables(pattern, properties);
7486            }
7487            collect_properties_recursive(input, properties);
7488        }
7489        LogicalPlan::Merge {
7490            input,
7491            pattern,
7492            on_match,
7493            on_create,
7494        } => {
7495            mark_pattern_variables(pattern, properties);
7496            if let Some(set_clause) = on_match {
7497                mark_set_item_variables(&set_clause.items, properties);
7498            }
7499            if let Some(set_clause) = on_create {
7500                mark_set_item_variables(&set_clause.items, properties);
7501            }
7502            collect_properties_recursive(input, properties);
7503        }
7504        LogicalPlan::Set { input, items } => {
7505            mark_set_item_variables(items, properties);
7506            collect_properties_recursive(input, properties);
7507        }
7508        LogicalPlan::Remove { input, items } => {
7509            for item in items {
7510                match item {
7511                    RemoveItem::Property(expr) => {
7512                        // REMOVE n.prop — collect the property and mark the variable
7513                        // with "*" so full structural projection is applied.
7514                        collect_properties_from_expr_into(expr, properties);
7515                        if let Expr::Property(base, _) = expr
7516                            && let Expr::Variable(var) = base.as_ref()
7517                        {
7518                            properties
7519                                .entry(var.clone())
7520                                .or_default()
7521                                .insert("*".to_string());
7522                        }
7523                    }
7524                    RemoveItem::Labels { variable, .. } => {
7525                        // REMOVE n:Label — mark n with "*"
7526                        properties
7527                            .entry(variable.clone())
7528                            .or_default()
7529                            .insert("*".to_string());
7530                    }
7531                }
7532            }
7533            collect_properties_recursive(input, properties);
7534        }
7535        LogicalPlan::Delete { input, items, .. } => {
7536            for expr in items {
7537                collect_properties_from_expr_into(expr, properties);
7538            }
7539            collect_properties_recursive(input, properties);
7540        }
7541        LogicalPlan::Foreach {
7542            input, list, body, ..
7543        } => {
7544            collect_properties_from_expr_into(list, properties);
7545            for plan in body {
7546                collect_properties_recursive(plan, properties);
7547            }
7548            collect_properties_recursive(input, properties);
7549        }
7550        LogicalPlan::Limit { input, .. } => {
7551            collect_properties_recursive(input, properties);
7552        }
7553        LogicalPlan::CrossJoin { left, right } => {
7554            collect_properties_recursive(left, properties);
7555            collect_properties_recursive(right, properties);
7556        }
7557        LogicalPlan::Apply {
7558            input,
7559            subquery,
7560            input_filter,
7561        } => {
7562            if let Some(expr) = input_filter {
7563                collect_properties_from_expr_into(expr, properties);
7564            }
7565            collect_properties_recursive(input, properties);
7566            collect_properties_recursive(subquery, properties);
7567        }
7568        LogicalPlan::Union { left, right, .. } => {
7569            collect_properties_recursive(left, properties);
7570            collect_properties_recursive(right, properties);
7571        }
7572        LogicalPlan::RecursiveCTE {
7573            initial, recursive, ..
7574        } => {
7575            collect_properties_recursive(initial, properties);
7576            collect_properties_recursive(recursive, properties);
7577        }
7578        LogicalPlan::ProcedureCall { arguments, .. } => {
7579            for arg in arguments {
7580                collect_properties_from_expr_into(arg, properties);
7581            }
7582        }
7583        LogicalPlan::VectorKnn { query, .. } => {
7584            collect_properties_from_expr_into(query, properties);
7585        }
7586        LogicalPlan::InvertedIndexLookup { terms, .. } => {
7587            collect_properties_from_expr_into(terms, properties);
7588        }
7589        LogicalPlan::ShortestPath { input, .. } => {
7590            collect_properties_recursive(input, properties);
7591        }
7592        LogicalPlan::AllShortestPaths { input, .. } => {
7593            collect_properties_recursive(input, properties);
7594        }
7595        LogicalPlan::Distinct { input } => {
7596            collect_properties_recursive(input, properties);
7597        }
7598        LogicalPlan::QuantifiedPattern {
7599            input,
7600            pattern_plan,
7601            ..
7602        } => {
7603            collect_properties_recursive(input, properties);
7604            collect_properties_recursive(pattern_plan, properties);
7605        }
7606        LogicalPlan::BindZeroLengthPath { input, .. } => {
7607            collect_properties_recursive(input, properties);
7608        }
7609        LogicalPlan::BindPath { input, .. } => {
7610            collect_properties_recursive(input, properties);
7611        }
7612        LogicalPlan::SubqueryCall { input, subquery } => {
7613            collect_properties_recursive(input, properties);
7614            collect_properties_recursive(subquery, properties);
7615        }
7616        LogicalPlan::LocyProject {
7617            input, projections, ..
7618        } => {
7619            for (expr, _alias) in projections {
7620                match expr {
7621                    // Bare variable in LocyProject: only need _vid for node variables
7622                    // (plan_locy_project extracts VID directly). Adding "*" would create
7623                    // a structural Struct column that conflicts with derived scan columns.
7624                    Expr::Variable(name) if !name.contains('.') => {
7625                        properties
7626                            .entry(name.clone())
7627                            .or_default()
7628                            .insert("_vid".to_string());
7629                    }
7630                    _ => collect_properties_from_expr_into(expr, properties),
7631                }
7632            }
7633            collect_properties_recursive(input, properties);
7634        }
7635        LogicalPlan::LocyFold {
7636            input,
7637            fold_bindings,
7638            ..
7639        } => {
7640            for (_name, expr) in fold_bindings {
7641                collect_properties_from_expr_into(expr, properties);
7642            }
7643            collect_properties_recursive(input, properties);
7644        }
7645        LogicalPlan::LocyBestBy {
7646            input, criteria, ..
7647        } => {
7648            for (expr, _asc) in criteria {
7649                collect_properties_from_expr_into(expr, properties);
7650            }
7651            collect_properties_recursive(input, properties);
7652        }
7653        LogicalPlan::LocyPriority { input, .. } => {
7654            collect_properties_recursive(input, properties);
7655        }
7656        // DDL and other plans don't reference properties
7657        _ => {}
7658    }
7659}
7660
7661/// Mark target variables from SET items with "*" and collect value expressions.
7662fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
7663    for item in items {
7664        match item {
7665            SetItem::Property { expr, value } => {
7666                // SET n.prop = val — mark n via the property expr, collect from value.
7667                // Also mark the variable with "*" for full structural projection so
7668                // edge identity fields (_src/_dst) are available for write operations.
7669                collect_properties_from_expr_into(expr, properties);
7670                collect_properties_from_expr_into(value, properties);
7671                if let Expr::Property(base, _) = expr
7672                    && let Expr::Variable(var) = base.as_ref()
7673                {
7674                    properties
7675                        .entry(var.clone())
7676                        .or_default()
7677                        .insert("*".to_string());
7678                }
7679            }
7680            SetItem::Labels { variable, .. } => {
7681                // SET n:Label — need full access to n
7682                properties
7683                    .entry(variable.clone())
7684                    .or_default()
7685                    .insert("*".to_string());
7686            }
7687            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
7688                // SET n = {props} or SET n += {props}
7689                properties
7690                    .entry(variable.clone())
7691                    .or_default()
7692                    .insert("*".to_string());
7693                collect_properties_from_expr_into(value, properties);
7694            }
7695        }
7696    }
7697}
7698
7699/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
7700/// adds structural projections (bare entity Struct columns) for them.
7701/// This is needed so that execute_create_pattern() can find bound variables
7702/// in the row HashMap and reuse existing nodes instead of creating new ones.
7703fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
7704    for path in &pattern.paths {
7705        if let Some(ref v) = path.variable {
7706            properties
7707                .entry(v.clone())
7708                .or_default()
7709                .insert("*".to_string());
7710        }
7711        for element in &path.elements {
7712            match element {
7713                PatternElement::Node(n) => {
7714                    if let Some(ref v) = n.variable {
7715                        properties
7716                            .entry(v.clone())
7717                            .or_default()
7718                            .insert("*".to_string());
7719                    }
7720                    // Also collect properties from inline property expressions
7721                    if let Some(ref props) = n.properties {
7722                        collect_properties_from_expr_into(props, properties);
7723                    }
7724                }
7725                PatternElement::Relationship(r) => {
7726                    if let Some(ref v) = r.variable {
7727                        properties
7728                            .entry(v.clone())
7729                            .or_default()
7730                            .insert("*".to_string());
7731                    }
7732                    if let Some(ref props) = r.properties {
7733                        collect_properties_from_expr_into(props, properties);
7734                    }
7735                }
7736                PatternElement::Parenthesized { pattern, .. } => {
7737                    let sub = Pattern {
7738                        paths: vec![pattern.as_ref().clone()],
7739                    };
7740                    mark_pattern_variables(&sub, properties);
7741                }
7742            }
7743        }
7744    }
7745}
7746
7747/// Collect properties from an expression into a HashMap.
7748fn collect_properties_from_expr_into(
7749    expr: &Expr,
7750    properties: &mut HashMap<String, HashSet<String>>,
7751) {
7752    match expr {
7753        Expr::PatternComprehension {
7754            where_clause,
7755            map_expr,
7756            ..
7757        } => {
7758            // Collect properties from the WHERE clause and map expression.
7759            // The pattern itself creates local bindings that don't need
7760            // property collection from the outer scope.
7761            if let Some(where_expr) = where_clause {
7762                collect_properties_from_expr_into(where_expr, properties);
7763            }
7764            collect_properties_from_expr_into(map_expr, properties);
7765        }
7766        Expr::Variable(name) => {
7767            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
7768            if let Some((var, prop)) = name.split_once('.') {
7769                properties
7770                    .entry(var.to_string())
7771                    .or_default()
7772                    .insert(prop.to_string());
7773            } else {
7774                // Bare variable (e.g., RETURN n) — needs all properties materialized
7775                properties
7776                    .entry(name.clone())
7777                    .or_default()
7778                    .insert("*".to_string());
7779            }
7780        }
7781        Expr::Property(base, name) => {
7782            // Extract variable name from the base expression
7783            if let Expr::Variable(var) = base.as_ref() {
7784                properties
7785                    .entry(var.clone())
7786                    .or_default()
7787                    .insert(name.clone());
7788                // Don't recurse into Variable — that would mark it as a bare
7789                // variable reference (adding "*") when it's just a property base.
7790            } else {
7791                // Recurse for complex base expressions (nested property, function call, etc.)
7792                collect_properties_from_expr_into(base, properties);
7793            }
7794        }
7795        Expr::BinaryOp { left, right, .. } => {
7796            collect_properties_from_expr_into(left, properties);
7797            collect_properties_from_expr_into(right, properties);
7798        }
7799        Expr::FunctionCall {
7800            name,
7801            args,
7802            window_spec,
7803            ..
7804        } => {
7805            // Analyze function for property requirements (pushdown hydration)
7806            analyze_function_property_requirements(name, args, properties);
7807
7808            // Collect from arguments
7809            for arg in args {
7810                collect_properties_from_expr_into(arg, properties);
7811            }
7812
7813            // Collect from window spec (PARTITION BY, ORDER BY)
7814            if let Some(spec) = window_spec {
7815                for part_expr in &spec.partition_by {
7816                    collect_properties_from_expr_into(part_expr, properties);
7817                }
7818                for sort_item in &spec.order_by {
7819                    collect_properties_from_expr_into(&sort_item.expr, properties);
7820                }
7821            }
7822        }
7823        Expr::UnaryOp { expr, .. } => {
7824            collect_properties_from_expr_into(expr, properties);
7825        }
7826        Expr::List(items) => {
7827            for item in items {
7828                collect_properties_from_expr_into(item, properties);
7829            }
7830        }
7831        Expr::Map(entries) => {
7832            for (_key, value) in entries {
7833                collect_properties_from_expr_into(value, properties);
7834            }
7835        }
7836        Expr::ListComprehension {
7837            list,
7838            where_clause,
7839            map_expr,
7840            ..
7841        } => {
7842            collect_properties_from_expr_into(list, properties);
7843            if let Some(where_expr) = where_clause {
7844                collect_properties_from_expr_into(where_expr, properties);
7845            }
7846            collect_properties_from_expr_into(map_expr, properties);
7847        }
7848        Expr::Case {
7849            expr,
7850            when_then,
7851            else_expr,
7852        } => {
7853            if let Some(scrutinee_expr) = expr {
7854                collect_properties_from_expr_into(scrutinee_expr, properties);
7855            }
7856            for (when, then) in when_then {
7857                collect_properties_from_expr_into(when, properties);
7858                collect_properties_from_expr_into(then, properties);
7859            }
7860            if let Some(default_expr) = else_expr {
7861                collect_properties_from_expr_into(default_expr, properties);
7862            }
7863        }
7864        Expr::Quantifier {
7865            list, predicate, ..
7866        } => {
7867            collect_properties_from_expr_into(list, properties);
7868            collect_properties_from_expr_into(predicate, properties);
7869        }
7870        Expr::Reduce {
7871            init, list, expr, ..
7872        } => {
7873            collect_properties_from_expr_into(init, properties);
7874            collect_properties_from_expr_into(list, properties);
7875            collect_properties_from_expr_into(expr, properties);
7876        }
7877        Expr::Exists { query, .. } => {
7878            // Walk into EXISTS body to collect property references for outer-scope variables.
7879            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
7880            // are included in the outer scan's property list. Extra properties collected for
7881            // inner-only variables are harmless — the outer scan ignores unknown variable names.
7882            collect_properties_from_subquery(query, properties);
7883        }
7884        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
7885            collect_properties_from_subquery(query, properties);
7886        }
7887        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
7888            collect_properties_from_expr_into(expr, properties);
7889        }
7890        Expr::In { expr, list } => {
7891            collect_properties_from_expr_into(expr, properties);
7892            collect_properties_from_expr_into(list, properties);
7893        }
7894        Expr::ArrayIndex { array, index } => {
7895            if let Expr::Variable(var) = array.as_ref() {
7896                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
7897                    // Static string key: e['name'] → only need that specific property
7898                    properties
7899                        .entry(var.clone())
7900                        .or_default()
7901                        .insert(prop_name.clone());
7902                } else {
7903                    // Dynamic property access: e[prop] → need all properties
7904                    properties
7905                        .entry(var.clone())
7906                        .or_default()
7907                        .insert("*".to_string());
7908                }
7909            }
7910            collect_properties_from_expr_into(array, properties);
7911            collect_properties_from_expr_into(index, properties);
7912        }
7913        Expr::ArraySlice { array, start, end } => {
7914            collect_properties_from_expr_into(array, properties);
7915            if let Some(start_expr) = start {
7916                collect_properties_from_expr_into(start_expr, properties);
7917            }
7918            if let Some(end_expr) = end {
7919                collect_properties_from_expr_into(end_expr, properties);
7920            }
7921        }
7922        Expr::ValidAt {
7923            entity,
7924            timestamp,
7925            start_prop,
7926            end_prop,
7927        } => {
7928            // Extract property requirements from ValidAt expression
7929            if let Expr::Variable(var) = entity.as_ref() {
7930                if let Some(prop) = start_prop {
7931                    properties
7932                        .entry(var.clone())
7933                        .or_default()
7934                        .insert(prop.clone());
7935                }
7936                if let Some(prop) = end_prop {
7937                    properties
7938                        .entry(var.clone())
7939                        .or_default()
7940                        .insert(prop.clone());
7941                }
7942            }
7943            collect_properties_from_expr_into(entity, properties);
7944            collect_properties_from_expr_into(timestamp, properties);
7945        }
7946        Expr::MapProjection { base, items } => {
7947            collect_properties_from_expr_into(base, properties);
7948            for item in items {
7949                match item {
7950                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
7951                        if let Expr::Variable(var) = base.as_ref() {
7952                            properties
7953                                .entry(var.clone())
7954                                .or_default()
7955                                .insert(prop.clone());
7956                        }
7957                    }
7958                    uni_cypher::ast::MapProjectionItem::AllProperties => {
7959                        if let Expr::Variable(var) = base.as_ref() {
7960                            properties
7961                                .entry(var.clone())
7962                                .or_default()
7963                                .insert("*".to_string());
7964                        }
7965                    }
7966                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
7967                        collect_properties_from_expr_into(expr, properties);
7968                    }
7969                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
7970                }
7971            }
7972        }
7973        Expr::LabelCheck { expr, .. } => {
7974            collect_properties_from_expr_into(expr, properties);
7975        }
7976        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
7977        // Mark them with "*" so the outer scan produces structural projections that
7978        // extract_row_params can resolve.
7979        Expr::Parameter(name) => {
7980            properties
7981                .entry(name.clone())
7982                .or_default()
7983                .insert("*".to_string());
7984        }
7985        // Literals and wildcard don't reference properties
7986        Expr::Literal(_) | Expr::Wildcard => {}
7987    }
7988}
7989
7990/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
7991///
7992/// This is needed so that correlated property accesses like `a.city` inside
7993/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
7994/// scan to include `a.city` in its projected columns.
7995fn collect_properties_from_subquery(
7996    query: &Query,
7997    properties: &mut HashMap<String, HashSet<String>>,
7998) {
7999    match query {
8000        Query::Single(stmt) => {
8001            for clause in &stmt.clauses {
8002                match clause {
8003                    Clause::Match(m) => {
8004                        if let Some(ref wc) = m.where_clause {
8005                            collect_properties_from_expr_into(wc, properties);
8006                        }
8007                    }
8008                    Clause::With(w) => {
8009                        for item in &w.items {
8010                            if let ReturnItem::Expr { expr, .. } = item {
8011                                collect_properties_from_expr_into(expr, properties);
8012                            }
8013                        }
8014                        if let Some(ref wc) = w.where_clause {
8015                            collect_properties_from_expr_into(wc, properties);
8016                        }
8017                    }
8018                    Clause::Return(r) => {
8019                        for item in &r.items {
8020                            if let ReturnItem::Expr { expr, .. } = item {
8021                                collect_properties_from_expr_into(expr, properties);
8022                            }
8023                        }
8024                    }
8025                    _ => {}
8026                }
8027            }
8028        }
8029        Query::Union { left, right, .. } => {
8030            collect_properties_from_subquery(left, properties);
8031            collect_properties_from_subquery(right, properties);
8032        }
8033        _ => {}
8034    }
8035}
8036
8037/// Analyze function calls to extract property requirements for pushdown hydration
8038///
8039/// This function examines function calls and their arguments to determine which properties
8040/// need to be loaded for entity arguments. For example:
8041/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
8042/// - keys(n) -> n needs all properties (*)
8043///
8044/// The extracted requirements are added to the properties map for later use during
8045/// scan planning.
8046fn analyze_function_property_requirements(
8047    name: &str,
8048    args: &[Expr],
8049    properties: &mut HashMap<String, HashSet<String>>,
8050) {
8051    use crate::query::function_props::get_function_spec;
8052
8053    /// Helper to mark a variable as needing all properties.
8054    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
8055        properties
8056            .entry(var.to_string())
8057            .or_default()
8058            .insert("*".to_string());
8059    }
8060
8061    let Some(spec) = get_function_spec(name) else {
8062        // Unknown function: conservatively require all properties for variable args
8063        for arg in args {
8064            if let Expr::Variable(var) = arg {
8065                mark_wildcard(var, properties);
8066            }
8067        }
8068        return;
8069    };
8070
8071    // Extract property names from string literal arguments
8072    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
8073        let entity_arg = args.get(entity_arg_idx);
8074        let prop_arg = args.get(prop_arg_idx);
8075
8076        match (entity_arg, prop_arg) {
8077            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
8078                properties
8079                    .entry(var.clone())
8080                    .or_default()
8081                    .insert(prop.clone());
8082            }
8083            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
8084                // Parameter property name: need all properties
8085                mark_wildcard(var, properties);
8086            }
8087            _ => {}
8088        }
8089    }
8090
8091    // Handle full entity requirement (keys(), properties())
8092    if spec.needs_full_entity {
8093        for &idx in spec.entity_args {
8094            if let Some(Expr::Variable(var)) = args.get(idx) {
8095                mark_wildcard(var, properties);
8096            }
8097        }
8098    }
8099}
8100
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Variable name -> required property names, as filled in by the code under test.
    type PropertyMap = HashMap<String, HashSet<String>>;

    /// Build a variable reference expression.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Build a string literal expression.
    fn string_lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Build a query parameter expression.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Run the function-call analysis on a fresh map and return the result.
    fn analyze(function: &str, args: &[Expr]) -> PropertyMap {
        let mut properties = PropertyMap::new();
        analyze_function_property_requirements(function, args, &mut properties);
        properties
    }

    /// Run the expression walker on a fresh map and return the result.
    fn collect(expr: &Expr) -> PropertyMap {
        let mut properties = PropertyMap::new();
        collect_properties_from_expr_into(expr, &mut properties);
        properties
    }

    /// Fetch the property set recorded for `variable`, failing the test if absent.
    fn required<'a>(properties: &'a PropertyMap, variable: &str) -> &'a HashSet<String> {
        properties
            .get(variable)
            .unwrap_or_else(|| panic!("no properties recorded for `{variable}`"))
    }

    /// Build the expected property set from string slices.
    fn set_of(names: &[&str]) -> HashSet<String> {
        names.iter().map(|&name| name.to_owned()).collect()
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = [var("e"), string_lit("start"), string_lit("end"), var("ts")];

        let properties = analyze("uni.temporal.validAt", &args);

        assert_eq!(required(&properties, "e"), &set_of(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let properties = analyze("keys", &[var("n")]);

        assert_eq!(required(&properties, "n"), &set_of(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let properties = analyze("properties", &[var("n")]);

        assert_eq!(required(&properties, "n"), &set_of(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let properties = analyze("customUdf", &[var("e")]);

        assert_eq!(required(&properties, "e"), &set_of(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = [var("e"), param("start"), param("end"), var("ts")];

        let properties = analyze("uni.temporal.validAt", &args);

        assert!(required(&properties, "e").contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt variant: both boundary properties are picked up for `e`.
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };

        let properties = collect(&validat_expr);

        let e_props = required(&properties, "e");
        assert!(e_props.contains("valid_from"));
        assert!(e_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };

        let properties = collect(&array_index_expr);

        assert!(required(&properties, "e").contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());

        let properties = collect(&prop_access);

        assert!(required(&properties, "e").contains("name"));
    }
}