Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::PredicateAnalyzer;
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Classification of a query variable, used by semantic validation to decide
/// which operations are legal on it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum VariableType {
    /// A node binding, e.g. from `MATCH (n)` or `CREATE (n)`.
    Node,
    /// A relationship binding, e.g. from `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. from `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar binding, e.g. from `WITH expr AS x` or `UNWIND list AS item`.
    /// The value may be a map or otherwise dynamic, so property access is allowed.
    Scalar,
    /// A scalar known to originate from a non-graph literal (int, float, bool,
    /// string, list). Property access on these is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (string
    /// variables handed to `plan_with_scope`). It is compatible with every
    /// concrete type, which lets subqueries re-bind the variable.
    Imported,
}
45
46impl VariableType {
47    /// Returns true if this type is compatible with the expected type.
48    ///
49    /// `Imported` is always compatible because the actual type is unknown at plan time.
50    fn is_compatible_with(self, expected: VariableType) -> bool {
51        self == expected
52            || self == VariableType::Imported
53            // ScalarLiteral behaves like Scalar for compatibility checks
54            || (self == VariableType::ScalarLiteral && expected == VariableType::Scalar)
55    }
56}
57
58/// Information about a variable in scope.
59#[derive(Debug, Clone)]
60pub struct VariableInfo {
61    pub name: String,
62    pub var_type: VariableType,
63    /// True if this is a variable-length path (VLP) step variable.
64    /// VLP step variables are typed as Edge but semantically hold edge lists.
65    pub is_vlp: bool,
66}
67
68impl VariableInfo {
69    pub fn new(name: String, var_type: VariableType) -> Self {
70        Self {
71            name,
72            var_type,
73            is_vlp: false,
74        }
75    }
76}
77
78/// Find a variable in scope by name.
79fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
80    vars.iter().find(|v| v.name == name)
81}
82
83/// Check if a variable is in scope.
84fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
85    find_var_in_scope(vars, name).is_some()
86}
87
88/// Check if an expression contains a pattern predicate.
89fn contains_pattern_predicate(expr: &Expr) -> bool {
90    if matches!(
91        expr,
92        Expr::Exists {
93            from_pattern_predicate: true,
94            ..
95        }
96    ) {
97        return true;
98    }
99    let mut found = false;
100    expr.for_each_child(&mut |child| {
101        if !found {
102            found = contains_pattern_predicate(child);
103        }
104    });
105    found
106}
107
108/// Add a variable to scope with type conflict validation.
109/// Returns an error if the variable already exists with a different type.
110fn add_var_to_scope(
111    vars: &mut Vec<VariableInfo>,
112    name: &str,
113    var_type: VariableType,
114) -> Result<()> {
115    if name.is_empty() {
116        return Ok(());
117    }
118
119    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
120        if existing.var_type == VariableType::Imported {
121            // Imported vars upgrade to the concrete type
122            existing.var_type = var_type;
123        } else if var_type == VariableType::Imported || existing.var_type == var_type {
124            // New type is Imported (keep existing) or same type — no conflict
125        } else if matches!(
126            existing.var_type,
127            VariableType::Scalar | VariableType::ScalarLiteral
128        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
129        {
130            // Scalar can be used as Node/Edge in CREATE context — a scalar
131            // holding a node/edge reference is valid for pattern use
132            existing.var_type = var_type;
133        } else {
134            return Err(anyhow!(
135                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
136                name,
137                existing.var_type,
138                var_type
139            ));
140        }
141    } else {
142        vars.push(VariableInfo::new(name.to_string(), var_type));
143    }
144    Ok(())
145}
146
147/// Convert VariableInfo vec to String vec for backward compatibility
148fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
149    vars.iter().map(|v| v.name.clone()).collect()
150}
151
152fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
153    match expr {
154        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
155            .map(|info| info.var_type)
156            .unwrap_or(VariableType::Scalar),
157        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
158        // Known non-graph literals: property access is NOT valid on these.
159        Expr::Literal(CypherLiteral::Integer(_))
160        | Expr::Literal(CypherLiteral::Float(_))
161        | Expr::Literal(CypherLiteral::String(_))
162        | Expr::Literal(CypherLiteral::Bool(_))
163        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
164        Expr::FunctionCall { name, args, .. } => {
165            let lower = name.to_lowercase();
166            if lower == "coalesce" {
167                infer_coalesce_type(args, vars_in_scope)
168            } else if lower == "collect" && !args.is_empty() {
169                let collected = infer_with_output_type(&args[0], vars_in_scope);
170                if matches!(
171                    collected,
172                    VariableType::Node
173                        | VariableType::Edge
174                        | VariableType::Path
175                        | VariableType::Imported
176                ) {
177                    collected
178                } else {
179                    VariableType::Scalar
180                }
181            } else {
182                VariableType::Scalar
183            }
184        }
185        // WITH list literals/expressions produce scalar list values. Preserving
186        // entity typing here causes invalid node/edge reuse in later MATCH clauses
187        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
188        // Lists are ScalarLiteral since property access is not valid on them.
189        Expr::List(_) => VariableType::ScalarLiteral,
190        _ => VariableType::Scalar,
191    }
192}
193
194fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
195    let mut resolved: Option<VariableType> = None;
196    let mut saw_imported = false;
197    for arg in args {
198        let t = infer_with_output_type(arg, vars_in_scope);
199        match t {
200            VariableType::Node | VariableType::Edge | VariableType::Path => {
201                if let Some(existing) = resolved {
202                    if existing != t {
203                        return VariableType::Scalar;
204                    }
205                } else {
206                    resolved = Some(t);
207                }
208            }
209            VariableType::Imported => saw_imported = true,
210            VariableType::Scalar | VariableType::ScalarLiteral => {}
211        }
212    }
213    if let Some(t) = resolved {
214        t
215    } else if saw_imported {
216        VariableType::Imported
217    } else {
218        VariableType::Scalar
219    }
220}
221
222fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
223    match expr {
224        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
225            .map(|info| info.var_type)
226            .unwrap_or(VariableType::Scalar),
227        Expr::FunctionCall { name, args, .. }
228            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
229        {
230            infer_with_output_type(&args[0], vars_in_scope)
231        }
232        Expr::List(items) => {
233            let mut inferred: Option<VariableType> = None;
234            for item in items {
235                let t = infer_with_output_type(item, vars_in_scope);
236                if !matches!(
237                    t,
238                    VariableType::Node
239                        | VariableType::Edge
240                        | VariableType::Path
241                        | VariableType::Imported
242                ) {
243                    return VariableType::Scalar;
244                }
245                if let Some(existing) = inferred {
246                    if existing != t
247                        && t != VariableType::Imported
248                        && existing != VariableType::Imported
249                    {
250                        return VariableType::Scalar;
251                    }
252                    if existing == VariableType::Imported && t != VariableType::Imported {
253                        inferred = Some(t);
254                    }
255                } else {
256                    inferred = Some(t);
257                }
258            }
259            inferred.unwrap_or(VariableType::Scalar)
260        }
261        _ => VariableType::Scalar,
262    }
263}
264
265/// Collect all variable names referenced in an expression
266fn collect_expr_variables(expr: &Expr) -> Vec<String> {
267    let mut vars = Vec::new();
268    collect_expr_variables_inner(expr, &mut vars);
269    vars
270}
271
272fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
273    let mut add_var = |name: &String| {
274        if !vars.contains(name) {
275            vars.push(name.clone());
276        }
277    };
278
279    match expr {
280        Expr::Variable(name) => add_var(name),
281        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
282        Expr::BinaryOp { left, right, .. } => {
283            collect_expr_variables_inner(left, vars);
284            collect_expr_variables_inner(right, vars);
285        }
286        Expr::UnaryOp { expr: e, .. }
287        | Expr::IsNull(e)
288        | Expr::IsNotNull(e)
289        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
290        Expr::FunctionCall { args, .. } => {
291            for a in args {
292                collect_expr_variables_inner(a, vars);
293            }
294        }
295        Expr::List(items) => {
296            for item in items {
297                collect_expr_variables_inner(item, vars);
298            }
299        }
300        Expr::In { expr: e, list } => {
301            collect_expr_variables_inner(e, vars);
302            collect_expr_variables_inner(list, vars);
303        }
304        Expr::Case {
305            expr: case_expr,
306            when_then,
307            else_expr,
308        } => {
309            if let Some(e) = case_expr {
310                collect_expr_variables_inner(e, vars);
311            }
312            for (w, t) in when_then {
313                collect_expr_variables_inner(w, vars);
314                collect_expr_variables_inner(t, vars);
315            }
316            if let Some(e) = else_expr {
317                collect_expr_variables_inner(e, vars);
318            }
319        }
320        Expr::Map(entries) => {
321            for (_, v) in entries {
322                collect_expr_variables_inner(v, vars);
323            }
324        }
325        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
326        Expr::ArrayIndex { array, index } => {
327            collect_expr_variables_inner(array, vars);
328            collect_expr_variables_inner(index, vars);
329        }
330        Expr::ArraySlice { array, start, end } => {
331            collect_expr_variables_inner(array, vars);
332            if let Some(s) = start {
333                collect_expr_variables_inner(s, vars);
334            }
335            if let Some(e) = end {
336                collect_expr_variables_inner(e, vars);
337            }
338        }
339        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
340        // they introduce local variable bindings not in outer scope.
341        _ => {}
342    }
343}
344
345/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
346///
347/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
348/// before the final RETURN projection without losing alias semantics.
349fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
350    let repr = expr.to_string_repr();
351    if let Some(rewritten) = aliases.get(&repr) {
352        return rewritten.clone();
353    }
354
355    match expr {
356        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
357        Expr::Property(base, prop) => Expr::Property(
358            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
359            prop.clone(),
360        ),
361        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
362            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
363            op: *op,
364            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
365        },
366        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
367            op: *op,
368            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
369        },
370        Expr::FunctionCall {
371            name,
372            args,
373            distinct,
374            window_spec,
375        } => Expr::FunctionCall {
376            name: name.clone(),
377            args: args
378                .iter()
379                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
380                .collect(),
381            distinct: *distinct,
382            window_spec: window_spec.clone(),
383        },
384        Expr::List(items) => Expr::List(
385            items
386                .iter()
387                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
388                .collect(),
389        ),
390        Expr::Map(entries) => Expr::Map(
391            entries
392                .iter()
393                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
394                .collect(),
395        ),
396        Expr::Case {
397            expr: case_expr,
398            when_then,
399            else_expr,
400        } => Expr::Case {
401            expr: case_expr
402                .as_ref()
403                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
404            when_then: when_then
405                .iter()
406                .map(|(w, t)| {
407                    (
408                        rewrite_order_by_expr_with_aliases(w, aliases),
409                        rewrite_order_by_expr_with_aliases(t, aliases),
410                    )
411                })
412                .collect(),
413            else_expr: else_expr
414                .as_ref()
415                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
416        },
417        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
418        // they introduce local variable bindings that could shadow aliases.
419        _ => expr.clone(),
420    }
421}
422
423/// Validate function call argument types.
424/// Returns error if type constraints are violated.
425fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
426    let name_lower = name.to_lowercase();
427
428    // labels() requires Node
429    if name_lower == "labels"
430        && let Some(Expr::Variable(var_name)) = args.first()
431        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
432        && !info.var_type.is_compatible_with(VariableType::Node)
433    {
434        return Err(anyhow!(
435            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
436        ));
437    }
438
439    // type() requires Edge
440    if name_lower == "type"
441        && let Some(Expr::Variable(var_name)) = args.first()
442        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
443        && !info.var_type.is_compatible_with(VariableType::Edge)
444    {
445        return Err(anyhow!(
446            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
447        ));
448    }
449
450    // properties() requires Node/Edge/Map (not scalar literals)
451    if name_lower == "properties"
452        && let Some(arg) = args.first()
453    {
454        match arg {
455            Expr::Literal(CypherLiteral::Integer(_))
456            | Expr::Literal(CypherLiteral::Float(_))
457            | Expr::Literal(CypherLiteral::String(_))
458            | Expr::Literal(CypherLiteral::Bool(_))
459            | Expr::List(_) => {
460                return Err(anyhow!(
461                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
462                ));
463            }
464            Expr::Variable(var_name) => {
465                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
466                    && matches!(
467                        info.var_type,
468                        VariableType::Scalar | VariableType::ScalarLiteral
469                    )
470                {
471                    return Err(anyhow!(
472                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
473                    ));
474                }
475            }
476            _ => {}
477        }
478    }
479
480    // nodes()/relationships() require Path
481    if (name_lower == "nodes" || name_lower == "relationships")
482        && let Some(Expr::Variable(var_name)) = args.first()
483        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
484        && !info.var_type.is_compatible_with(VariableType::Path)
485    {
486        return Err(anyhow!(
487            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
488            name_lower
489        ));
490    }
491
492    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
493    if name_lower == "size"
494        && let Some(Expr::Variable(var_name)) = args.first()
495        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
496        && info.var_type == VariableType::Path
497    {
498        return Err(anyhow!(
499            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
500        ));
501    }
502
503    // length()/size() do NOT accept Node or single-Edge arguments.
504    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
505    // but are actually edge lists — size()/length() is valid on those.
506    if (name_lower == "length" || name_lower == "size")
507        && let Some(Expr::Variable(var_name)) = args.first()
508        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
509        && (info.var_type == VariableType::Node
510            || (info.var_type == VariableType::Edge && !info.is_vlp))
511    {
512        return Err(anyhow!(
513            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
514            name_lower
515        ));
516    }
517
518    Ok(())
519}
520
521/// Check if an expression is a non-boolean literal.
522fn is_non_boolean_literal(expr: &Expr) -> bool {
523    matches!(
524        expr,
525        Expr::Literal(CypherLiteral::Integer(_))
526            | Expr::Literal(CypherLiteral::Float(_))
527            | Expr::Literal(CypherLiteral::String(_))
528            | Expr::List(_)
529            | Expr::Map(_)
530    )
531}
532
533/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
534fn validate_boolean_expression(expr: &Expr) -> Result<()> {
535    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
536    if let Expr::BinaryOp { left, op, right } = expr
537        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
538    {
539        let op_name = format!("{:?}", op).to_uppercase();
540        for operand in [left.as_ref(), right.as_ref()] {
541            if is_non_boolean_literal(operand) {
542                return Err(anyhow!(
543                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
544                    op_name
545                ));
546            }
547        }
548    }
549    if let Expr::UnaryOp {
550        op: uni_cypher::ast::UnaryOp::Not,
551        expr: inner,
552    } = expr
553        && is_non_boolean_literal(inner)
554    {
555        return Err(anyhow!(
556            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
557        ));
558    }
559    let mut result = Ok(());
560    expr.for_each_child(&mut |child| {
561        if result.is_ok() {
562            result = validate_boolean_expression(child);
563        }
564    });
565    result
566}
567
568/// Validate that all variables used in an expression are in scope.
569fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
570    let used_vars = collect_expr_variables(expr);
571    for var in used_vars {
572        if !is_var_in_scope(vars_in_scope, &var) {
573            return Err(anyhow!(
574                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
575                var
576            ));
577        }
578    }
579    Ok(())
580}
581
/// Report whether `name` is one of the built-in aggregate functions.
///
/// The comparison ignores letter case, so callers do not need to lowercase
/// the name first.
fn is_aggregate_function_name(name: &str) -> bool {
    const AGGREGATE_FUNCTIONS: [&str; 10] = [
        "count",
        "sum",
        "avg",
        "min",
        "max",
        "collect",
        "stdev",
        "stdevp",
        "percentiledisc",
        "percentilecont",
    ];
    AGGREGATE_FUNCTIONS
        .iter()
        .any(|known| name.eq_ignore_ascii_case(known))
}
598
599/// Returns true if the expression is a window function (FunctionCall with window_spec).
600fn is_window_function(expr: &Expr) -> bool {
601    matches!(
602        expr,
603        Expr::FunctionCall {
604            window_spec: Some(_),
605            ..
606        }
607    )
608}
609
610/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
611/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
612/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
613/// size() call, BinaryOp, etc.).
614fn is_compound_aggregate(expr: &Expr) -> bool {
615    if !expr.is_aggregate() {
616        return false;
617    }
618    match expr {
619        Expr::FunctionCall {
620            name, window_spec, ..
621        } => {
622            // A bare aggregate FunctionCall is NOT compound
623            if window_spec.is_some() {
624                return true; // window wrapping an aggregate — treat as compound
625            }
626            !is_aggregate_function_name(name)
627        }
628        // Subquery aggregates are "bare" (not compound)
629        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
630        // Everything else (ListComprehension, BinaryOp, etc.) is compound
631        _ => true,
632    }
633}
634
635/// Recursively collect all bare aggregate FunctionCall sub-expressions from
636/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
637/// the outermost aggregate boundaries).
638///
639/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
640/// is searched because the body (`map_expr`, `predicate`, `expr`) references
641/// the loop variable, not outer-scope aggregates.
642fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
643    let mut out = Vec::new();
644    extract_inner_aggregates_rec(expr, &mut out);
645    out
646}
647
648fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
649    match expr {
650        Expr::FunctionCall {
651            name, window_spec, ..
652        } if window_spec.is_none() && is_aggregate_function_name(name) => {
653            // Found a bare aggregate — collect it and stop recursing
654            out.push(expr.clone());
655        }
656        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
657            out.push(expr.clone());
658        }
659        // For list comprehension, only search the `list` source for aggregates
660        Expr::ListComprehension { list, .. } => {
661            extract_inner_aggregates_rec(list, out);
662        }
663        // For quantifier, only search the `list` source
664        Expr::Quantifier { list, .. } => {
665            extract_inner_aggregates_rec(list, out);
666        }
667        // For reduce, search `init` and `list` (not the body `expr`)
668        Expr::Reduce { init, list, .. } => {
669            extract_inner_aggregates_rec(init, out);
670            extract_inner_aggregates_rec(list, out);
671        }
672        // Standard recursive cases
673        Expr::FunctionCall { args, .. } => {
674            for arg in args {
675                extract_inner_aggregates_rec(arg, out);
676            }
677        }
678        Expr::BinaryOp { left, right, .. } => {
679            extract_inner_aggregates_rec(left, out);
680            extract_inner_aggregates_rec(right, out);
681        }
682        Expr::UnaryOp { expr: e, .. }
683        | Expr::IsNull(e)
684        | Expr::IsNotNull(e)
685        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
686        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
687        Expr::List(items) => {
688            for item in items {
689                extract_inner_aggregates_rec(item, out);
690            }
691        }
692        Expr::Case {
693            expr: case_expr,
694            when_then,
695            else_expr,
696        } => {
697            if let Some(e) = case_expr {
698                extract_inner_aggregates_rec(e, out);
699            }
700            for (w, t) in when_then {
701                extract_inner_aggregates_rec(w, out);
702                extract_inner_aggregates_rec(t, out);
703            }
704            if let Some(e) = else_expr {
705                extract_inner_aggregates_rec(e, out);
706            }
707        }
708        Expr::In {
709            expr: in_expr,
710            list,
711        } => {
712            extract_inner_aggregates_rec(in_expr, out);
713            extract_inner_aggregates_rec(list, out);
714        }
715        Expr::ArrayIndex { array, index } => {
716            extract_inner_aggregates_rec(array, out);
717            extract_inner_aggregates_rec(index, out);
718        }
719        Expr::ArraySlice { array, start, end } => {
720            extract_inner_aggregates_rec(array, out);
721            if let Some(s) = start {
722                extract_inner_aggregates_rec(s, out);
723            }
724            if let Some(e) = end {
725                extract_inner_aggregates_rec(e, out);
726            }
727        }
728        Expr::Map(entries) => {
729            for (_, v) in entries {
730                extract_inner_aggregates_rec(v, out);
731            }
732        }
733        _ => {}
734    }
735}
736
737/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
738/// `Expr::Variable(aggregate_column_name(agg))`.
739///
740/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
741/// rewritten (the body references the loop variable, not outer-scope columns).
742fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
743    match expr {
744        Expr::FunctionCall {
745            name, window_spec, ..
746        } if window_spec.is_none() && is_aggregate_function_name(name) => {
747            // Replace bare aggregate with column reference
748            Expr::Variable(aggregate_column_name(expr))
749        }
750        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
751            Expr::Variable(aggregate_column_name(expr))
752        }
753        Expr::ListComprehension {
754            variable,
755            list,
756            where_clause,
757            map_expr,
758        } => Expr::ListComprehension {
759            variable: variable.clone(),
760            list: Box::new(replace_aggregates_with_columns(list)),
761            where_clause: where_clause.clone(), // don't touch — references loop var
762            map_expr: map_expr.clone(),         // don't touch — references loop var
763        },
764        Expr::Quantifier {
765            quantifier,
766            variable,
767            list,
768            predicate,
769        } => Expr::Quantifier {
770            quantifier: *quantifier,
771            variable: variable.clone(),
772            list: Box::new(replace_aggregates_with_columns(list)),
773            predicate: predicate.clone(), // don't touch — references loop var
774        },
775        Expr::Reduce {
776            accumulator,
777            init,
778            variable,
779            list,
780            expr: body,
781        } => Expr::Reduce {
782            accumulator: accumulator.clone(),
783            init: Box::new(replace_aggregates_with_columns(init)),
784            variable: variable.clone(),
785            list: Box::new(replace_aggregates_with_columns(list)),
786            expr: body.clone(), // don't touch — references loop var
787        },
788        Expr::FunctionCall {
789            name,
790            args,
791            distinct,
792            window_spec,
793        } => Expr::FunctionCall {
794            name: name.clone(),
795            args: args.iter().map(replace_aggregates_with_columns).collect(),
796            distinct: *distinct,
797            window_spec: window_spec.clone(),
798        },
799        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
800            left: Box::new(replace_aggregates_with_columns(left)),
801            op: *op,
802            right: Box::new(replace_aggregates_with_columns(right)),
803        },
804        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
805            op: *op,
806            expr: Box::new(replace_aggregates_with_columns(e)),
807        },
808        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
809        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
810        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
811        Expr::Property(base, prop) => Expr::Property(
812            Box::new(replace_aggregates_with_columns(base)),
813            prop.clone(),
814        ),
815        Expr::List(items) => {
816            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
817        }
818        Expr::Case {
819            expr: case_expr,
820            when_then,
821            else_expr,
822        } => Expr::Case {
823            expr: case_expr
824                .as_ref()
825                .map(|e| Box::new(replace_aggregates_with_columns(e))),
826            when_then: when_then
827                .iter()
828                .map(|(w, t)| {
829                    (
830                        replace_aggregates_with_columns(w),
831                        replace_aggregates_with_columns(t),
832                    )
833                })
834                .collect(),
835            else_expr: else_expr
836                .as_ref()
837                .map(|e| Box::new(replace_aggregates_with_columns(e))),
838        },
839        Expr::In {
840            expr: in_expr,
841            list,
842        } => Expr::In {
843            expr: Box::new(replace_aggregates_with_columns(in_expr)),
844            list: Box::new(replace_aggregates_with_columns(list)),
845        },
846        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
847            array: Box::new(replace_aggregates_with_columns(array)),
848            index: Box::new(replace_aggregates_with_columns(index)),
849        },
850        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
851            array: Box::new(replace_aggregates_with_columns(array)),
852            start: start
853                .as_ref()
854                .map(|e| Box::new(replace_aggregates_with_columns(e))),
855            end: end
856                .as_ref()
857                .map(|e| Box::new(replace_aggregates_with_columns(e))),
858        },
859        Expr::Map(entries) => Expr::Map(
860            entries
861                .iter()
862                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
863                .collect(),
864        ),
865        // Leaf expressions — return as-is
866        other => other.clone(),
867    }
868}
869
870/// Check if an expression contains any aggregate function (recursively).
871fn contains_aggregate_recursive(expr: &Expr) -> bool {
872    match expr {
873        Expr::FunctionCall { name, args, .. } => {
874            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
875        }
876        Expr::BinaryOp { left, right, .. } => {
877            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
878        }
879        Expr::UnaryOp { expr: e, .. }
880        | Expr::IsNull(e)
881        | Expr::IsNotNull(e)
882        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
883        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
884        Expr::Case {
885            expr,
886            when_then,
887            else_expr,
888        } => {
889            expr.as_deref().is_some_and(contains_aggregate_recursive)
890                || when_then.iter().any(|(w, t)| {
891                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
892                })
893                || else_expr
894                    .as_deref()
895                    .is_some_and(contains_aggregate_recursive)
896        }
897        Expr::In { expr, list } => {
898            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
899        }
900        Expr::Property(base, _) => contains_aggregate_recursive(base),
901        Expr::ListComprehension { list, .. } => {
902            // Only check the list source — where_clause/map_expr reference the loop variable
903            contains_aggregate_recursive(list)
904        }
905        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
906        Expr::Reduce { init, list, .. } => {
907            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
908        }
909        Expr::ArrayIndex { array, index } => {
910            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
911        }
912        Expr::ArraySlice { array, start, end } => {
913            contains_aggregate_recursive(array)
914                || start.as_deref().is_some_and(contains_aggregate_recursive)
915                || end.as_deref().is_some_and(contains_aggregate_recursive)
916        }
917        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
918        _ => false,
919    }
920}
921
922/// Check if an expression contains a non-deterministic function (e.g. rand()).
923fn contains_non_deterministic(expr: &Expr) -> bool {
924    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
925        return true;
926    }
927    let mut found = false;
928    expr.for_each_child(&mut |child| {
929        if !found {
930            found = contains_non_deterministic(child);
931        }
932    });
933    found
934}
935
936fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
937    match expr {
938        Expr::FunctionCall { name, args, .. } => {
939            if is_aggregate_function_name(name) {
940                out.insert(expr.to_string_repr());
941                return;
942            }
943            for arg in args {
944                collect_aggregate_reprs(arg, out);
945            }
946        }
947        Expr::BinaryOp { left, right, .. } => {
948            collect_aggregate_reprs(left, out);
949            collect_aggregate_reprs(right, out);
950        }
951        Expr::UnaryOp { expr, .. }
952        | Expr::IsNull(expr)
953        | Expr::IsNotNull(expr)
954        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
955        Expr::List(items) => {
956            for item in items {
957                collect_aggregate_reprs(item, out);
958            }
959        }
960        Expr::Case {
961            expr,
962            when_then,
963            else_expr,
964        } => {
965            if let Some(e) = expr {
966                collect_aggregate_reprs(e, out);
967            }
968            for (w, t) in when_then {
969                collect_aggregate_reprs(w, out);
970                collect_aggregate_reprs(t, out);
971            }
972            if let Some(e) = else_expr {
973                collect_aggregate_reprs(e, out);
974            }
975        }
976        Expr::In { expr, list } => {
977            collect_aggregate_reprs(expr, out);
978            collect_aggregate_reprs(list, out);
979        }
980        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
981        Expr::ListComprehension { list, .. } => {
982            collect_aggregate_reprs(list, out);
983        }
984        Expr::Quantifier { list, .. } => {
985            collect_aggregate_reprs(list, out);
986        }
987        Expr::Reduce { init, list, .. } => {
988            collect_aggregate_reprs(init, out);
989            collect_aggregate_reprs(list, out);
990        }
991        Expr::ArrayIndex { array, index } => {
992            collect_aggregate_reprs(array, out);
993            collect_aggregate_reprs(index, out);
994        }
995        Expr::ArraySlice { array, start, end } => {
996            collect_aggregate_reprs(array, out);
997            if let Some(s) = start {
998                collect_aggregate_reprs(s, out);
999            }
1000            if let Some(e) = end {
1001                collect_aggregate_reprs(e, out);
1002            }
1003        }
1004        _ => {}
1005    }
1006}
1007
/// A variable or property reference found outside any aggregate call,
/// as gathered by `collect_non_aggregate_refs`.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference; holds the variable name.
    Var(String),
    /// A property access expression.
    Property {
        /// String representation of the whole property expression
        /// (from `to_string_repr`).
        repr: String,
        /// Name of the base variable when the base is a plain variable,
        /// otherwise `None`.
        base_var: Option<String>,
    },
}
1016
1017fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1018    match expr {
1019        Expr::FunctionCall { name, args, .. } => {
1020            if is_aggregate_function_name(name) {
1021                return;
1022            }
1023            for arg in args {
1024                collect_non_aggregate_refs(arg, inside_agg, out);
1025            }
1026        }
1027        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1028        Expr::Property(base, _) if !inside_agg => {
1029            let base_var = if let Expr::Variable(v) = base.as_ref() {
1030                Some(v.clone())
1031            } else {
1032                None
1033            };
1034            out.push(NonAggregateRef::Property {
1035                repr: expr.to_string_repr(),
1036                base_var,
1037            });
1038        }
1039        Expr::BinaryOp { left, right, .. } => {
1040            collect_non_aggregate_refs(left, inside_agg, out);
1041            collect_non_aggregate_refs(right, inside_agg, out);
1042        }
1043        Expr::UnaryOp { expr, .. }
1044        | Expr::IsNull(expr)
1045        | Expr::IsNotNull(expr)
1046        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1047        Expr::List(items) => {
1048            for item in items {
1049                collect_non_aggregate_refs(item, inside_agg, out);
1050            }
1051        }
1052        Expr::Case {
1053            expr,
1054            when_then,
1055            else_expr,
1056        } => {
1057            if let Some(e) = expr {
1058                collect_non_aggregate_refs(e, inside_agg, out);
1059            }
1060            for (w, t) in when_then {
1061                collect_non_aggregate_refs(w, inside_agg, out);
1062                collect_non_aggregate_refs(t, inside_agg, out);
1063            }
1064            if let Some(e) = else_expr {
1065                collect_non_aggregate_refs(e, inside_agg, out);
1066            }
1067        }
1068        Expr::In { expr, list } => {
1069            collect_non_aggregate_refs(expr, inside_agg, out);
1070            collect_non_aggregate_refs(list, inside_agg, out);
1071        }
1072        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1073        // source. The body references the loop variable, not outer-scope vars.
1074        Expr::ListComprehension { list, .. } => {
1075            collect_non_aggregate_refs(list, inside_agg, out);
1076        }
1077        Expr::Quantifier { list, .. } => {
1078            collect_non_aggregate_refs(list, inside_agg, out);
1079        }
1080        Expr::Reduce { init, list, .. } => {
1081            collect_non_aggregate_refs(init, inside_agg, out);
1082            collect_non_aggregate_refs(list, inside_agg, out);
1083        }
1084        _ => {}
1085    }
1086}
1087
1088fn validate_with_order_by_aggregate_item(
1089    expr: &Expr,
1090    projected_aggregate_reprs: &HashSet<String>,
1091    projected_simple_reprs: &HashSet<String>,
1092    projected_aliases: &HashSet<String>,
1093) -> Result<()> {
1094    let mut aggregate_reprs = HashSet::new();
1095    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1096    for agg in aggregate_reprs {
1097        if !projected_aggregate_reprs.contains(&agg) {
1098            return Err(anyhow!(
1099                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1100                agg
1101            ));
1102        }
1103    }
1104
1105    let mut refs = Vec::new();
1106    collect_non_aggregate_refs(expr, false, &mut refs);
1107    refs.retain(|r| match r {
1108        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1109        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1110    });
1111
1112    let mut dedup = HashSet::new();
1113    refs.retain(|r| {
1114        let key = match r {
1115            NonAggregateRef::Var(v) => format!("v:{v}"),
1116            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1117        };
1118        dedup.insert(key)
1119    });
1120
1121    if refs.len() > 1 {
1122        return Err(anyhow!(
1123            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1124        ));
1125    }
1126
1127    if let Some(r) = refs.first() {
1128        return match r {
1129            NonAggregateRef::Var(v) => Err(anyhow!(
1130                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1131                v
1132            )),
1133            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1134                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1135                base_var
1136                    .clone()
1137                    .unwrap_or_else(|| "<property-base>".to_string())
1138            )),
1139        };
1140    }
1141
1142    Ok(())
1143}
1144
1145/// Validate that no aggregation functions appear in WHERE clause.
1146fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1147    if contains_aggregate_recursive(predicate) {
1148        return Err(anyhow!(
1149            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1150        ));
1151    }
1152    Ok(())
1153}
1154
/// Numeric value produced while evaluating a constant expression.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// Integer value.
    Int(i64),
    /// Floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
1169
1170fn eval_const_numeric_expr(
1171    expr: &Expr,
1172    params: &HashMap<String, uni_common::Value>,
1173) -> Result<ConstNumber> {
1174    match expr {
1175        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1176        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1177        Expr::Parameter(name) => match params.get(name) {
1178            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1179            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1180            Some(uni_common::Value::Null) => Err(anyhow!(
1181                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1182                name
1183            )),
1184            Some(other) => Err(anyhow!(
1185                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1186                name,
1187                other
1188            )),
1189            None => Err(anyhow!(
1190                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1191            )),
1192        },
1193        Expr::UnaryOp {
1194            op: uni_cypher::ast::UnaryOp::Neg,
1195            expr,
1196        } => match eval_const_numeric_expr(expr, params)? {
1197            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1198            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1199        },
1200        Expr::BinaryOp { left, op, right } => {
1201            let l = eval_const_numeric_expr(left, params)?;
1202            let r = eval_const_numeric_expr(right, params)?;
1203            match op {
1204                BinaryOp::Add => match (l, r) {
1205                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1206                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1207                },
1208                BinaryOp::Sub => match (l, r) {
1209                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1210                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1211                },
1212                BinaryOp::Mul => match (l, r) {
1213                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1214                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1215                },
1216                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1217                BinaryOp::Mod => match (l, r) {
1218                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1219                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1220                },
1221                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1222                _ => Err(anyhow!(
1223                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1224                )),
1225            }
1226        }
1227        Expr::FunctionCall { name, args, .. } => {
1228            let lower = name.to_lowercase();
1229            match lower.as_str() {
1230                "rand" if args.is_empty() => {
1231                    use rand::Rng;
1232                    let mut rng = rand::thread_rng();
1233                    Ok(ConstNumber::Float(rng.r#gen::<f64>()))
1234                }
1235                "tointeger" | "toint" if args.len() == 1 => {
1236                    match eval_const_numeric_expr(&args[0], params)? {
1237                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1238                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1239                    }
1240                }
1241                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1242                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1243                )),
1244                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1245                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1246                )),
1247                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1248                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1249                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1250                },
1251                _ => Err(anyhow!(
1252                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1253                )),
1254            }
1255        }
1256        _ => Err(anyhow!(
1257            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1258        )),
1259    }
1260}
1261
1262/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1263/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1264fn parse_non_negative_integer(
1265    expr: &Expr,
1266    clause_name: &str,
1267    params: &HashMap<String, uni_common::Value>,
1268) -> Result<Option<usize>> {
1269    let referenced_vars = collect_expr_variables(expr);
1270    if !referenced_vars.is_empty() {
1271        return Err(anyhow!(
1272            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1273            clause_name
1274        ));
1275    }
1276
1277    let value = eval_const_numeric_expr(expr, params)?;
1278    let as_int = match value {
1279        ConstNumber::Int(v) => v,
1280        ConstNumber::Float(v) => {
1281            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1282                return Err(anyhow!(
1283                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1284                    clause_name
1285                ));
1286            }
1287            v as i64
1288        }
1289    };
1290    if as_int < 0 {
1291        return Err(anyhow!(
1292            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1293            clause_name
1294        ));
1295    }
1296    Ok(Some(as_int as usize))
1297}
1298
1299/// Validate that aggregation functions are not nested.
1300fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1301    if let Expr::FunctionCall { name, args, .. } = expr
1302        && is_aggregate_function_name(name)
1303    {
1304        for arg in args {
1305            if contains_aggregate_recursive(arg) {
1306                return Err(anyhow!(
1307                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1308                ));
1309            }
1310            if contains_non_deterministic(arg) {
1311                return Err(anyhow!(
1312                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1313                ));
1314            }
1315        }
1316    }
1317    let mut result = Ok(());
1318    expr.for_each_child(&mut |child| {
1319        if result.is_ok() {
1320            result = validate_no_nested_aggregation(child);
1321        }
1322    });
1323    result
1324}
1325
1326/// Validate that an expression does not access properties or labels of
1327/// deleted entities. `type(r)` on a deleted relationship is allowed per
1328/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1329fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1330    // Check n.prop on a deleted variable
1331    if let Expr::Property(inner, _) = expr
1332        && let Expr::Variable(name) = inner.as_ref()
1333        && deleted_vars.contains(name)
1334    {
1335        return Err(anyhow!(
1336            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1337            name
1338        ));
1339    }
1340    // Check labels(n) or keys(n) on a deleted variable
1341    if let Expr::FunctionCall { name, args, .. } = expr
1342        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1343        && args.len() == 1
1344        && let Expr::Variable(var) = &args[0]
1345        && deleted_vars.contains(var)
1346    {
1347        return Err(anyhow!(
1348            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1349            name.to_lowercase(),
1350            var
1351        ));
1352    }
1353    let mut result = Ok(());
1354    expr.for_each_child(&mut |child| {
1355        if result.is_ok() {
1356            result = validate_no_deleted_entity_access(child, deleted_vars);
1357        }
1358    });
1359    result
1360}
1361
1362/// Validate that all variables referenced in properties are defined,
1363/// either in scope or in the local CREATE variable list.
1364fn validate_property_variables(
1365    properties: &Option<Expr>,
1366    vars_in_scope: &[VariableInfo],
1367    create_vars: &[&str],
1368) -> Result<()> {
1369    if let Some(props) = properties {
1370        for var in collect_expr_variables(props) {
1371            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1372                return Err(anyhow!(
1373                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1374                    var
1375                ));
1376            }
1377        }
1378    }
1379    Ok(())
1380}
1381
1382/// Check that a variable name is not already bound in scope or in the local CREATE list.
1383/// Used to prevent rebinding in CREATE clauses.
1384fn check_not_already_bound(
1385    name: &str,
1386    vars_in_scope: &[VariableInfo],
1387    create_vars: &[&str],
1388) -> Result<()> {
1389    if is_var_in_scope(vars_in_scope, name) {
1390        return Err(anyhow!(
1391            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1392            name
1393        ));
1394    }
1395    if create_vars.contains(&name) {
1396        return Err(anyhow!(
1397            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1398            name
1399        ));
1400    }
1401    Ok(())
1402}
1403
1404fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1405    let mut scope = vars_in_scope.to_vec();
1406
1407    for path in &pattern.paths {
1408        if let Some(path_var) = &path.variable
1409            && !path_var.is_empty()
1410            && !is_var_in_scope(&scope, path_var)
1411        {
1412            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1413        }
1414        for element in &path.elements {
1415            match element {
1416                PatternElement::Node(n) => {
1417                    if let Some(v) = &n.variable
1418                        && !v.is_empty()
1419                        && !is_var_in_scope(&scope, v)
1420                    {
1421                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1422                    }
1423                }
1424                PatternElement::Relationship(r) => {
1425                    if let Some(v) = &r.variable
1426                        && !v.is_empty()
1427                        && !is_var_in_scope(&scope, v)
1428                    {
1429                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1430                    }
1431                }
1432                PatternElement::Parenthesized { .. } => {}
1433            }
1434        }
1435    }
1436
1437    scope
1438}
1439
1440fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1441    match item {
1442        SetItem::Property { expr, value } => {
1443            validate_expression_variables(expr, vars_in_scope)?;
1444            validate_expression(expr, vars_in_scope)?;
1445            validate_expression_variables(value, vars_in_scope)?;
1446            validate_expression(value, vars_in_scope)?;
1447            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1448                return Err(anyhow!(
1449                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1450                ));
1451            }
1452        }
1453        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1454            if !is_var_in_scope(vars_in_scope, variable) {
1455                return Err(anyhow!(
1456                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1457                    variable
1458                ));
1459            }
1460            validate_expression_variables(value, vars_in_scope)?;
1461            validate_expression(value, vars_in_scope)?;
1462            if contains_pattern_predicate(value) {
1463                return Err(anyhow!(
1464                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1465                ));
1466            }
1467        }
1468        SetItem::Labels { variable, .. } => {
1469            if !is_var_in_scope(vars_in_scope, variable) {
1470                return Err(anyhow!(
1471                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1472                    variable
1473                ));
1474            }
1475        }
1476    }
1477
1478    Ok(())
1479}
1480
1481/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1482/// The OpenCypher spec requires all property values in MERGE to be non-null.
1483fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1484    if let Some(Expr::Map(entries)) = properties {
1485        for (key, value) in entries {
1486            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1487                return Err(anyhow!(
1488                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1489                    key
1490                ));
1491            }
1492        }
1493    }
1494    Ok(())
1495}
1496
1497fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1498    for path in &merge_clause.pattern.paths {
1499        for element in &path.elements {
1500            match element {
1501                PatternElement::Node(n) => {
1502                    if let Some(Expr::Parameter(_)) = &n.properties {
1503                        return Err(anyhow!(
1504                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1505                        ));
1506                    }
1507                    reject_null_merge_properties(&n.properties)?;
1508                    // VariableAlreadyBound: reject if a bound variable is used
1509                    // as a standalone MERGE node or introduces new labels/properties.
1510                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1511                    if let Some(variable) = &n.variable
1512                        && !variable.is_empty()
1513                        && is_var_in_scope(vars_in_scope, variable)
1514                    {
1515                        let is_standalone = path.elements.len() == 1;
1516                        let has_new_labels = !n.labels.is_empty();
1517                        let has_new_properties = n.properties.is_some();
1518                        if is_standalone || has_new_labels || has_new_properties {
1519                            return Err(anyhow!(
1520                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1521                                variable
1522                            ));
1523                        }
1524                    }
1525                }
1526                PatternElement::Relationship(r) => {
1527                    if let Some(variable) = &r.variable
1528                        && !variable.is_empty()
1529                        && is_var_in_scope(vars_in_scope, variable)
1530                    {
1531                        return Err(anyhow!(
1532                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1533                            variable
1534                        ));
1535                    }
1536                    if r.types.len() != 1 {
1537                        return Err(anyhow!(
1538                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1539                        ));
1540                    }
1541                    if r.range.is_some() {
1542                        return Err(anyhow!(
1543                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1544                        ));
1545                    }
1546                    if let Some(Expr::Parameter(_)) = &r.properties {
1547                        return Err(anyhow!(
1548                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1549                        ));
1550                    }
1551                    reject_null_merge_properties(&r.properties)?;
1552                }
1553                PatternElement::Parenthesized { .. } => {}
1554            }
1555        }
1556    }
1557
1558    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1559    for item in &merge_clause.on_create {
1560        validate_merge_set_item(item, &merge_scope)?;
1561    }
1562    for item in &merge_clause.on_match {
1563        validate_merge_set_item(item, &merge_scope)?;
1564    }
1565
1566    Ok(())
1567}
1568
1569/// Recursively validate an expression for type errors, undefined variables, etc.
1570fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1571    // Validate boolean operators and nested aggregation first
1572    validate_boolean_expression(expr)?;
1573    validate_no_nested_aggregation(expr)?;
1574
1575    // Helper to validate multiple expressions
1576    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1577        for e in exprs {
1578            validate_expression(e, vars)?;
1579        }
1580        Ok(())
1581    }
1582
1583    match expr {
1584        Expr::FunctionCall { name, args, .. } => {
1585            validate_function_call(name, args, vars_in_scope)?;
1586            validate_all(args, vars_in_scope)
1587        }
1588        Expr::BinaryOp { left, right, .. } => {
1589            validate_expression(left, vars_in_scope)?;
1590            validate_expression(right, vars_in_scope)
1591        }
1592        Expr::UnaryOp { expr: e, .. }
1593        | Expr::IsNull(e)
1594        | Expr::IsNotNull(e)
1595        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1596        Expr::Property(base, prop) => {
1597            if let Expr::Variable(var_name) = base.as_ref()
1598                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1599            {
1600                // Paths don't have properties
1601                if var_info.var_type == VariableType::Path {
1602                    return Err(anyhow!(
1603                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1604                        var_name,
1605                        prop
1606                    ));
1607                }
1608                // Known non-graph literals (int, float, bool, string, list) don't have properties
1609                if var_info.var_type == VariableType::ScalarLiteral {
1610                    return Err(anyhow!(
1611                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1612                    ));
1613                }
1614            }
1615            validate_expression(base, vars_in_scope)
1616        }
1617        Expr::List(items) => validate_all(items, vars_in_scope),
1618        Expr::Case {
1619            expr: case_expr,
1620            when_then,
1621            else_expr,
1622        } => {
1623            if let Some(e) = case_expr {
1624                validate_expression(e, vars_in_scope)?;
1625            }
1626            for (w, t) in when_then {
1627                validate_expression(w, vars_in_scope)?;
1628                validate_expression(t, vars_in_scope)?;
1629            }
1630            if let Some(e) = else_expr {
1631                validate_expression(e, vars_in_scope)?;
1632            }
1633            Ok(())
1634        }
1635        Expr::In { expr: e, list } => {
1636            validate_expression(e, vars_in_scope)?;
1637            validate_expression(list, vars_in_scope)
1638        }
1639        Expr::Exists {
1640            query,
1641            from_pattern_predicate: true,
1642        } => {
1643            // Pattern predicates cannot introduce new named variables.
1644            // Extract named vars from inner MATCH pattern, check each is in scope.
1645            if let Query::Single(stmt) = query.as_ref() {
1646                for clause in &stmt.clauses {
1647                    if let Clause::Match(m) = clause {
1648                        for path in &m.pattern.paths {
1649                            for elem in &path.elements {
1650                                match elem {
1651                                    PatternElement::Node(n) => {
1652                                        if let Some(var) = &n.variable
1653                                            && !is_var_in_scope(vars_in_scope, var)
1654                                        {
1655                                            return Err(anyhow!(
1656                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1657                                                var
1658                                            ));
1659                                        }
1660                                    }
1661                                    PatternElement::Relationship(r) => {
1662                                        if let Some(var) = &r.variable
1663                                            && !is_var_in_scope(vars_in_scope, var)
1664                                        {
1665                                            return Err(anyhow!(
1666                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1667                                                var
1668                                            ));
1669                                        }
1670                                    }
1671                                    _ => {}
1672                                }
1673                            }
1674                        }
1675                    }
1676                }
1677            }
1678            Ok(())
1679        }
1680        _ => Ok(()),
1681    }
1682}
1683
/// One step (hop) in a Quantified Path Pattern sub-pattern.
/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type ids for this step.
    /// NOTE(review): presumably the set of relationship types this hop may follow — confirm
    /// against the code that builds the step.
    pub edge_type_ids: Vec<u32>,
    /// Direction of this step's relationship.
    pub direction: Direction,
    /// Label for this step's target node, if one is given.
    /// NOTE(review): `None` presumably means "no label constraint" — confirm.
    pub target_label: Option<String>,
}
1692
/// Logical query plan produced by `QueryPlanner`.
///
/// Variants that carry an `input` (or `left`/`right`) field box their child plan(s),
/// so a plan is a tree of `LogicalPlan` nodes.
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION of two plans; `all` is the flag carried over from `Query::Union`.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        all: bool,
    },
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan main vertices table by label name(s) (`MATCH (n:Unknown)`).
    /// Used for labels not defined in schema (schemaless support).
    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    Empty, // Produces 1 empty row
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    Distinct {
        input: Box<LogicalPlan>,
    },
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection of expressions, each paired with an optional alias.
    /// For UNION column validation the alias, when present, is the column name;
    /// otherwise the expression's string representation is used.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    DropIndex {
        name: String,
        if_exists: bool,
    },
    ShowIndexes {
        filter: Option<String>,
    },
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around the plan of the inner query.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    // Transaction Plans
    Begin,
    Commit,
    Rollback,
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
}
2077
/// Extracted vector similarity predicate info for optimization
struct VectorSimilarityPredicate {
    /// Name of the variable whose property is compared (the `n` in `n.embedding`).
    variable: String,
    /// Name of the property holding the vector (the `embedding` in `n.embedding`).
    property: String,
    /// Query-vector expression: the second argument of `vector_similarity(...)`
    /// or the right-hand side of `~=`.
    query: Expr,
    /// Threshold taken from a `>`/`>=`/`<`/`<=` comparison against a numeric literal;
    /// `None` for the `~=` form.
    threshold: Option<f32>,
}
2085
/// Result of extracting vector_similarity from a predicate
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole input expression was the vector similarity comparison;
    /// otherwise the other AND operands, re-joined with AND.
    residual: Option<Expr>,
}
2093
2094/// Try to extract a vector_similarity predicate from an expression.
2095/// Matches patterns like:
2096/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2097/// - n.embedding ~= $query
2098///
2099/// Also handles AND predicates.
2100fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2101    match expr {
2102        Expr::BinaryOp { left, op, right } => {
2103            // Handle AND: check both sides for vector_similarity
2104            if matches!(op, BinaryOp::And) {
2105                // Try left side first
2106                if let Some(vs) = extract_simple_vector_similarity(left) {
2107                    return Some(VectorSimilarityExtraction {
2108                        predicate: vs,
2109                        residual: Some(right.as_ref().clone()),
2110                    });
2111                }
2112                // Try right side
2113                if let Some(vs) = extract_simple_vector_similarity(right) {
2114                    return Some(VectorSimilarityExtraction {
2115                        predicate: vs,
2116                        residual: Some(left.as_ref().clone()),
2117                    });
2118                }
2119                // Recursively check within left/right for nested ANDs
2120                if let Some(mut extraction) = extract_vector_similarity(left) {
2121                    extraction.residual = Some(combine_with_and(
2122                        extraction.residual,
2123                        right.as_ref().clone(),
2124                    ));
2125                    return Some(extraction);
2126                }
2127                if let Some(mut extraction) = extract_vector_similarity(right) {
2128                    extraction.residual =
2129                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2130                    return Some(extraction);
2131                }
2132                return None;
2133            }
2134
2135            // Simple case: direct vector_similarity comparison
2136            if let Some(vs) = extract_simple_vector_similarity(expr) {
2137                return Some(VectorSimilarityExtraction {
2138                    predicate: vs,
2139                    residual: None,
2140                });
2141            }
2142            None
2143        }
2144        _ => None,
2145    }
2146}
2147
2148/// Helper to combine an optional expression with another using AND
2149fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2150    match opt_expr {
2151        Some(e) => Expr::BinaryOp {
2152            left: Box::new(e),
2153            op: BinaryOp::And,
2154            right: Box::new(other),
2155        },
2156        None => other,
2157    }
2158}
2159
2160/// Extract a simple vector_similarity comparison (no AND)
2161fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2162    match expr {
2163        Expr::BinaryOp { left, op, right } => {
2164            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2165            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2166                && let (Some(vs), Some(thresh)) = (
2167                    extract_vector_similarity_call(left),
2168                    extract_float_literal(right),
2169                )
2170            {
2171                return Some(VectorSimilarityPredicate {
2172                    variable: vs.0,
2173                    property: vs.1,
2174                    query: vs.2,
2175                    threshold: Some(thresh),
2176                });
2177            }
2178            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2179            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2180                && let (Some(thresh), Some(vs)) = (
2181                    extract_float_literal(left),
2182                    extract_vector_similarity_call(right),
2183                )
2184            {
2185                return Some(VectorSimilarityPredicate {
2186                    variable: vs.0,
2187                    property: vs.1,
2188                    query: vs.2,
2189                    threshold: Some(thresh),
2190                });
2191            }
2192            // Pattern: n.embedding ~= query
2193            if matches!(op, BinaryOp::ApproxEq)
2194                && let Expr::Property(var_expr, prop) = left.as_ref()
2195                && let Expr::Variable(var) = var_expr.as_ref()
2196            {
2197                return Some(VectorSimilarityPredicate {
2198                    variable: var.clone(),
2199                    property: prop.clone(),
2200                    query: right.as_ref().clone(),
2201                    threshold: None,
2202                });
2203            }
2204            None
2205        }
2206        _ => None,
2207    }
2208}
2209
2210/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2211fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2212    if let Expr::FunctionCall { name, args, .. } = expr
2213        && name.eq_ignore_ascii_case("vector_similarity")
2214        && args.len() == 2
2215    {
2216        // First arg should be Property(Identifier(var), prop)
2217        if let Expr::Property(var_expr, prop) = &args[0]
2218            && let Expr::Variable(var) = var_expr.as_ref()
2219        {
2220            // Second arg is query
2221            return Some((var.clone(), prop.clone(), args[1].clone()));
2222        }
2223    }
2224    None
2225}
2226
2227/// Extract a float value from a literal expression
2228fn extract_float_literal(expr: &Expr) -> Option<f32> {
2229    match expr {
2230        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2231        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2232        _ => None,
2233    }
2234}
2235
/// Turns parsed queries into `LogicalPlan` trees (see `plan` / `plan_with_scope`).
pub struct QueryPlanner {
    /// Schema the planner was constructed with.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    /// Kept in a `Cell` so it can be bumped through `&self`.
    anon_counter: std::cell::Cell<usize>,
    /// Optional query parameters for resolving $param in SKIP/LIMIT.
    /// Empty unless supplied via `with_params`.
    params: HashMap<String, uni_common::Value>,
}
2245
/// Bundle of borrowed pattern parts and flags describing one traversal step.
/// NOTE(review): presumably passed to the traverse-planning helper(s) further down
/// the file — confirm at the use sites.
struct TraverseParams<'a> {
    /// Relationship pattern for this step.
    rel: &'a RelationshipPattern,
    /// Node pattern the relationship leads to.
    target_node: &'a NodePattern,
    /// Whether the step belongs to an OPTIONAL MATCH (assumed from the name and the
    /// `optional_pattern_vars` doc below — TODO confirm).
    optional: bool,
    /// Path variable to bind, if any.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2255
2256impl QueryPlanner {
2257    pub fn new(schema: Arc<Schema>) -> Self {
2258        // Pre-parse all generation expressions for caching
2259        let mut gen_expr_cache = HashMap::new();
2260        for (label, props) in &schema.properties {
2261            for (gen_col, meta) in props {
2262                if let Some(expr_str) = &meta.generation_expression
2263                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2264                {
2265                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2266                }
2267            }
2268        }
2269        Self {
2270            schema,
2271            gen_expr_cache,
2272            anon_counter: std::cell::Cell::new(0),
2273            params: HashMap::new(),
2274        }
2275    }
2276
2277    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2278    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2279        self.params = params;
2280        self
2281    }
2282
2283    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2284        self.plan_with_scope(query, Vec::new())
2285    }
2286
2287    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2288        // Apply query rewrites before planning
2289        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2290        if Self::has_mixed_union_modes(&rewritten_query) {
2291            return Err(anyhow!(
2292                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2293            ));
2294        }
2295
2296        match rewritten_query {
2297            Query::Single(stmt) => self.plan_single(stmt, vars),
2298            Query::Union { left, right, all } => {
2299                let l = self.plan_with_scope(*left, vars.clone())?;
2300                let r = self.plan_with_scope(*right, vars)?;
2301
2302                // Validate that both sides have the same column names
2303                let left_cols = Self::extract_projection_columns(&l);
2304                let right_cols = Self::extract_projection_columns(&r);
2305
2306                if left_cols != right_cols {
2307                    return Err(anyhow!(
2308                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2309                    ));
2310                }
2311
2312                Ok(LogicalPlan::Union {
2313                    left: Box::new(l),
2314                    right: Box::new(r),
2315                    all,
2316                })
2317            }
2318            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2319            Query::Transaction(cmd) => self.plan_transaction_command(cmd),
2320            Query::Explain(inner) => {
2321                let inner_plan = self.plan_with_scope(*inner, vars)?;
2322                Ok(LogicalPlan::Explain {
2323                    plan: Box::new(inner_plan),
2324                })
2325            }
2326            Query::TimeTravel { .. } => {
2327                unreachable!("TimeTravel should be resolved at API layer before planning")
2328            }
2329        }
2330    }
2331
2332    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2333        match query {
2334            Query::Union { left, right, all } => {
2335                out.insert(*all);
2336                Self::collect_union_modes(left, out);
2337                Self::collect_union_modes(right, out);
2338            }
2339            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2340            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2341            Query::Single(_) | Query::Schema(_) | Query::Transaction(_) => {}
2342        }
2343    }
2344
2345    fn has_mixed_union_modes(query: &Query) -> bool {
2346        let mut modes = HashSet::new();
2347        Self::collect_union_modes(query, &mut modes);
2348        modes.len() > 1
2349    }
2350
2351    fn next_anon_var(&self) -> String {
2352        let id = self.anon_counter.get();
2353        self.anon_counter.set(id + 1);
2354        format!("_anon_{}", id)
2355    }
2356
2357    /// Extract projection column names from a logical plan.
2358    /// Used for UNION column validation.
2359    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2360        match plan {
2361            LogicalPlan::Project { projections, .. } => projections
2362                .iter()
2363                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2364                .collect(),
2365            LogicalPlan::Limit { input, .. }
2366            | LogicalPlan::Sort { input, .. }
2367            | LogicalPlan::Distinct { input, .. }
2368            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2369            LogicalPlan::Union { left, right, .. } => {
2370                let left_cols = Self::extract_projection_columns(left);
2371                if left_cols.is_empty() {
2372                    Self::extract_projection_columns(right)
2373                } else {
2374                    left_cols
2375                }
2376            }
2377            LogicalPlan::Aggregate {
2378                group_by,
2379                aggregates,
2380                ..
2381            } => {
2382                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2383                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2384                cols
2385            }
2386            _ => Vec::new(),
2387        }
2388    }
2389
    /// Builds the logical plan for a `RETURN` clause on top of `plan`.
    ///
    /// Plan nodes are stacked in this order, each one only when it is needed:
    /// 1. `Aggregate` — when a return item is classified as a bare or compound
    ///    aggregate.
    /// 2. An intermediate `Project` followed by `Window` — when window functions
    ///    are found in the return items or in `ORDER BY`.
    /// 3. `Sort` — for `ORDER BY`, after validating its expressions against an
    ///    extended scope and rewriting alias references.
    /// 4. `Limit` — for `SKIP` / `LIMIT`.
    /// 5. The final `Project` — aggregate and window calls are replaced by
    ///    references to the columns produced by steps 1 and 2.
    /// 6. `Distinct` — for `RETURN DISTINCT`.
    ///
    /// # Errors
    ///
    /// Returns an error when `RETURN *` is used with no user-named variable in
    /// scope, when an item contains a pattern predicate, when two items resolve
    /// to the same column name, when a compound aggregate expression references
    /// something that is not a grouping key, when `ORDER BY` uses an aggregate
    /// although the `RETURN` itself does not aggregate, or when one of the
    /// called validators or the `SKIP` / `LIMIT` parsing fails.
2390    fn plan_return_clause(
2391        &self,
2392        return_clause: &ReturnClause,
2393        plan: LogicalPlan,
2394        vars_in_scope: &[VariableInfo],
2395    ) -> Result<LogicalPlan> {
2396        let mut plan = plan;
2397        let mut group_by = Vec::new();
2398        let mut aggregates = Vec::new();
2399        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2400        let mut has_agg = false;
2401        let mut projections = Vec::new();
2402        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2403        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2404        let mut projected_aliases: HashSet<String> = HashSet::new();
2405
        // Pass 1: classify every return item as grouping key, bare aggregate,
        // or compound aggregate, and record the names it contributes.
2406        for item in &return_clause.items {
2407            match item {
2408                ReturnItem::All => {
2409                    // RETURN * - add all user-named variables in scope
2410                    // (anonymous variables like _anon_0 are excluded)
2411                    let user_vars: Vec<_> = vars_in_scope
2412                        .iter()
2413                        .filter(|v| !v.name.starts_with("_anon_"))
2414                        .collect();
2415                    if user_vars.is_empty() {
2416                        return Err(anyhow!(
2417                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2418                        ));
2419                    }
2420                    for v in user_vars {
2421                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2422                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2423                            group_by.push(Expr::Variable(v.name.clone()));
2424                        }
2425                        projected_aliases.insert(v.name.clone());
2426                        projected_simple_reprs.insert(v.name.clone());
2427                    }
2428                }
2429                ReturnItem::Expr {
2430                    expr,
2431                    alias,
2432                    source_text,
2433                } => {
                    // NOTE(review): unlike `ReturnItem::All` above, this wildcard
                    // path neither filters `_anon_` variables nor rejects an empty
                    // scope — confirm that the difference is intentional.
2434                    if matches!(expr, Expr::Wildcard) {
2435                        for v in vars_in_scope {
2436                            projections
2437                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2438                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2439                                group_by.push(Expr::Variable(v.name.clone()));
2440                            }
2441                            projected_aliases.insert(v.name.clone());
2442                            projected_simple_reprs.insert(v.name.clone());
2443                        }
2444                    } else {
2445                        // Validate expression variables are defined
2446                        validate_expression_variables(expr, vars_in_scope)?;
2447                        // Validate function argument types and boolean operators
2448                        validate_expression(expr, vars_in_scope)?;
2449                        // Pattern predicates are not allowed in RETURN
2450                        if contains_pattern_predicate(expr) {
2451                            return Err(anyhow!(
2452                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2453                            ));
2454                        }
2455
2456                        // Use source text as column name when no explicit alias
2457                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2458                        projections.push((expr.clone(), effective_alias));
2459                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2460                            // Bare aggregate — push directly
2461                            has_agg = true;
2462                            aggregates.push(expr.clone());
2463                            projected_aggregate_reprs.insert(expr.to_string_repr());
2464                        } else if !is_window_function(expr)
2465                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2466                        {
2467                            // Compound aggregate or expression containing aggregates —
2468                            // extract the inner bare aggregates for the Aggregate node
2469                            has_agg = true;
2470                            compound_agg_exprs.push(expr.clone());
2471                            for inner in extract_inner_aggregates(expr) {
2472                                let repr = inner.to_string_repr();
2473                                if !projected_aggregate_reprs.contains(&repr) {
2474                                    aggregates.push(inner);
2475                                    projected_aggregate_reprs.insert(repr);
2476                                }
2477                            }
2478                        } else if !group_by.contains(expr) {
2479                            group_by.push(expr.clone());
2480                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2481                                projected_simple_reprs.insert(expr.to_string_repr());
2482                            }
2483                        }
2484
                        // Duplicate detection only looks at explicit aliases and
                        // bare variables; the source-text fallback name is not checked.
2485                        if let Some(a) = alias {
2486                            if projected_aliases.contains(a) {
2487                                return Err(anyhow!(
2488                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2489                                    a
2490                                ));
2491                            }
2492                            projected_aliases.insert(a.clone());
2493                        } else if let Expr::Variable(v) = expr {
2494                            if projected_aliases.contains(v) {
2495                                return Err(anyhow!(
2496                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2497                                    v
2498                                ));
2499                            }
2500                            projected_aliases.insert(v.clone());
2501                        }
2502                    }
2503                }
2504            }
2505        }
2506
2507        // Validate compound aggregate expressions: non-aggregate refs must be
2508        // individually present in the group_by as simple variables or properties.
2509        if has_agg {
2510            let group_by_reprs: HashSet<String> =
2511                group_by.iter().map(|e| e.to_string_repr()).collect();
2512            for expr in &compound_agg_exprs {
2513                let mut refs = Vec::new();
2514                collect_non_aggregate_refs(expr, false, &mut refs);
2515                for r in &refs {
2516                    let is_covered = match r {
2517                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
2518                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
2519                    };
2520                    if !is_covered {
2521                        return Err(anyhow!(
2522                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
2523                        ));
2524                    }
2525                }
2526            }
2527        }
2528
        // `group_by` and `aggregates` are moved into the Aggregate node here;
        // later stages only consult `has_agg` and the string-repr sets.
2529        if has_agg {
2530            plan = LogicalPlan::Aggregate {
2531                input: Box::new(plan),
2532                group_by,
2533                aggregates,
2534            };
2535        }
2536
        // Window functions are gathered from both the return items and ORDER BY.
2537        let mut window_exprs = Vec::new();
2538        for (expr, _) in &projections {
2539            Self::collect_window_functions(expr, &mut window_exprs);
2540        }
2541
2542        if let Some(order_by) = &return_clause.order_by {
2543            for item in order_by {
2544                Self::collect_window_functions(&item.expr, &mut window_exprs);
2545            }
2546        }
2547
2548        let has_window_exprs = !window_exprs.is_empty();
2549
2550        if has_window_exprs {
2551            // Before creating the Window node, we need to ensure all properties
2552            // referenced by window functions are available. Create a Project node
2553            // that loads these properties.
2554            let mut props_needed_for_window: Vec<Expr> = Vec::new();
2555            for window_expr in &window_exprs {
2556                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
2557            }
2558
2559            // Also include non-window expressions from projections that might be needed
2560            // Preserve qualified names (e.g., "e.salary") as aliases for properties
2561            let non_window_projections: Vec<_> = projections
2562                .iter()
2563                .filter_map(|(expr, alias)| {
2564                    // Keep expressions that don't have window_spec
2565                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
2566                        window_spec.is_none()
2567                    } else {
2568                        true
2569                    };
2570
2571                    if keep {
2572                        // For property references, use the qualified name as alias
2573                        let new_alias = if matches!(expr, Expr::Property(..)) {
2574                            Some(expr.to_string_repr())
2575                        } else {
2576                            alias.clone()
2577                        };
2578                        Some((expr.clone(), new_alias))
2579                    } else {
2580                        None
2581                    }
2582                })
2583                .collect();
2584
2585            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
2586                let mut intermediate_projections = non_window_projections;
2587                // Add any additional property references needed by window functions
2588                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
2589                for prop in &props_needed_for_window {
2590                    if !intermediate_projections
2591                        .iter()
2592                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
2593                    {
2594                        let qualified_name = prop.to_string_repr();
2595                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
2596                    }
2597                }
2598
2599                if !intermediate_projections.is_empty() {
2600                    plan = LogicalPlan::Project {
2601                        input: Box::new(plan),
2602                        projections: intermediate_projections,
2603                    };
2604                }
2605            }
2606
2607            // Transform property expressions in window functions to use qualified variable names
2608            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
2609            let transformed_window_exprs: Vec<Expr> = window_exprs
2610                .into_iter()
2611                .map(Self::transform_window_expr_properties)
2612                .collect();
2613
2614            plan = LogicalPlan::Window {
2615                input: Box::new(plan),
2616                window_exprs: transformed_window_exprs,
2617            };
2618        }
2619
2620        if let Some(order_by) = &return_clause.order_by {
            // Maps each output column name to the expression ORDER BY should
            // use when it refers to that name.
2621            let alias_exprs: HashMap<String, Expr> = projections
2622                .iter()
2623                .filter_map(|(expr, alias)| {
2624                    alias.as_ref().map(|a| {
2625                        // ORDER BY is planned before the final RETURN projection.
2626                        // In aggregate contexts, aliases must resolve to the
2627                        // post-aggregate output columns, not raw aggregate calls.
2628                        let rewritten = if has_agg && !has_window_exprs {
2629                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
2630                                Expr::Variable(aggregate_column_name(expr))
2631                            } else if is_compound_aggregate(expr)
2632                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
2633                            {
2634                                replace_aggregates_with_columns(expr)
2635                            } else {
2636                                Expr::Variable(expr.to_string_repr())
2637                            }
2638                        } else {
2639                            expr.clone()
2640                        };
2641                        (a.clone(), rewritten)
2642                    })
2643                })
2644                .collect();
2645
2646            // Build an extended scope that includes RETURN aliases so ORDER BY
2647            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
2648            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
2649                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
2650                // Keep aliases and directly returned variables in scope.
2651                let mut scope = Vec::new();
2652                for (expr, alias) in &projections {
2653                    if let Some(a) = alias
2654                        && !is_var_in_scope(&scope, a)
2655                    {
2656                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2657                    }
2658                    if let Expr::Variable(v) = expr
2659                        && !is_var_in_scope(&scope, v)
2660                    {
2661                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2662                    }
2663                }
2664                scope
2665            } else {
2666                let mut scope = vars_in_scope.to_vec();
2667                for (expr, alias) in &projections {
2668                    if let Some(a) = alias
2669                        && !is_var_in_scope(&scope, a)
2670                    {
2671                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2672                    } else if let Expr::Variable(v) = expr
2673                        && !is_var_in_scope(&scope, v)
2674                    {
2675                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2676                    }
2677                }
2678                scope
2679            };
2680            // Validate ORDER BY expressions against the extended scope
2681            for item in order_by {
2682                // DISTINCT allows ORDER BY on the same projected expression
2683                // even when underlying variables are not otherwise visible.
2684                let matches_projected_expr = return_clause.distinct
2685                    && projections
2686                        .iter()
2687                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
2688                if !matches_projected_expr {
2689                    validate_expression_variables(&item.expr, &order_by_scope)?;
2690                    validate_expression(&item.expr, &order_by_scope)?;
2691                }
2692                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
2693                if has_aggregate_in_item && !has_agg {
2694                    return Err(anyhow!(
2695                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
2696                    ));
2697                }
2698                if has_agg && has_aggregate_in_item {
2699                    validate_with_order_by_aggregate_item(
2700                        &item.expr,
2701                        &projected_aggregate_reprs,
2702                        &projected_simple_reprs,
2703                        &projected_aliases,
2704                    )?;
2705                }
2706            }
            // Substitute aliases first, then (in pure aggregate plans) swap any
            // remaining aggregate calls for their output columns.
2707            let rewritten_order_by: Vec<SortItem> = order_by
2708                .iter()
2709                .map(|item| SortItem {
2710                    expr: {
2711                        let mut rewritten =
2712                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
2713                        if has_agg && !has_window_exprs {
2714                            rewritten = replace_aggregates_with_columns(&rewritten);
2715                        }
2716                        rewritten
2717                    },
2718                    ascending: item.ascending,
2719                })
2720                .collect();
2721            plan = LogicalPlan::Sort {
2722                input: Box::new(plan),
2723                order_by: rewritten_order_by,
2724            };
2725        }
2726
2727        if return_clause.skip.is_some() || return_clause.limit.is_some() {
2728            let skip = return_clause
2729                .skip
2730                .as_ref()
2731                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
2732                .transpose()?
2733                .flatten();
2734            let fetch = return_clause
2735                .limit
2736                .as_ref()
2737                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
2738                .transpose()?
2739                .flatten();
2740
2741            plan = LogicalPlan::Limit {
2742                input: Box::new(plan),
2743                skip,
2744                fetch,
2745            };
2746        }
2747
2748        if !projections.is_empty() {
2749            // If we created an Aggregate or Window node, we need to adjust the final projections
2750            // to reference aggregate/window function results as columns instead of re-evaluating them
2751            let final_projections = if has_agg || has_window_exprs {
2752                projections
2753                    .into_iter()
2754                    .map(|(expr, alias)| {
2755                        // Check if this expression is an aggregate function
2756                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
2757                        {
2758                            // Bare aggregate — replace with column reference
2759                            let col_name = Self::get_aggregate_column_name(&expr);
2760                            (Expr::Variable(col_name), alias)
2761                        } else if !has_window_exprs
2762                            && (is_compound_aggregate(&expr)
2763                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
2764                        {
2765                            // Compound aggregate — replace inner aggregates with
2766                            // column references, keep outer expression for Project
2767                            (replace_aggregates_with_columns(&expr), alias)
2768                        }
2769                        // For grouped RETURN projections, reference the pre-computed
2770                        // group-by output column instead of re-evaluating the expression
2771                        // against the aggregate schema (which no longer has original vars).
2772                        else if has_agg
2773                            && !has_window_exprs
2774                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
2775                        {
2776                            (Expr::Variable(expr.to_string_repr()), alias)
2777                        }
2778                        // Check if this expression is a window function
2779                        else if let Expr::FunctionCall {
2780                            window_spec: Some(_),
2781                            ..
2782                        } = &expr
2783                        {
2784                            // Replace window function with a column reference to its result
2785                            // The column name in the Window output is the full expression string
2786                            let window_col_name = expr.to_string_repr();
2787                            // Keep the original alias for the final output
2788                            (Expr::Variable(window_col_name), alias)
2789                        } else {
2790                            (expr, alias)
2791                        }
2792                    })
2793                    .collect()
2794            } else {
2795                projections
2796            };
2797
2798            plan = LogicalPlan::Project {
2799                input: Box::new(plan),
2800                projections: final_projections,
2801            };
2802        }
2803
        // NOTE(review): Distinct is stacked above Sort and Limit, so rows are
        // de-duplicated only after SKIP/LIMIT were applied — confirm this is the
        // intended order for `RETURN DISTINCT ... LIMIT n`.
2804        if return_clause.distinct {
2805            plan = LogicalPlan::Distinct {
2806                input: Box::new(plan),
2807            };
2808        }
2809
2810        Ok(plan)
2811    }
2812
2813    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
2814        let typed_vars: Vec<VariableInfo> = initial_vars
2815            .into_iter()
2816            .map(|name| VariableInfo::new(name, VariableType::Imported))
2817            .collect();
2818        self.plan_single_typed(query, typed_vars)
2819    }
2820
2821    /// Rewrite a query then plan it, preserving typed variable scope when possible.
2822    ///
2823    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
2824    /// variable info through and avoid false type-conflict errors in subqueries.
2825    /// For unions and other compound queries, falls back to `plan_with_scope`.
2826    fn rewrite_and_plan_typed(
2827        &self,
2828        query: Query,
2829        typed_vars: &[VariableInfo],
2830    ) -> Result<LogicalPlan> {
2831        let rewritten = crate::query::rewrite::rewrite_query(query)?;
2832        match rewritten {
2833            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
2834            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
2835        }
2836    }
2837
2838    fn plan_single_typed(
2839        &self,
2840        query: Statement,
2841        initial_vars: Vec<VariableInfo>,
2842    ) -> Result<LogicalPlan> {
2843        let mut plan = LogicalPlan::Empty;
2844
2845        if !initial_vars.is_empty() {
2846            // Project bound variables from outer scope as parameters.
2847            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
2848            // Use Parameter expressions to read from params, not Variable which would read from input row.
2849            let projections = initial_vars
2850                .iter()
2851                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
2852                .collect();
2853            plan = LogicalPlan::Project {
2854                input: Box::new(plan),
2855                projections,
2856            };
2857        }
2858
2859        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
2860        // Track variables introduced by CREATE clauses so we can distinguish
2861        // MATCH-introduced variables (which cannot be re-created as bare nodes)
2862        // from CREATE-introduced variables (which can be referenced as bare nodes).
2863        let mut create_introduced_vars: HashSet<String> = HashSet::new();
2864        // Track variables targeted by DELETE so we can reject property/label
2865        // access on deleted entities in subsequent RETURN clauses.
2866        let mut deleted_vars: HashSet<String> = HashSet::new();
2867
2868        let clause_count = query.clauses.len();
2869        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
2870            match clause {
2871                Clause::Match(match_clause) => {
2872                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
2873                }
2874                Clause::Unwind(unwind) => {
2875                    plan = LogicalPlan::Unwind {
2876                        input: Box::new(plan),
2877                        expr: unwind.expr.clone(),
2878                        variable: unwind.variable.clone(),
2879                    };
2880                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
2881                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
2882                }
2883                Clause::Call(call_clause) => {
2884                    match &call_clause.kind {
2885                        CallKind::Procedure {
2886                            procedure,
2887                            arguments,
2888                        } => {
2889                            // Validate that procedure arguments don't contain aggregation functions
2890                            for arg in arguments {
2891                                if contains_aggregate_recursive(arg) {
2892                                    return Err(anyhow!(
2893                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
2894                                    ));
2895                                }
2896                            }
2897
2898                            let has_yield_star = call_clause.yield_items.len() == 1
2899                                && call_clause.yield_items[0].name == "*"
2900                                && call_clause.yield_items[0].alias.is_none();
2901                            if has_yield_star && clause_idx + 1 < clause_count {
2902                                return Err(anyhow!(
2903                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
2904                                ));
2905                            }
2906
2907                            // Validate for duplicate yield names (VariableAlreadyBound)
2908                            let mut yield_names = Vec::new();
2909                            for item in &call_clause.yield_items {
2910                                if item.name == "*" {
2911                                    continue;
2912                                }
2913                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
2914                                if yield_names.contains(output_name) {
2915                                    return Err(anyhow!(
2916                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
2917                                        output_name
2918                                    ));
2919                                }
2920                                // Check against existing scope (in-query CALL must not shadow)
2921                                if clause_idx > 0
2922                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
2923                                {
2924                                    return Err(anyhow!(
2925                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
2926                                        output_name
2927                                    ));
2928                                }
2929                                yield_names.push(output_name.clone());
2930                            }
2931
2932                            let mut yields = Vec::new();
2933                            for item in &call_clause.yield_items {
2934                                if item.name == "*" {
2935                                    continue;
2936                                }
2937                                yields.push((item.name.clone(), item.alias.clone()));
2938                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
2939                                // Use Imported because procedure return types are unknown
2940                                // at plan time (could be nodes, edges, or scalars)
2941                                add_var_to_scope(
2942                                    &mut vars_in_scope,
2943                                    var_name,
2944                                    VariableType::Imported,
2945                                )?;
2946                            }
2947                            let proc_plan = LogicalPlan::ProcedureCall {
2948                                procedure_name: procedure.clone(),
2949                                arguments: arguments.clone(),
2950                                yield_items: yields.clone(),
2951                            };
2952
2953                            if matches!(plan, LogicalPlan::Empty) {
2954                                // Standalone CALL (first clause) — use directly
2955                                plan = proc_plan;
2956                            } else if yields.is_empty() {
2957                                // In-query CALL with no YIELD (void procedure):
2958                                // preserve the input rows unchanged
2959                            } else {
2960                                // In-query CALL with YIELD: cross-join input × procedure output
2961                                plan = LogicalPlan::Apply {
2962                                    input: Box::new(plan),
2963                                    subquery: Box::new(proc_plan),
2964                                    input_filter: None,
2965                                };
2966                            }
2967                        }
2968                        CallKind::Subquery(query) => {
2969                            let subquery_plan =
2970                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
2971
2972                            // Extract variables from subquery RETURN clause
2973                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
2974
2975                            // Add new variables to scope (as Scalar since they come from subquery projection)
2976                            for var in subquery_vars {
2977                                if !is_var_in_scope(&vars_in_scope, &var) {
2978                                    add_var_to_scope(
2979                                        &mut vars_in_scope,
2980                                        &var,
2981                                        VariableType::Scalar,
2982                                    )?;
2983                                }
2984                            }
2985
2986                            plan = LogicalPlan::SubqueryCall {
2987                                input: Box::new(plan),
2988                                subquery: Box::new(subquery_plan),
2989                            };
2990                        }
2991                    }
2992                }
2993                Clause::Merge(merge_clause) => {
2994                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
2995
2996                    plan = LogicalPlan::Merge {
2997                        input: Box::new(plan),
2998                        pattern: merge_clause.pattern.clone(),
2999                        on_match: Some(SetClause {
3000                            items: merge_clause.on_match.clone(),
3001                        }),
3002                        on_create: Some(SetClause {
3003                            items: merge_clause.on_create.clone(),
3004                        }),
3005                    };
3006
3007                    for path in &merge_clause.pattern.paths {
3008                        if let Some(path_var) = &path.variable
3009                            && !path_var.is_empty()
3010                            && !is_var_in_scope(&vars_in_scope, path_var)
3011                        {
3012                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3013                        }
3014                        for element in &path.elements {
3015                            if let PatternElement::Node(n) = element {
3016                                if let Some(v) = &n.variable
3017                                    && !is_var_in_scope(&vars_in_scope, v)
3018                                {
3019                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3020                                }
3021                            } else if let PatternElement::Relationship(r) = element
3022                                && let Some(v) = &r.variable
3023                                && !is_var_in_scope(&vars_in_scope, v)
3024                            {
3025                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3026                            }
3027                        }
3028                    }
3029                }
3030                Clause::Create(create_clause) => {
3031                    // Validate CREATE patterns:
3032                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3033                    // - Bare nodes (v) are "references" if bound, "creations" if not
3034                    // - Relationships are always creations - can't rebind
3035                    // - Within CREATE, each new variable can only be defined once
3036                    // - Variables used in properties must be defined
3037                    let mut create_vars: Vec<&str> = Vec::new();
3038                    for path in &create_clause.pattern.paths {
3039                        let is_standalone_node = path.elements.len() == 1;
3040                        for element in &path.elements {
3041                            match element {
3042                                PatternElement::Node(n) => {
3043                                    validate_property_variables(
3044                                        &n.properties,
3045                                        &vars_in_scope,
3046                                        &create_vars,
3047                                    )?;
3048
3049                                    if let Some(v) = n.variable.as_deref()
3050                                        && !v.is_empty()
3051                                    {
3052                                        // A node is a "creation" if it has labels or properties
3053                                        let is_creation =
3054                                            !n.labels.is_empty() || n.properties.is_some();
3055
3056                                        if is_creation {
3057                                            check_not_already_bound(
3058                                                v,
3059                                                &vars_in_scope,
3060                                                &create_vars,
3061                                            )?;
3062                                            create_vars.push(v);
3063                                        } else if is_standalone_node
3064                                            && is_var_in_scope(&vars_in_scope, v)
3065                                            && !create_introduced_vars.contains(v)
3066                                        {
3067                                            // Standalone bare node referencing a variable from a
3068                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3069                                            // Bare nodes used as relationship endpoints
3070                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3071                                            return Err(anyhow!(
3072                                                "SyntaxError: VariableAlreadyBound - '{}'",
3073                                                v
3074                                            ));
3075                                        } else if !create_vars.contains(&v) {
3076                                            // New bare variable — register it
3077                                            create_vars.push(v);
3078                                        }
3079                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3080                                    }
3081                                }
3082                                PatternElement::Relationship(r) => {
3083                                    validate_property_variables(
3084                                        &r.properties,
3085                                        &vars_in_scope,
3086                                        &create_vars,
3087                                    )?;
3088
3089                                    if let Some(v) = r.variable.as_deref()
3090                                        && !v.is_empty()
3091                                    {
3092                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3093                                        create_vars.push(v);
3094                                    }
3095
3096                                    // Validate relationship constraints for CREATE
3097                                    if r.types.len() != 1 {
3098                                        return Err(anyhow!(
3099                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3100                                        ));
3101                                    }
3102                                    if r.direction == Direction::Both {
3103                                        return Err(anyhow!(
3104                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3105                                        ));
3106                                    }
3107                                    if r.range.is_some() {
3108                                        return Err(anyhow!(
3109                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3110                                        ));
3111                                    }
3112                                }
3113                                PatternElement::Parenthesized { .. } => {}
3114                            }
3115                        }
3116                    }
3117
3118                    // Batch consecutive CREATEs to avoid deep recursion
3119                    match &mut plan {
3120                        LogicalPlan::CreateBatch { patterns, .. } => {
3121                            // Append to existing batch
3122                            patterns.push(create_clause.pattern.clone());
3123                        }
3124                        LogicalPlan::Create { input, pattern } => {
3125                            // Convert single Create to CreateBatch with both patterns
3126                            let first_pattern = pattern.clone();
3127                            plan = LogicalPlan::CreateBatch {
3128                                input: input.clone(),
3129                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3130                            };
3131                        }
3132                        _ => {
3133                            // Start new Create (may become batch if more CREATEs follow)
3134                            plan = LogicalPlan::Create {
3135                                input: Box::new(plan),
3136                                pattern: create_clause.pattern.clone(),
3137                            };
3138                        }
3139                    }
3140                    // Add variables from created nodes and relationships to scope
3141                    for path in &create_clause.pattern.paths {
3142                        for element in &path.elements {
3143                            match element {
3144                                PatternElement::Node(n) => {
3145                                    if let Some(var) = &n.variable
3146                                        && !var.is_empty()
3147                                    {
3148                                        create_introduced_vars.insert(var.clone());
3149                                        add_var_to_scope(
3150                                            &mut vars_in_scope,
3151                                            var,
3152                                            VariableType::Node,
3153                                        )?;
3154                                    }
3155                                }
3156                                PatternElement::Relationship(r) => {
3157                                    if let Some(var) = &r.variable
3158                                        && !var.is_empty()
3159                                    {
3160                                        create_introduced_vars.insert(var.clone());
3161                                        add_var_to_scope(
3162                                            &mut vars_in_scope,
3163                                            var,
3164                                            VariableType::Edge,
3165                                        )?;
3166                                    }
3167                                }
3168                                PatternElement::Parenthesized { .. } => {
3169                                    // Skip for now - not commonly used in CREATE
3170                                }
3171                            }
3172                        }
3173                    }
3174                }
3175                Clause::Set(set_clause) => {
3176                    // Validate SET value expressions
3177                    for item in &set_clause.items {
3178                        match item {
3179                            SetItem::Property { value, .. }
3180                            | SetItem::Variable { value, .. }
3181                            | SetItem::VariablePlus { value, .. } => {
3182                                validate_expression_variables(value, &vars_in_scope)?;
3183                                validate_expression(value, &vars_in_scope)?;
3184                                if contains_pattern_predicate(value) {
3185                                    return Err(anyhow!(
3186                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3187                                    ));
3188                                }
3189                            }
3190                            SetItem::Labels { .. } => {}
3191                        }
3192                    }
3193                    plan = LogicalPlan::Set {
3194                        input: Box::new(plan),
3195                        items: set_clause.items.clone(),
3196                    };
3197                }
3198                Clause::Remove(remove_clause) => {
3199                    plan = LogicalPlan::Remove {
3200                        input: Box::new(plan),
3201                        items: remove_clause.items.clone(),
3202                    };
3203                }
3204                Clause::Delete(delete_clause) => {
3205                    // Validate DELETE targets
3206                    for item in &delete_clause.items {
3207                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3208                        if matches!(item, Expr::LabelCheck { .. }) {
3209                            return Err(anyhow!(
3210                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3211                            ));
3212                        }
3213                        let vars_used = collect_expr_variables(item);
3214                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3215                        if vars_used.is_empty() {
3216                            return Err(anyhow!(
3217                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3218                            ));
3219                        }
3220                        for var in &vars_used {
3221                            // Check if variable is defined
3222                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3223                                return Err(anyhow!(
3224                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3225                                    var
3226                                ));
3227                            }
3228                        }
3229                        // Strict type check only for simple variable references —
3230                        // complex expressions (property access, array index, etc.)
3231                        // may resolve to a node/edge at runtime even if the base
3232                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3233                        if let Expr::Variable(name) = item
3234                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3235                            && matches!(
3236                                info.var_type,
3237                                VariableType::Scalar | VariableType::ScalarLiteral
3238                            )
3239                        {
3240                            return Err(anyhow!(
3241                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3242                                name
3243                            ));
3244                        }
3245                    }
3246                    // Track deleted variables for later validation
3247                    for item in &delete_clause.items {
3248                        if let Expr::Variable(name) = item {
3249                            deleted_vars.insert(name.clone());
3250                        }
3251                    }
3252                    plan = LogicalPlan::Delete {
3253                        input: Box::new(plan),
3254                        items: delete_clause.items.clone(),
3255                        detach: delete_clause.detach,
3256                    };
3257                }
3258                Clause::With(with_clause) => {
3259                    let (new_plan, new_vars) =
3260                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3261                    plan = new_plan;
3262                    vars_in_scope = new_vars;
3263                }
3264                Clause::WithRecursive(with_recursive) => {
3265                    // Plan the recursive CTE
3266                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3267                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3268                    add_var_to_scope(
3269                        &mut vars_in_scope,
3270                        &with_recursive.name,
3271                        VariableType::Scalar,
3272                    )?;
3273                }
3274                Clause::Return(return_clause) => {
3275                    // Check for property/label access on deleted entities
3276                    if !deleted_vars.is_empty() {
3277                        for item in &return_clause.items {
3278                            if let ReturnItem::Expr { expr, .. } = item {
3279                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3280                            }
3281                        }
3282                    }
3283                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3284                } // All Clause variants are handled above - no catch-all needed
3285            }
3286        }
3287
3288        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3289        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3290        // If RETURN was used, the plan will have been wrapped in Project, so we only
3291        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3292        let plan = match &plan {
3293            LogicalPlan::Create { .. }
3294            | LogicalPlan::CreateBatch { .. }
3295            | LogicalPlan::Delete { .. }
3296            | LogicalPlan::Set { .. }
3297            | LogicalPlan::Remove { .. }
3298            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3299                input: Box::new(plan),
3300                skip: None,
3301                fetch: Some(0),
3302            },
3303            _ => plan,
3304        };
3305
3306        Ok(plan)
3307    }
3308
3309    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3310        match expr {
3311            Expr::Property(_, _) => {
3312                if !collected
3313                    .iter()
3314                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3315                {
3316                    collected.push(expr.clone());
3317                }
3318            }
3319            Expr::Variable(_) => {
3320                // Variables are already available, don't need to project them
3321            }
3322            Expr::BinaryOp { left, right, .. } => {
3323                Self::collect_properties_from_expr(left, collected);
3324                Self::collect_properties_from_expr(right, collected);
3325            }
3326            Expr::FunctionCall {
3327                args, window_spec, ..
3328            } => {
3329                for arg in args {
3330                    Self::collect_properties_from_expr(arg, collected);
3331                }
3332                if let Some(spec) = window_spec {
3333                    for partition_expr in &spec.partition_by {
3334                        Self::collect_properties_from_expr(partition_expr, collected);
3335                    }
3336                    for sort_item in &spec.order_by {
3337                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3338                    }
3339                }
3340            }
3341            Expr::List(items) => {
3342                for item in items {
3343                    Self::collect_properties_from_expr(item, collected);
3344                }
3345            }
3346            Expr::UnaryOp { expr: e, .. }
3347            | Expr::IsNull(e)
3348            | Expr::IsNotNull(e)
3349            | Expr::IsUnique(e) => {
3350                Self::collect_properties_from_expr(e, collected);
3351            }
3352            Expr::Case {
3353                expr,
3354                when_then,
3355                else_expr,
3356            } => {
3357                if let Some(e) = expr {
3358                    Self::collect_properties_from_expr(e, collected);
3359                }
3360                for (w, t) in when_then {
3361                    Self::collect_properties_from_expr(w, collected);
3362                    Self::collect_properties_from_expr(t, collected);
3363                }
3364                if let Some(e) = else_expr {
3365                    Self::collect_properties_from_expr(e, collected);
3366                }
3367            }
3368            Expr::In { expr, list } => {
3369                Self::collect_properties_from_expr(expr, collected);
3370                Self::collect_properties_from_expr(list, collected);
3371            }
3372            Expr::ArrayIndex { array, index } => {
3373                Self::collect_properties_from_expr(array, collected);
3374                Self::collect_properties_from_expr(index, collected);
3375            }
3376            Expr::ArraySlice { array, start, end } => {
3377                Self::collect_properties_from_expr(array, collected);
3378                if let Some(s) = start {
3379                    Self::collect_properties_from_expr(s, collected);
3380                }
3381                if let Some(e) = end {
3382                    Self::collect_properties_from_expr(e, collected);
3383                }
3384            }
3385            _ => {}
3386        }
3387    }
3388
3389    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3390        if let Expr::FunctionCall { window_spec, .. } = expr {
3391            // Collect any function with a window spec (OVER clause)
3392            if window_spec.is_some() {
3393                if !collected
3394                    .iter()
3395                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3396                {
3397                    collected.push(expr.clone());
3398                }
3399                return;
3400            }
3401        }
3402
3403        match expr {
3404            Expr::BinaryOp { left, right, .. } => {
3405                Self::collect_window_functions(left, collected);
3406                Self::collect_window_functions(right, collected);
3407            }
3408            Expr::FunctionCall { args, .. } => {
3409                for arg in args {
3410                    Self::collect_window_functions(arg, collected);
3411                }
3412            }
3413            Expr::List(items) => {
3414                for i in items {
3415                    Self::collect_window_functions(i, collected);
3416                }
3417            }
3418            Expr::Map(items) => {
3419                for (_, i) in items {
3420                    Self::collect_window_functions(i, collected);
3421                }
3422            }
3423            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3424                Self::collect_window_functions(e, collected);
3425            }
3426            Expr::Case {
3427                expr,
3428                when_then,
3429                else_expr,
3430            } => {
3431                if let Some(e) = expr {
3432                    Self::collect_window_functions(e, collected);
3433                }
3434                for (w, t) in when_then {
3435                    Self::collect_window_functions(w, collected);
3436                    Self::collect_window_functions(t, collected);
3437                }
3438                if let Some(e) = else_expr {
3439                    Self::collect_window_functions(e, collected);
3440                }
3441            }
3442            Expr::Reduce {
3443                init, list, expr, ..
3444            } => {
3445                Self::collect_window_functions(init, collected);
3446                Self::collect_window_functions(list, collected);
3447                Self::collect_window_functions(expr, collected);
3448            }
3449            Expr::Quantifier {
3450                list, predicate, ..
3451            } => {
3452                Self::collect_window_functions(list, collected);
3453                Self::collect_window_functions(predicate, collected);
3454            }
3455            Expr::In { expr, list } => {
3456                Self::collect_window_functions(expr, collected);
3457                Self::collect_window_functions(list, collected);
3458            }
3459            Expr::ArrayIndex { array, index } => {
3460                Self::collect_window_functions(array, collected);
3461                Self::collect_window_functions(index, collected);
3462            }
3463            Expr::ArraySlice { array, start, end } => {
3464                Self::collect_window_functions(array, collected);
3465                if let Some(s) = start {
3466                    Self::collect_window_functions(s, collected);
3467                }
3468                if let Some(e) = end {
3469                    Self::collect_window_functions(e, collected);
3470                }
3471            }
3472            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3473            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3474            _ => {}
3475        }
3476    }
3477
3478    /// Transform property expressions in manual window functions to use qualified variable names.
3479    ///
3480    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3481    /// so the executor can look up values directly from the row HashMap after the
3482    /// intermediate projection has materialized these properties with qualified names.
3483    ///
3484    /// Transforms ALL window functions (both manual and aggregate).
3485    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3486    fn transform_window_expr_properties(expr: Expr) -> Expr {
3487        let Expr::FunctionCall {
3488            name,
3489            args,
3490            window_spec: Some(spec),
3491            distinct,
3492        } = expr
3493        else {
3494            return expr;
3495        };
3496
3497        // Transform arguments for ALL window functions
3498        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3499        let transformed_args = args
3500            .into_iter()
3501            .map(Self::transform_property_to_variable)
3502            .collect();
3503
3504        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3505        let transformed_partition_by = spec
3506            .partition_by
3507            .into_iter()
3508            .map(Self::transform_property_to_variable)
3509            .collect();
3510
3511        let transformed_order_by = spec
3512            .order_by
3513            .into_iter()
3514            .map(|item| SortItem {
3515                expr: Self::transform_property_to_variable(item.expr),
3516                ascending: item.ascending,
3517            })
3518            .collect();
3519
3520        Expr::FunctionCall {
3521            name,
3522            args: transformed_args,
3523            window_spec: Some(WindowSpec {
3524                partition_by: transformed_partition_by,
3525                order_by: transformed_order_by,
3526            }),
3527            distinct,
3528        }
3529    }
3530
3531    /// Transform a property expression to a variable expression with qualified name.
3532    ///
3533    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
3534    fn transform_property_to_variable(expr: Expr) -> Expr {
3535        let Expr::Property(base, prop) = expr else {
3536            return expr;
3537        };
3538
3539        match *base {
3540            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
3541            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
3542        }
3543    }
3544
3545    /// Transform VALID_AT macro into function call
3546    ///
3547    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
3548    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
3549    fn transform_valid_at_to_function(expr: Expr) -> Expr {
3550        match expr {
3551            Expr::ValidAt {
3552                entity,
3553                timestamp,
3554                start_prop,
3555                end_prop,
3556            } => {
3557                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
3558                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
3559
3560                Expr::FunctionCall {
3561                    name: "uni.temporal.validAt".to_string(),
3562                    args: vec![
3563                        Self::transform_valid_at_to_function(*entity),
3564                        Expr::Literal(CypherLiteral::String(start)),
3565                        Expr::Literal(CypherLiteral::String(end)),
3566                        Self::transform_valid_at_to_function(*timestamp),
3567                    ],
3568                    distinct: false,
3569                    window_spec: None,
3570                }
3571            }
3572            // Recursively transform nested expressions
3573            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
3574                left: Box::new(Self::transform_valid_at_to_function(*left)),
3575                op,
3576                right: Box::new(Self::transform_valid_at_to_function(*right)),
3577            },
3578            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
3579                op,
3580                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3581            },
3582            Expr::FunctionCall {
3583                name,
3584                args,
3585                distinct,
3586                window_spec,
3587            } => Expr::FunctionCall {
3588                name,
3589                args: args
3590                    .into_iter()
3591                    .map(Self::transform_valid_at_to_function)
3592                    .collect(),
3593                distinct,
3594                window_spec,
3595            },
3596            Expr::Property(base, prop) => {
3597                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
3598            }
3599            Expr::List(items) => Expr::List(
3600                items
3601                    .into_iter()
3602                    .map(Self::transform_valid_at_to_function)
3603                    .collect(),
3604            ),
3605            Expr::In { expr, list } => Expr::In {
3606                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3607                list: Box::new(Self::transform_valid_at_to_function(*list)),
3608            },
3609            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
3610            Expr::IsNotNull(e) => {
3611                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
3612            }
3613            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
3614            // Other cases: return as-is
3615            other => other,
3616        }
3617    }
3618
3619    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
3620    fn plan_match_clause(
3621        &self,
3622        match_clause: &MatchClause,
3623        plan: LogicalPlan,
3624        vars_in_scope: &mut Vec<VariableInfo>,
3625    ) -> Result<LogicalPlan> {
3626        let mut plan = plan;
3627
3628        if match_clause.pattern.paths.is_empty() {
3629            return Err(anyhow!("Empty pattern"));
3630        }
3631
3632        // Track variables introduced by this OPTIONAL MATCH
3633        let vars_before_pattern = vars_in_scope.len();
3634
3635        for path in &match_clause.pattern.paths {
3636            if let Some(mode) = &path.shortest_path_mode {
3637                plan =
3638                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
3639            } else {
3640                plan = self.plan_path(
3641                    path,
3642                    plan,
3643                    vars_in_scope,
3644                    match_clause.optional,
3645                    vars_before_pattern,
3646                )?;
3647            }
3648        }
3649
3650        // Collect variables introduced by this OPTIONAL MATCH pattern
3651        let optional_vars: HashSet<String> = if match_clause.optional {
3652            vars_in_scope[vars_before_pattern..]
3653                .iter()
3654                .map(|v| v.name.clone())
3655                .collect()
3656        } else {
3657            HashSet::new()
3658        };
3659
3660        // Handle WHERE clause with vector_similarity and predicate pushdown
3661        if let Some(predicate) = &match_clause.where_clause {
3662            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
3663        }
3664
3665        Ok(plan)
3666    }
3667
3668    /// Plan a shortestPath pattern.
3669    fn plan_shortest_path(
3670        &self,
3671        path: &PathPattern,
3672        plan: LogicalPlan,
3673        vars_in_scope: &mut Vec<VariableInfo>,
3674        mode: &ShortestPathMode,
3675        _vars_before_pattern: usize,
3676    ) -> Result<LogicalPlan> {
3677        let mut plan = plan;
3678        let elements = &path.elements;
3679
3680        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
3681        if elements.len() < 3 || elements.len().is_multiple_of(2) {
3682            return Err(anyhow!(
3683                "shortestPath requires at least one relationship: (a)-[*]->(b)"
3684            ));
3685        }
3686
3687        let source_node = match &elements[0] {
3688            PatternElement::Node(n) => n,
3689            _ => return Err(anyhow!("ShortestPath must start with a node")),
3690        };
3691        let rel = match &elements[1] {
3692            PatternElement::Relationship(r) => r,
3693            _ => {
3694                return Err(anyhow!(
3695                    "ShortestPath middle element must be a relationship"
3696                ));
3697            }
3698        };
3699        let target_node = match &elements[2] {
3700            PatternElement::Node(n) => n,
3701            _ => return Err(anyhow!("ShortestPath must end with a node")),
3702        };
3703
3704        let source_var = source_node
3705            .variable
3706            .clone()
3707            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
3708        let target_var = target_node
3709            .variable
3710            .clone()
3711            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
3712        let path_var = path
3713            .variable
3714            .clone()
3715            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
3716
3717        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
3718        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
3719
3720        // Plan source node if not bound
3721        if !source_bound {
3722            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
3723        } else if let Some(prop_filter) =
3724            self.properties_to_expr(&source_var, &source_node.properties)
3725        {
3726            plan = LogicalPlan::Filter {
3727                input: Box::new(plan),
3728                predicate: prop_filter,
3729                optional_variables: HashSet::new(),
3730            };
3731        }
3732
3733        // Plan target node if not bound
3734        let target_label_id = if !target_bound {
3735            // Use first label for target_label_id
3736            let target_label_name = target_node
3737                .labels
3738                .first()
3739                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
3740            let target_label_meta = self
3741                .schema
3742                .get_label_case_insensitive(target_label_name)
3743                .ok_or_else(|| anyhow!("Label {} not found", target_label_name))?;
3744
3745            let target_scan = LogicalPlan::Scan {
3746                label_id: target_label_meta.id,
3747                labels: target_node.labels.clone(),
3748                variable: target_var.clone(),
3749                filter: self.properties_to_expr(&target_var, &target_node.properties),
3750                optional: false,
3751            };
3752
3753            plan = Self::join_with_plan(plan, target_scan);
3754            target_label_meta.id
3755        } else {
3756            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
3757            {
3758                plan = LogicalPlan::Filter {
3759                    input: Box::new(plan),
3760                    predicate: prop_filter,
3761                    optional_variables: HashSet::new(),
3762                };
3763            }
3764            0 // Wildcard for already-bound target
3765        };
3766
3767        // Add ShortestPath operator
3768        let edge_type_ids = if rel.types.is_empty() {
3769            // If no type specified, fetch all edge types (both schema and schemaless)
3770            self.schema.all_edge_type_ids()
3771        } else {
3772            let mut ids = Vec::new();
3773            for type_name in &rel.types {
3774                let edge_meta = self
3775                    .schema
3776                    .edge_types
3777                    .get(type_name)
3778                    .ok_or_else(|| anyhow!("Edge type {} not found", type_name))?;
3779                ids.push(edge_meta.id);
3780            }
3781            ids
3782        };
3783
3784        // Extract hop constraints from relationship pattern
3785        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
3786        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
3787
3788        let sp_plan = match mode {
3789            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
3790                input: Box::new(plan),
3791                edge_type_ids,
3792                direction: rel.direction.clone(),
3793                source_variable: source_var.clone(),
3794                target_variable: target_var.clone(),
3795                target_label_id,
3796                path_variable: path_var.clone(),
3797                min_hops,
3798                max_hops,
3799            },
3800            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
3801                input: Box::new(plan),
3802                edge_type_ids,
3803                direction: rel.direction.clone(),
3804                source_variable: source_var.clone(),
3805                target_variable: target_var.clone(),
3806                target_label_id,
3807                path_variable: path_var.clone(),
3808                min_hops,
3809                max_hops,
3810            },
3811        };
3812
3813        if !source_bound {
3814            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
3815        }
3816        if !target_bound {
3817            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
3818        }
3819        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
3820
3821        Ok(sp_plan)
3822    }
3823    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
3824    ///
3825    /// This is a public entry point for the Locy plan builder to reuse the
3826    /// existing pattern-planning logic for clause bodies.
3827    pub fn plan_pattern(
3828        &self,
3829        pattern: &Pattern,
3830        initial_vars: &[VariableInfo],
3831    ) -> Result<LogicalPlan> {
3832        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
3833        let vars_before_pattern = vars_in_scope.len();
3834        let mut plan = LogicalPlan::Empty;
3835        for path in &pattern.paths {
3836            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
3837        }
3838        Ok(plan)
3839    }
3840
3841    /// Plan a regular MATCH path (not shortestPath).
3842    fn plan_path(
3843        &self,
3844        path: &PathPattern,
3845        plan: LogicalPlan,
3846        vars_in_scope: &mut Vec<VariableInfo>,
3847        optional: bool,
3848        vars_before_pattern: usize,
3849    ) -> Result<LogicalPlan> {
3850        let mut plan = plan;
3851        let elements = &path.elements;
3852        let mut i = 0;
3853
3854        let path_variable = path.variable.clone();
3855
3856        // Check for VariableAlreadyBound: path variable already in scope
3857        if let Some(pv) = &path_variable
3858            && !pv.is_empty()
3859            && is_var_in_scope(vars_in_scope, pv)
3860        {
3861            return Err(anyhow!(
3862                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3863                pv
3864            ));
3865        }
3866
3867        // Check for VariableAlreadyBound: path variable conflicts with element variables
3868        if let Some(pv) = &path_variable
3869            && !pv.is_empty()
3870        {
3871            for element in elements {
3872                match element {
3873                    PatternElement::Node(n) => {
3874                        if let Some(v) = &n.variable
3875                            && v == pv
3876                        {
3877                            return Err(anyhow!(
3878                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3879                                pv
3880                            ));
3881                        }
3882                    }
3883                    PatternElement::Relationship(r) => {
3884                        if let Some(v) = &r.variable
3885                            && v == pv
3886                        {
3887                            return Err(anyhow!(
3888                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3889                                pv
3890                            ));
3891                        }
3892                    }
3893                    PatternElement::Parenthesized { .. } => {}
3894                }
3895            }
3896        }
3897
3898        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
3899        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
3900        let mut optional_pattern_vars: HashSet<String> = if optional {
3901            let mut vars = HashSet::new();
3902            for element in elements {
3903                match element {
3904                    PatternElement::Node(n) => {
3905                        if let Some(v) = &n.variable
3906                            && !v.is_empty()
3907                            && !is_var_in_scope(vars_in_scope, v)
3908                        {
3909                            vars.insert(v.clone());
3910                        }
3911                    }
3912                    PatternElement::Relationship(r) => {
3913                        if let Some(v) = &r.variable
3914                            && !v.is_empty()
3915                            && !is_var_in_scope(vars_in_scope, v)
3916                        {
3917                            vars.insert(v.clone());
3918                        }
3919                    }
3920                    PatternElement::Parenthesized { pattern, .. } => {
3921                        // Also check nested patterns
3922                        for nested_elem in &pattern.elements {
3923                            match nested_elem {
3924                                PatternElement::Node(n) => {
3925                                    if let Some(v) = &n.variable
3926                                        && !v.is_empty()
3927                                        && !is_var_in_scope(vars_in_scope, v)
3928                                    {
3929                                        vars.insert(v.clone());
3930                                    }
3931                                }
3932                                PatternElement::Relationship(r) => {
3933                                    if let Some(v) = &r.variable
3934                                        && !v.is_empty()
3935                                        && !is_var_in_scope(vars_in_scope, v)
3936                                    {
3937                                        vars.insert(v.clone());
3938                                    }
3939                                }
3940                                _ => {}
3941                            }
3942                        }
3943                    }
3944                }
3945            }
3946            // Include path variable if present
3947            if let Some(pv) = &path_variable
3948                && !pv.is_empty()
3949            {
3950                vars.insert(pv.clone());
3951            }
3952            vars
3953        } else {
3954            HashSet::new()
3955        };
3956
3957        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
3958        // These must participate in Trail mode (relationship uniqueness) enforcement
3959        // across ALL segments in this path, so that VLP segments like [*0..1] don't
3960        // traverse through edges already claimed by a bound relationship [r].
3961        let path_bound_edge_vars: HashSet<String> = {
3962            let mut bound = HashSet::new();
3963            for element in elements {
3964                if let PatternElement::Relationship(rel) = element
3965                    && let Some(ref var_name) = rel.variable
3966                    && !var_name.is_empty()
3967                    && vars_in_scope[..vars_before_pattern]
3968                        .iter()
3969                        .any(|v| v.name == *var_name)
3970                {
3971                    bound.insert(var_name.clone());
3972                }
3973            }
3974            bound
3975        };
3976
3977        // Track if any traverses were added (for zero-length path detection)
3978        let mut had_traverses = false;
3979        // Track the node variable for zero-length path binding
3980        let mut single_node_variable: Option<String> = None;
3981        // Collect node/edge variables for BindPath (fixed-length path binding)
3982        let mut path_node_vars: Vec<String> = Vec::new();
3983        let mut path_edge_vars: Vec<String> = Vec::new();
3984        // Track the last processed outer node variable for QPP source binding.
3985        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
3986        let mut last_outer_node_var: Option<String> = None;
3987
3988        // Multi-hop path variables are now supported - path is accumulated across hops
3989        while i < elements.len() {
3990            let element = &elements[i];
3991            match element {
3992                PatternElement::Node(n) => {
3993                    let mut variable = n.variable.clone().unwrap_or_default();
3994                    if variable.is_empty() {
3995                        variable = self.next_anon_var();
3996                    }
3997                    // Track first node variable for zero-length path
3998                    if single_node_variable.is_none() {
3999                        single_node_variable = Some(variable.clone());
4000                    }
4001                    let is_bound =
4002                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4003                    if optional && !is_bound {
4004                        optional_pattern_vars.insert(variable.clone());
4005                    }
4006
4007                    if is_bound {
4008                        // Check for type conflict - can't use an Edge/Path as a Node
4009                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4010                            && !info.var_type.is_compatible_with(VariableType::Node)
4011                        {
4012                            return Err(anyhow!(
4013                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4014                                variable,
4015                                info.var_type
4016                            ));
4017                        }
4018                        if let Some(node_filter) =
4019                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4020                        {
4021                            plan = LogicalPlan::Filter {
4022                                input: Box::new(plan),
4023                                predicate: node_filter,
4024                                optional_variables: HashSet::new(),
4025                            };
4026                        }
4027                    } else {
4028                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4029                        if !variable.is_empty() {
4030                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4031                        }
4032                    }
4033
4034                    // Track source node for BindPath
4035                    if path_variable.is_some() && path_node_vars.is_empty() {
4036                        path_node_vars.push(variable.clone());
4037                    }
4038
4039                    // Look ahead for relationships
4040                    let mut current_source_var = variable;
4041                    last_outer_node_var = Some(current_source_var.clone());
4042                    i += 1;
4043                    while i < elements.len() {
4044                        if let PatternElement::Relationship(r) = &elements[i] {
4045                            if i + 1 < elements.len() {
4046                                let target_node_part = &elements[i + 1];
4047                                if let PatternElement::Node(n_target) = target_node_part {
4048                                    // For VLP traversals, pass path_variable through
4049                                    // For fixed-length, we use BindPath instead
4050                                    let is_vlp = r.range.is_some();
4051                                    let traverse_path_var =
4052                                        if is_vlp { path_variable.clone() } else { None };
4053
4054                                    // If we're about to start a VLP segment and there are
4055                                    // collected fixed-hop path vars, create an intermediate
4056                                    // BindPath for the fixed prefix first. The VLP will then
4057                                    // extend this existing path.
4058                                    if is_vlp
4059                                        && let Some(pv) = path_variable.as_ref()
4060                                        && !path_node_vars.is_empty()
4061                                    {
4062                                        plan = LogicalPlan::BindPath {
4063                                            input: Box::new(plan),
4064                                            node_variables: std::mem::take(&mut path_node_vars),
4065                                            edge_variables: std::mem::take(&mut path_edge_vars),
4066                                            path_variable: pv.clone(),
4067                                        };
4068                                        if !is_var_in_scope(vars_in_scope, pv) {
4069                                            add_var_to_scope(
4070                                                vars_in_scope,
4071                                                pv,
4072                                                VariableType::Path,
4073                                            )?;
4074                                        }
4075                                    }
4076
4077                                    // Plan the traverse from the current source node
4078                                    let target_was_bound =
4079                                        n_target.variable.as_ref().is_some_and(|v| {
4080                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4081                                        });
4082                                    let (new_plan, target_var, effective_target) = self
4083                                        .plan_traverse_with_source(
4084                                            plan,
4085                                            vars_in_scope,
4086                                            TraverseParams {
4087                                                rel: r,
4088                                                target_node: n_target,
4089                                                optional,
4090                                                path_variable: traverse_path_var,
4091                                                optional_pattern_vars: optional_pattern_vars
4092                                                    .clone(),
4093                                            },
4094                                            &current_source_var,
4095                                            vars_before_pattern,
4096                                            &path_bound_edge_vars,
4097                                        )?;
4098                                    plan = new_plan;
4099                                    if optional && !target_was_bound {
4100                                        optional_pattern_vars.insert(target_var.clone());
4101                                    }
4102
4103                                    // Track edge/target node for BindPath
4104                                    if path_variable.is_some() && !is_vlp {
4105                                        // Use the edge variable if given, otherwise use
4106                                        // the internal tracking column pattern.
4107                                        // Use effective_target (which may be __rebound_x
4108                                        // for bound-target traversals) to match the actual
4109                                        // column name produced by GraphTraverseExec.
4110                                        if let Some(ev) = &r.variable {
4111                                            path_edge_vars.push(ev.clone());
4112                                        } else {
4113                                            path_edge_vars
4114                                                .push(format!("__eid_to_{}", effective_target));
4115                                        }
4116                                        path_node_vars.push(target_var.clone());
4117                                    }
4118
4119                                    current_source_var = target_var;
4120                                    last_outer_node_var = Some(current_source_var.clone());
4121                                    had_traverses = true;
4122                                    i += 2;
4123                                } else {
4124                                    return Err(anyhow!("Relationship must be followed by a node"));
4125                                }
4126                            } else {
4127                                return Err(anyhow!("Relationship cannot be the last element"));
4128                            }
4129                        } else {
4130                            break;
4131                        }
4132                    }
4133                }
4134                PatternElement::Relationship(_) => {
4135                    return Err(anyhow!("Pattern must start with a node"));
4136                }
4137                PatternElement::Parenthesized { pattern, range } => {
4138                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4139                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4140                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4141                        return Err(anyhow!(
4142                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4143                        ));
4144                    }
4145
4146                    let source_node = match &pattern.elements[0] {
4147                        PatternElement::Node(n) => n,
4148                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4149                    };
4150
4151                    // Extract all relationship-node pairs (QPP steps)
4152                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4153                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4154                        let rel = match &pattern.elements[pair_idx] {
4155                            PatternElement::Relationship(r) => r,
4156                            _ => {
4157                                return Err(anyhow!(
4158                                    "Quantified pattern element at position {} must be a relationship",
4159                                    pair_idx
4160                                ));
4161                            }
4162                        };
4163                        let node = match &pattern.elements[pair_idx + 1] {
4164                            PatternElement::Node(n) => n,
4165                            _ => {
4166                                return Err(anyhow!(
4167                                    "Quantified pattern element at position {} must be a node",
4168                                    pair_idx + 1
4169                                ));
4170                            }
4171                        };
4172                        // Reject nested quantifiers
4173                        if rel.range.is_some() {
4174                            return Err(anyhow!(
4175                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4176                            ));
4177                        }
4178                        qpp_rels.push((rel, node));
4179                    }
4180
4181                    // Check if there's an outer target node after the Parenthesized element.
4182                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4183                    // target that should receive the traversal result.
4184                    let inner_target_node = qpp_rels.last().unwrap().1;
4185                    let outer_target_node = if i + 1 < elements.len() {
4186                        match &elements[i + 1] {
4187                            PatternElement::Node(n) => Some(n),
4188                            _ => None,
4189                        }
4190                    } else {
4191                        None
4192                    };
4193                    // Use the outer target for variable binding and filters; inner target
4194                    // labels are used for state constraints within the NFA.
4195                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4196
4197                    // For simple 3-element single-hop QPP without intermediate label constraints,
4198                    // fall back to existing VLP behavior (copy range to relationship).
4199                    let use_simple_vlp = qpp_rels.len() == 1
4200                        && inner_target_node
4201                            .labels
4202                            .first()
4203                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4204                            .is_none();
4205
4206                    // Plan source node.
4207                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4208                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4209                    // node variable, use it; otherwise fall back to the inner source.
4210                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4211                        // The preceding outer node is already bound and in scope
4212                        // Apply any property filters from the inner source node
4213                        if let Some(prop_filter) =
4214                            self.properties_to_expr(outer_src, &source_node.properties)
4215                        {
4216                            plan = LogicalPlan::Filter {
4217                                input: Box::new(plan),
4218                                predicate: prop_filter,
4219                                optional_variables: HashSet::new(),
4220                            };
4221                        }
4222                        outer_src.clone()
4223                    } else {
4224                        let sv = source_node
4225                            .variable
4226                            .clone()
4227                            .filter(|v| !v.is_empty())
4228                            .unwrap_or_else(|| self.next_anon_var());
4229
4230                        if is_var_in_scope(vars_in_scope, &sv) {
4231                            // Source is already bound, apply property filter if needed
4232                            if let Some(prop_filter) =
4233                                self.properties_to_expr(&sv, &source_node.properties)
4234                            {
4235                                plan = LogicalPlan::Filter {
4236                                    input: Box::new(plan),
4237                                    predicate: prop_filter,
4238                                    optional_variables: HashSet::new(),
4239                                };
4240                            }
4241                        } else {
4242                            // Source is unbound, scan it
4243                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4244                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4245                            if optional {
4246                                optional_pattern_vars.insert(sv.clone());
4247                            }
4248                        }
4249                        sv
4250                    };
4251
4252                    if use_simple_vlp {
4253                        // Simple single-hop QPP: apply range to relationship and use VLP path
4254                        let mut relationship = qpp_rels[0].0.clone();
4255                        relationship.range = range.clone();
4256
4257                        let target_was_bound = target_node
4258                            .variable
4259                            .as_ref()
4260                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4261                        let (new_plan, target_var, _effective_target) = self
4262                            .plan_traverse_with_source(
4263                                plan,
4264                                vars_in_scope,
4265                                TraverseParams {
4266                                    rel: &relationship,
4267                                    target_node,
4268                                    optional,
4269                                    path_variable: path_variable.clone(),
4270                                    optional_pattern_vars: optional_pattern_vars.clone(),
4271                                },
4272                                &source_variable,
4273                                vars_before_pattern,
4274                                &path_bound_edge_vars,
4275                            )?;
4276                        plan = new_plan;
4277                        if optional && !target_was_bound {
4278                            optional_pattern_vars.insert(target_var);
4279                        }
4280                    } else {
4281                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4282                        let mut qpp_step_infos = Vec::new();
4283                        let mut all_edge_type_ids = Vec::new();
4284
4285                        for (rel, node) in &qpp_rels {
4286                            let mut step_edge_type_ids = Vec::new();
4287                            if rel.types.is_empty() {
4288                                step_edge_type_ids = self.schema.all_edge_type_ids();
4289                            } else {
4290                                for type_name in &rel.types {
4291                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4292                                        step_edge_type_ids.push(edge_meta.id);
4293                                    }
4294                                }
4295                            }
4296                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4297
4298                            let target_label = node.labels.first().and_then(|l| {
4299                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4300                            });
4301
4302                            qpp_step_infos.push(QppStepInfo {
4303                                edge_type_ids: step_edge_type_ids,
4304                                direction: rel.direction.clone(),
4305                                target_label,
4306                            });
4307                        }
4308
4309                        // Deduplicate edge type IDs for adjacency warming
4310                        all_edge_type_ids.sort_unstable();
4311                        all_edge_type_ids.dedup();
4312
4313                        // Compute iteration bounds from range
4314                        let hops_per_iter = qpp_step_infos.len();
4315                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4316                        let (min_iter, max_iter) = if let Some(range) = range {
4317                            let min = range.min.unwrap_or(1) as usize;
4318                            let max = range
4319                                .max
4320                                .map(|m| m as usize)
4321                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4322                            (min, max)
4323                        } else {
4324                            (1, 1)
4325                        };
4326                        let min_hops = min_iter * hops_per_iter;
4327                        let max_hops = max_iter * hops_per_iter;
4328
4329                        // Target variable from the last node in the QPP sub-pattern
4330                        let target_variable = target_node
4331                            .variable
4332                            .clone()
4333                            .filter(|v| !v.is_empty())
4334                            .unwrap_or_else(|| self.next_anon_var());
4335
4336                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4337
4338                        // Determine target label for the final node
4339                        let target_label_meta = target_node
4340                            .labels
4341                            .first()
4342                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4343
4344                        // Collect scope match variables
4345                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4346                            [vars_before_pattern..]
4347                            .iter()
4348                            .map(|v| v.name.clone())
4349                            .collect();
4350                        scope_match_variables.insert(target_variable.clone());
4351
4352                        // Handle bound target: use rebound variable for traverse
4353                        let rebound_target_var = if target_is_bound {
4354                            Some(target_variable.clone())
4355                        } else {
4356                            None
4357                        };
4358                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4359                            format!("__rebound_{}", bv)
4360                        } else {
4361                            target_variable.clone()
4362                        };
4363
4364                        plan = LogicalPlan::Traverse {
4365                            input: Box::new(plan),
4366                            edge_type_ids: all_edge_type_ids,
4367                            direction: qpp_rels[0].0.direction.clone(),
4368                            source_variable: source_variable.to_string(),
4369                            target_variable: effective_target_var.clone(),
4370                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4371                            step_variable: None, // QPP doesn't expose intermediate edges
4372                            min_hops,
4373                            max_hops,
4374                            optional,
4375                            target_filter: self.node_filter_expr(
4376                                &target_variable,
4377                                &target_node.labels,
4378                                &target_node.properties,
4379                            ),
4380                            path_variable: path_variable.clone(),
4381                            edge_properties: HashSet::new(),
4382                            is_variable_length: true,
4383                            optional_pattern_vars: optional_pattern_vars.clone(),
4384                            scope_match_variables,
4385                            edge_filter_expr: None,
4386                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4387                            qpp_steps: Some(qpp_step_infos),
4388                        };
4389
4390                        // Handle bound target: filter rebound results against original variable
4391                        if let Some(ref btv) = rebound_target_var {
4392                            // Filter: __rebound_x._vid = x._vid
4393                            let filter_pred = Expr::BinaryOp {
4394                                left: Box::new(Expr::Property(
4395                                    Box::new(Expr::Variable(effective_target_var.clone())),
4396                                    "_vid".to_string(),
4397                                )),
4398                                op: BinaryOp::Eq,
4399                                right: Box::new(Expr::Property(
4400                                    Box::new(Expr::Variable(btv.clone())),
4401                                    "_vid".to_string(),
4402                                )),
4403                            };
4404                            plan = LogicalPlan::Filter {
4405                                input: Box::new(plan),
4406                                predicate: filter_pred,
4407                                optional_variables: if optional {
4408                                    optional_pattern_vars.clone()
4409                                } else {
4410                                    HashSet::new()
4411                                },
4412                            };
4413                        }
4414
4415                        // Add target variable to scope
4416                        if !target_is_bound {
4417                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4418                        }
4419
4420                        // Add path variable to scope
4421                        if let Some(ref pv) = path_variable
4422                            && !pv.is_empty()
4423                            && !is_var_in_scope(vars_in_scope, pv)
4424                        {
4425                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4426                        }
4427                    }
4428                    had_traverses = true;
4429
4430                    // Skip the outer target node if we consumed it
4431                    if outer_target_node.is_some() {
4432                        i += 2; // skip both Parenthesized and the following Node
4433                    } else {
4434                        i += 1;
4435                    }
4436                }
4437            }
4438        }
4439
4440        // If this is a single-node pattern with a path variable, bind the zero-length path
4441        // E.g., `p = (a)` should create a Path with one node and zero edges
4442        if let Some(ref path_var) = path_variable
4443            && !path_var.is_empty()
4444            && !had_traverses
4445            && let Some(node_var) = single_node_variable
4446        {
4447            plan = LogicalPlan::BindZeroLengthPath {
4448                input: Box::new(plan),
4449                node_variable: node_var,
4450                path_variable: path_var.clone(),
4451            };
4452            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4453        }
4454
4455        // Bind fixed-length path from collected node/edge variables
4456        if let Some(ref path_var) = path_variable
4457            && !path_var.is_empty()
4458            && had_traverses
4459            && !path_node_vars.is_empty()
4460            && !is_var_in_scope(vars_in_scope, path_var)
4461        {
4462            plan = LogicalPlan::BindPath {
4463                input: Box::new(plan),
4464                node_variables: path_node_vars,
4465                edge_variables: path_edge_vars,
4466                path_variable: path_var.clone(),
4467            };
4468            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4469        }
4470
4471        Ok(plan)
4472    }
4473
4474    /// Plan a traverse with an explicit source variable name.
4475    ///
4476    /// Returns `(plan, target_variable, effective_target_variable)` where:
4477    /// - `target_variable` is the semantic variable name for downstream scope
4478    /// - `effective_target_variable` is the actual column-name prefix used by
4479    ///   the traverse (may be `__rebound_x` for bound-target patterns)
4480    fn plan_traverse_with_source(
4481        &self,
4482        plan: LogicalPlan,
4483        vars_in_scope: &mut Vec<VariableInfo>,
4484        params: TraverseParams<'_>,
4485        source_variable: &str,
4486        vars_before_pattern: usize,
4487        path_bound_edge_vars: &HashSet<String>,
4488    ) -> Result<(LogicalPlan, String, String)> {
4489        // Check for parameter used as relationship predicate
4490        if let Some(Expr::Parameter(_)) = &params.rel.properties {
4491            return Err(anyhow!(
4492                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
4493            ));
4494        }
4495
4496        let mut edge_type_ids = Vec::new();
4497        let mut dst_labels = Vec::new();
4498        let mut unknown_types = Vec::new();
4499
4500        if params.rel.types.is_empty() {
4501            // All types - include both schema and schemaless edge types
4502            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
4503            edge_type_ids = self.schema.all_edge_type_ids();
4504            for meta in self.schema.edge_types.values() {
4505                dst_labels.extend(meta.dst_labels.iter().cloned());
4506            }
4507        } else {
4508            for type_name in &params.rel.types {
4509                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4510                    // Known type - use standard Traverse with type_id
4511                    edge_type_ids.push(edge_meta.id);
4512                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
4513                } else {
4514                    // Unknown type - will use TraverseMainByType
4515                    unknown_types.push(type_name.clone());
4516                }
4517            }
4518        }
4519
4520        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
4521        edge_type_ids.sort_unstable();
4522        edge_type_ids.dedup();
4523        unknown_types.sort_unstable();
4524        unknown_types.dedup();
4525
4526        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
4527        if target_variable.is_empty() {
4528            target_variable = self.next_anon_var();
4529        }
4530        let target_is_bound =
4531            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
4532
4533        // Check for VariableTypeConflict: relationship variable used as node
4534        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
4535        if let Some(rel_var) = &params.rel.variable
4536            && !rel_var.is_empty()
4537            && rel_var == &target_variable
4538        {
4539            return Err(anyhow!(
4540                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
4541                rel_var
4542            ));
4543        }
4544
4545        // Check for VariableTypeConflict/RelationshipUniquenessViolation
4546        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
4547        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
4548        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
4549        let mut bound_edge_var: Option<String> = None;
4550        let mut bound_edge_list_var: Option<String> = None;
4551        if let Some(rel_var) = &params.rel.variable
4552            && !rel_var.is_empty()
4553            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
4554        {
4555            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
4556                .iter()
4557                .any(|v| v.name == *rel_var);
4558
4559            if info.var_type == VariableType::Edge {
4560                // Check if this edge variable comes from a previous clause (before this MATCH)
4561                if is_from_previous_clause {
4562                    // Edge variable bound from previous clause - this is allowed
4563                    // We'll filter the traversal to match this specific edge
4564                    bound_edge_var = Some(rel_var.clone());
4565                } else {
4566                    // Same relationship variable used twice in the same MATCH clause
4567                    return Err(anyhow!(
4568                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
4569                        rel_var
4570                    ));
4571                }
4572            } else if params.rel.range.is_some()
4573                && is_from_previous_clause
4574                && matches!(
4575                    info.var_type,
4576                    VariableType::Scalar | VariableType::ScalarLiteral
4577                )
4578            {
4579                // Allow VLP rebound against a previously bound relationship list
4580                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
4581                bound_edge_list_var = Some(rel_var.clone());
4582            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
4583                return Err(anyhow!(
4584                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
4585                    rel_var,
4586                    info.var_type
4587                ));
4588            }
4589        }
4590
4591        // Check for VariableTypeConflict: target node variable already bound as non-Node
4592        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
4593        if target_is_bound
4594            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4595            && !info.var_type.is_compatible_with(VariableType::Node)
4596        {
4597            return Err(anyhow!(
4598                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4599                target_variable,
4600                info.var_type
4601            ));
4602        }
4603
4604        // If all requested types are unknown (schemaless), use TraverseMainByType
4605        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
4606        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
4607        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
4608            // All types are unknown - use schemaless traversal
4609
4610            let is_variable_length = params.rel.range.is_some();
4611
4612            const DEFAULT_MAX_HOPS: usize = 100;
4613            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4614                let min = range.min.unwrap_or(1) as usize;
4615                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4616                (min, max)
4617            } else {
4618                (1, 1)
4619            };
4620
4621            // For both single-hop and variable-length paths:
4622            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4623            //   Single-hop: step_var holds a single edge object
4624            //   VLP: step_var holds a list of edge objects
4625            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4626            let step_var = params.rel.variable.clone();
4627            let path_var = params.path_variable.clone();
4628
4629            // Compute scope_match_variables for relationship uniqueness scoping.
4630            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4631                .iter()
4632                .map(|v| v.name.clone())
4633                .collect();
4634            if let Some(ref sv) = step_var {
4635                // Only add the step variable to `scope_match_variables` when it is NOT rebound
4636                // from a previous clause. Rebound edges (`bound_edge_var` set) must not take part
4637                // in uniqueness filtering because the second MATCH intentionally reuses the edge.
4638                if bound_edge_var.is_none() {
4639                    scope_match_variables.insert(sv.clone());
4640                }
4641            }
4642            scope_match_variables.insert(target_variable.clone());
4643            // Include bound edge variables from this path for cross-segment Trail mode
4644            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
4645            // edges already claimed by a bound relationship [r] in the same path.
4646            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
4647            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
4648            scope_match_variables.extend(
4649                path_bound_edge_vars
4650                    .iter()
4651                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
4652                    .cloned(),
4653            );
4654
4655            let mut plan = LogicalPlan::TraverseMainByType {
4656                type_names: unknown_types,
4657                input: Box::new(plan),
4658                direction: params.rel.direction.clone(),
4659                source_variable: source_variable.to_string(),
4660                target_variable: target_variable.clone(),
4661                step_variable: step_var.clone(),
4662                min_hops,
4663                max_hops,
4664                optional: params.optional,
4665                target_filter: self.node_filter_expr(
4666                    &target_variable,
4667                    &params.target_node.labels,
4668                    &params.target_node.properties,
4669                ),
4670                path_variable: path_var.clone(),
4671                is_variable_length,
4672                optional_pattern_vars: params.optional_pattern_vars.clone(),
4673                scope_match_variables,
4674                edge_filter_expr: if is_variable_length {
4675                    let filter_var = step_var
4676                        .clone()
4677                        .unwrap_or_else(|| "__anon_edge".to_string());
4678                    self.properties_to_expr(&filter_var, &params.rel.properties)
4679                } else {
4680                    None
4681                },
4682                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4683            };
4684
4685            // Only apply the bound-target filter for Imported variables (outer scope/subquery).
4686            // It is skipped for regular cycle patterns like (a)-[:T]->(b)-[:T]->(a) because the
4687            // bound check uses a Parameter, whose value must be in params (subquery context).
4688            if target_is_bound
4689                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4690                && info.var_type == VariableType::Imported
4691            {
4692                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4693            }
4694
4695            // Apply relationship property predicates for fixed-length schemaless
4696            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
4697            // For VLP, predicates are stored inline in edge_filter_expr (above).
4698            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4699            if !is_variable_length
4700                && let Some(edge_var_name) = step_var.as_ref()
4701                && let Some(edge_prop_filter) =
4702                    self.properties_to_expr(edge_var_name, &params.rel.properties)
4703            {
4704                let filter_optional_vars = if params.optional {
4705                    params.optional_pattern_vars.clone()
4706                } else {
4707                    HashSet::new()
4708                };
4709                plan = LogicalPlan::Filter {
4710                    input: Box::new(plan),
4711                    predicate: edge_prop_filter,
4712                    optional_variables: filter_optional_vars,
4713                };
4714            }
4715
4716            // Add the bound variables to scope
4717            if let Some(sv) = &step_var {
4718                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
4719                if is_variable_length
4720                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
4721                {
4722                    info.is_vlp = true;
4723                }
4724            }
4725            if let Some(pv) = &path_var
4726                && !is_var_in_scope(vars_in_scope, pv)
4727            {
4728                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4729            }
4730            if !is_var_in_scope(vars_in_scope, &target_variable) {
4731                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4732            }
4733
4734            return Ok((plan, target_variable.clone(), target_variable));
4735        }
4736
4737        // If we have a mix of known and unknown types, error for now
4738        // (could be extended to Union of Traverse + TraverseMainByType)
4739        if !unknown_types.is_empty() {
4740            return Err(anyhow!(
4741                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
4742                unknown_types
4743            ));
4744        }
4745
4746        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
4747            // Use first label for target_label_id
4748            // For schemaless support, allow unknown target labels
4749            self.schema.get_label_case_insensitive(label_name)
4750        } else if !target_is_bound {
4751            // Infer from edge type(s)
4752            let unique_dsts: Vec<_> = dst_labels
4753                .into_iter()
4754                .collect::<HashSet<_>>()
4755                .into_iter()
4756                .collect();
4757            if unique_dsts.len() == 1 {
4758                let label_name = &unique_dsts[0];
4759                self.schema.get_label_case_insensitive(label_name)
4760            } else {
4761                // Multiple or no destination labels inferred - allow any target
4762                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
4763                // where the edge type can connect to multiple labels
4764                None
4765            }
4766        } else {
4767            None
4768        };
4769
4770        // Check if this is a variable-length pattern (has range specifier like *1..3)
4771        let is_variable_length = params.rel.range.is_some();
4772
4773        // For VLP patterns, default min to 1 and max to a reasonable limit.
4774        // For single-hop patterns (no range), both are 1.
4775        const DEFAULT_MAX_HOPS: usize = 100;
4776        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4777            let min = range.min.unwrap_or(1) as usize;
4778            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4779            (min, max)
4780        } else {
4781            (1, 1)
4782        };
4783
4784        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4785        //   Single-hop: step_var holds a single edge object
4786        //   VLP: step_var holds a list of edge objects
4787        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4788        let step_var = params.rel.variable.clone();
4789        let path_var = params.path_variable.clone();
4790
4791        // If we have a bound edge variable from a previous clause, use a temp variable
4792        // for the Traverse step, then filter to match the bound edge
4793        let rebound_var = bound_edge_var
4794            .as_ref()
4795            .or(bound_edge_list_var.as_ref())
4796            .cloned();
4797        let effective_step_var = if let Some(ref bv) = rebound_var {
4798            Some(format!("__rebound_{}", bv))
4799        } else {
4800            step_var.clone()
4801        };
4802
4803        // If we have a bound target variable from a previous clause (e.g. WITH),
4804        // use a temp variable for the Traverse step, then filter to match the bound
4805        // target — mirroring the bound edge pattern above.
4806        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
4807            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
4808                .map(|info| info.var_type == VariableType::Imported)
4809                .unwrap_or(false);
4810            if !is_imported {
4811                Some(target_variable.clone())
4812            } else {
4813                None
4814            }
4815        } else {
4816            None
4817        };
4818
4819        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4820            format!("__rebound_{}", bv)
4821        } else {
4822            target_variable.clone()
4823        };
4824
4825        // Collect all variables (node + edge) from the current MATCH clause scope
4826        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
4827        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
4828        // if their associated variable is in this set. This prevents relationship
4829        // uniqueness from being enforced across disconnected MATCH clauses.
4830        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4831            .iter()
4832            .map(|v| v.name.clone())
4833            .collect();
4834        // Include the current traverse's edge variable (not yet added to vars_in_scope)
4835        if let Some(ref sv) = effective_step_var {
4836            scope_match_variables.insert(sv.clone());
4837        }
4838        // Include the target variable (not yet added to vars_in_scope)
4839        scope_match_variables.insert(effective_target_var.clone());
4840        // Include every bound edge variable from this path for cross-segment Trail mode
4841        // enforcement. No exclusion here: a rebound step edge is traversed as `__rebound_*`.
4842        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
4843
4844        let mut plan = LogicalPlan::Traverse {
4845            input: Box::new(plan),
4846            edge_type_ids,
4847            direction: params.rel.direction.clone(),
4848            source_variable: source_variable.to_string(),
4849            target_variable: effective_target_var.clone(),
4850            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4851            step_variable: effective_step_var.clone(),
4852            min_hops,
4853            max_hops,
4854            optional: params.optional,
4855            target_filter: self.node_filter_expr(
4856                &target_variable,
4857                &params.target_node.labels,
4858                &params.target_node.properties,
4859            ),
4860            path_variable: path_var.clone(),
4861            edge_properties: HashSet::new(),
4862            is_variable_length,
4863            optional_pattern_vars: params.optional_pattern_vars.clone(),
4864            scope_match_variables,
4865            edge_filter_expr: if is_variable_length {
4866                // Use the step variable name, or a fallback for anonymous edges.
4867                // The variable name is used by properties_to_expr to build
4868                // `var.prop = value` expressions. For BFS property checking,
4869                // only the property name and value matter (the variable name
4870                // is stripped during extraction).
4871                let filter_var = effective_step_var
4872                    .clone()
4873                    .unwrap_or_else(|| "__anon_edge".to_string());
4874                self.properties_to_expr(&filter_var, &params.rel.properties)
4875            } else {
4876                None
4877            },
4878            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4879            qpp_steps: None,
4880        };
4881
4882        // Pre-compute optional variables set for filter nodes in this traverse.
4883        // Used by relationship property filters and bound-edge filters below.
4884        let filter_optional_vars = if params.optional {
4885            params.optional_pattern_vars.clone()
4886        } else {
4887            HashSet::new()
4888        };
4889
4890        // Apply relationship property predicates (e.g. [r {k: v}]).
4891        // For VLP, predicates are stored inline in edge_filter_expr (above).
4892        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4893        if !is_variable_length
4894            && let Some(edge_var_name) = effective_step_var.as_ref()
4895            && let Some(edge_prop_filter) =
4896                self.properties_to_expr(edge_var_name, &params.rel.properties)
4897        {
4898            plan = LogicalPlan::Filter {
4899                input: Box::new(plan),
4900                predicate: edge_prop_filter,
4901                optional_variables: filter_optional_vars.clone(),
4902            };
4903        }
4904
4905        // Only apply bound target filter for Imported variables (from outer scope/subquery).
4906        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4907        // uses Parameter which requires the value to be in params (subquery context).
4908        if target_is_bound
4909            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4910            && info.var_type == VariableType::Imported
4911        {
4912            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4913        }
4914
4915        // If we have a bound edge variable, add a filter to match it
4916        if let Some(ref bv) = bound_edge_var {
4917            let temp_var = format!("__rebound_{}", bv);
4918            let bound_check = Expr::BinaryOp {
4919                left: Box::new(Expr::Property(
4920                    Box::new(Expr::Variable(temp_var)),
4921                    "_eid".to_string(),
4922                )),
4923                op: BinaryOp::Eq,
4924                right: Box::new(Expr::Property(
4925                    Box::new(Expr::Variable(bv.clone())),
4926                    "_eid".to_string(),
4927                )),
4928            };
4929            plan = LogicalPlan::Filter {
4930                input: Box::new(plan),
4931                predicate: bound_check,
4932                optional_variables: filter_optional_vars.clone(),
4933            };
4934        }
4935
4936        // If we have a bound relationship list variable for a VLP pattern,
4937        // add a filter to match the traversed relationship list exactly.
4938        if let Some(ref bv) = bound_edge_list_var {
4939            let temp_var = format!("__rebound_{}", bv);
4940            let temp_eids = Expr::ListComprehension {
4941                variable: "__rebound_edge".to_string(),
4942                list: Box::new(Expr::Variable(temp_var)),
4943                where_clause: None,
4944                map_expr: Box::new(Expr::FunctionCall {
4945                    name: "toInteger".to_string(),
4946                    args: vec![Expr::Property(
4947                        Box::new(Expr::Variable("__rebound_edge".to_string())),
4948                        "_eid".to_string(),
4949                    )],
4950                    distinct: false,
4951                    window_spec: None,
4952                }),
4953            };
4954            let bound_eids = Expr::ListComprehension {
4955                variable: "__bound_edge".to_string(),
4956                list: Box::new(Expr::Variable(bv.clone())),
4957                where_clause: None,
4958                map_expr: Box::new(Expr::FunctionCall {
4959                    name: "toInteger".to_string(),
4960                    args: vec![Expr::Property(
4961                        Box::new(Expr::Variable("__bound_edge".to_string())),
4962                        "_eid".to_string(),
4963                    )],
4964                    distinct: false,
4965                    window_spec: None,
4966                }),
4967            };
4968            let bound_list_check = Expr::BinaryOp {
4969                left: Box::new(temp_eids),
4970                op: BinaryOp::Eq,
4971                right: Box::new(bound_eids),
4972            };
4973            plan = LogicalPlan::Filter {
4974                input: Box::new(plan),
4975                predicate: bound_list_check,
4976                optional_variables: filter_optional_vars.clone(),
4977            };
4978        }
4979
4980        // If we have a bound target variable (non-imported), add a filter to constrain
4981        // the traversal output to match the previously bound target node.
4982        if let Some(ref bv) = rebound_target_var {
4983            let temp_var = format!("__rebound_{}", bv);
4984            let bound_check = Expr::BinaryOp {
4985                left: Box::new(Expr::Property(
4986                    Box::new(Expr::Variable(temp_var.clone())),
4987                    "_vid".to_string(),
4988                )),
4989                op: BinaryOp::Eq,
4990                right: Box::new(Expr::Property(
4991                    Box::new(Expr::Variable(bv.clone())),
4992                    "_vid".to_string(),
4993                )),
4994            };
4995            // For OPTIONAL MATCH, include the rebound variable in optional_variables
4996            // so that OptionalFilterExec excludes it from the grouping key and
4997            // properly nullifies it in recovery rows when all matches are filtered out.
4998            // Without this, each traverse result creates its own group (keyed by
4999            // __rebound_c._vid), and null-row recovery emits a spurious null row
5000            // for every non-matching target instead of one per source group.
5001            let mut rebound_filter_vars = filter_optional_vars;
5002            if params.optional {
5003                rebound_filter_vars.insert(temp_var);
5004            }
5005            plan = LogicalPlan::Filter {
5006                input: Box::new(plan),
5007                predicate: bound_check,
5008                optional_variables: rebound_filter_vars,
5009            };
5010        }
5011
5012        // Add the bound variables to scope
5013        // Skip adding the edge variable if it's already bound from a previous clause
5014        if let Some(sv) = &step_var
5015            && bound_edge_var.is_none()
5016            && bound_edge_list_var.is_none()
5017        {
5018            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5019            if is_variable_length
5020                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5021            {
5022                info.is_vlp = true;
5023            }
5024        }
5025        if let Some(pv) = &path_var
5026            && !is_var_in_scope(vars_in_scope, pv)
5027        {
5028            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5029        }
5030        if !is_var_in_scope(vars_in_scope, &target_variable) {
5031            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5032        }
5033
5034        Ok((plan, target_variable, effective_target_var))
5035    }
5036
5037    /// Combine a new scan plan with an existing plan.
5038    ///
5039    /// If the existing plan is `Empty`, returns the new plan directly.
5040    /// Otherwise, wraps them in a `CrossJoin`.
5041    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5042        if matches!(existing, LogicalPlan::Empty) {
5043            new
5044        } else {
5045            LogicalPlan::CrossJoin {
5046                left: Box::new(existing),
5047                right: Box::new(new),
5048            }
5049        }
5050    }
5051
5052    /// Split node map predicates into scan-pushable and residual filters.
5053    ///
5054    /// A predicate is scan-pushable when its value expression references only
5055    /// the node variable itself (or no variables). Predicates referencing other
5056    /// in-scope variables (correlated predicates) are returned as residual so
5057    /// they can be applied after joining with the existing plan.
5058    fn split_node_property_filters_for_scan(
5059        &self,
5060        variable: &str,
5061        properties: &Option<Expr>,
5062    ) -> (Option<Expr>, Option<Expr>) {
5063        let entries = match properties {
5064            Some(Expr::Map(entries)) => entries,
5065            _ => return (None, None),
5066        };
5067
5068        if entries.is_empty() {
5069            return (None, None);
5070        }
5071
5072        let mut pushdown_entries = Vec::new();
5073        let mut residual_entries = Vec::new();
5074
5075        for (prop, val_expr) in entries {
5076            let vars = collect_expr_variables(val_expr);
5077            if vars.iter().all(|v| v == variable) {
5078                pushdown_entries.push((prop.clone(), val_expr.clone()));
5079            } else {
5080                residual_entries.push((prop.clone(), val_expr.clone()));
5081            }
5082        }
5083
5084        let pushdown_map = if pushdown_entries.is_empty() {
5085            None
5086        } else {
5087            Some(Expr::Map(pushdown_entries))
5088        };
5089        let residual_map = if residual_entries.is_empty() {
5090            None
5091        } else {
5092            Some(Expr::Map(residual_entries))
5093        };
5094
5095        (
5096            self.properties_to_expr(variable, &pushdown_map),
5097            self.properties_to_expr(variable, &residual_map),
5098        )
5099    }
5100
5101    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5102    fn plan_unbound_node(
5103        &self,
5104        node: &NodePattern,
5105        variable: &str,
5106        plan: LogicalPlan,
5107        optional: bool,
5108    ) -> Result<LogicalPlan> {
5109        // Properties handling
5110        let properties = match &node.properties {
5111            Some(Expr::Map(entries)) => entries.as_slice(),
5112            Some(Expr::Parameter(_)) => {
5113                return Err(anyhow!(
5114                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5115                ));
5116            }
5117            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5118            None => &[],
5119        };
5120
5121        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5122
5123        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5124            if let Some(predicate) = residual {
5125                LogicalPlan::Filter {
5126                    input: Box::new(input),
5127                    predicate,
5128                    optional_variables: HashSet::new(),
5129                }
5130            } else {
5131                input
5132            }
5133        };
5134
5135        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5136            self.split_node_property_filters_for_scan(variable, &node.properties)
5137        } else {
5138            (self.properties_to_expr(variable, &node.properties), None)
5139        };
5140
5141        // Check for ext_id in properties when no label is specified
5142        if node.labels.is_empty() {
5143            // Try to find ext_id property for main table lookup
5144            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5145                // Extract the ext_id value as a string
5146                let ext_id = match ext_id_value {
5147                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5148                    _ => {
5149                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5150                    }
5151                };
5152
5153                // Build filter for remaining properties (excluding ext_id)
5154                let remaining_props: Vec<_> = properties
5155                    .iter()
5156                    .filter(|(k, _)| k != "ext_id")
5157                    .cloned()
5158                    .collect();
5159
5160                let remaining_expr = if remaining_props.is_empty() {
5161                    None
5162                } else {
5163                    Some(Expr::Map(remaining_props))
5164                };
5165
5166                let (prop_filter, residual_filter) = if has_existing_scope {
5167                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5168                } else {
5169                    (self.properties_to_expr(variable, &remaining_expr), None)
5170                };
5171
5172                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5173                    variable: variable.to_string(),
5174                    ext_id,
5175                    filter: prop_filter,
5176                    optional,
5177                };
5178
5179                let joined = Self::join_with_plan(plan, ext_id_lookup);
5180                return Ok(apply_residual_filter(joined, residual_filter));
5181            }
5182
5183            // No ext_id: create ScanAll for unlabeled node pattern
5184            let scan_all = LogicalPlan::ScanAll {
5185                variable: variable.to_string(),
5186                filter: node_scan_filter,
5187                optional,
5188            };
5189
5190            let joined = Self::join_with_plan(plan, scan_all);
5191            return Ok(apply_residual_filter(joined, node_residual_filter));
5192        }
5193
5194        // Use first label for label_id (primary label for dataset selection)
5195        let label_name = &node.labels[0];
5196
5197        // Check if label exists in schema
5198        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5199            // Known label: use standard Scan
5200            let scan = LogicalPlan::Scan {
5201                label_id: label_meta.id,
5202                labels: node.labels.clone(),
5203                variable: variable.to_string(),
5204                filter: node_scan_filter,
5205                optional,
5206            };
5207
5208            let joined = Self::join_with_plan(plan, scan);
5209            Ok(apply_residual_filter(joined, node_residual_filter))
5210        } else {
5211            // Unknown label: use ScanMainByLabels for schemaless support
5212            let scan_main = LogicalPlan::ScanMainByLabels {
5213                labels: node.labels.clone(),
5214                variable: variable.to_string(),
5215                filter: node_scan_filter,
5216                optional,
5217            };
5218
5219            let joined = Self::join_with_plan(plan, scan_main);
5220            Ok(apply_residual_filter(joined, node_residual_filter))
5221        }
5222    }
5223
5224    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5225    ///
5226    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5227    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5228    fn plan_where_clause(
5229        &self,
5230        predicate: &Expr,
5231        plan: LogicalPlan,
5232        vars_in_scope: &[VariableInfo],
5233        optional_vars: HashSet<String>,
5234    ) -> Result<LogicalPlan> {
5235        // Validate no aggregation functions in WHERE clause
5236        validate_no_aggregation_in_where(predicate)?;
5237
5238        // Validate all variables used are in scope
5239        validate_expression_variables(predicate, vars_in_scope)?;
5240
5241        // Validate expression types (function args, boolean operators)
5242        validate_expression(predicate, vars_in_scope)?;
5243
5244        // Check that WHERE predicate isn't a bare node/edge/path variable
5245        if let Expr::Variable(var_name) = predicate
5246            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5247            && matches!(
5248                info.var_type,
5249                VariableType::Node | VariableType::Edge | VariableType::Path
5250            )
5251        {
5252            return Err(anyhow!(
5253                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5254                info.var_type
5255            ));
5256        }
5257
5258        let mut plan = plan;
5259
5260        // Transform VALID_AT macro to function call
5261        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5262
5263        let mut current_predicate =
5264            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5265
5266        // 1. Try to extract vector_similarity predicate for optimization
5267        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
5268            let vs = &extraction.predicate;
5269            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
5270                plan = Self::replace_scan_with_knn(
5271                    plan,
5272                    &vs.variable,
5273                    &vs.property,
5274                    vs.query.clone(),
5275                    vs.threshold,
5276                );
5277                if let Some(residual) = extraction.residual {
5278                    current_predicate = residual;
5279                } else {
5280                    current_predicate = Expr::TRUE;
5281                }
5282            }
5283        }
5284
5285        // 3. Push eligible predicates to Scan OR Traverse filters
5286        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
5287        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
5288        // rows. Let them stay in the Filter operator which handles NULL preservation.
5289        for var in vars_in_scope {
5290            // Skip pushdown for optional variables - they need NULL preservation in Filter
5291            if optional_vars.contains(&var.name) {
5292                continue;
5293            }
5294
5295            // Check if var is produced by a Scan
5296            if Self::find_scan_label_id(&plan, &var.name).is_some() {
5297                let (pushable, residual) =
5298                    Self::extract_variable_predicates(&current_predicate, &var.name);
5299
5300                for pred in pushable {
5301                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
5302                }
5303
5304                if let Some(r) = residual {
5305                    current_predicate = r;
5306                } else {
5307                    current_predicate = Expr::TRUE;
5308                }
5309            } else if Self::is_traverse_target(&plan, &var.name) {
5310                // Push to Traverse
5311                let (pushable, residual) =
5312                    Self::extract_variable_predicates(&current_predicate, &var.name);
5313
5314                for pred in pushable {
5315                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
5316                }
5317
5318                if let Some(r) = residual {
5319                    current_predicate = r;
5320                } else {
5321                    current_predicate = Expr::TRUE;
5322                }
5323            }
5324        }
5325
5326        // 4. Push predicates to Apply.input_filter
5327        // This filters input rows BEFORE executing correlated subqueries.
5328        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
5329
5330        // 5. Add Filter node for any remaining predicates
5331        if !current_predicate.is_true_literal() {
5332            plan = LogicalPlan::Filter {
5333                input: Box::new(plan),
5334                predicate: current_predicate,
5335                optional_variables: optional_vars,
5336            };
5337        }
5338
5339        Ok(plan)
5340    }
5341
5342    fn rewrite_predicates_using_indexes(
5343        &self,
5344        predicate: &Expr,
5345        plan: &LogicalPlan,
5346        vars_in_scope: &[VariableInfo],
5347    ) -> Result<Expr> {
5348        let mut rewritten = predicate.clone();
5349
5350        for var in vars_in_scope {
5351            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
5352                // Find label name
5353                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
5354
5355                if let Some(label) = label_name
5356                    && let Some(props) = self.schema.properties.get(&label)
5357                {
5358                    for (gen_col, meta) in props {
5359                        if meta.generation_expression.is_some() {
5360                            // Use cached parsed expression
5361                            if let Some(schema_expr) =
5362                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
5363                            {
5364                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
5365                                rewritten = Self::replace_expression(
5366                                    rewritten,
5367                                    schema_expr,
5368                                    &var.name,
5369                                    gen_col,
5370                                );
5371                            }
5372                        }
5373                    }
5374                }
5375            }
5376        }
5377        Ok(rewritten)
5378    }
5379
5380    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
5381        // First, normalize schema_expr to use query_var
5382        let schema_var = schema_expr.extract_variable();
5383
5384        if let Some(s_var) = schema_var {
5385            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
5386
5387            if expr == target_expr {
5388                return Expr::Property(
5389                    Box::new(Expr::Variable(query_var.to_string())),
5390                    gen_col.to_string(),
5391                );
5392            }
5393        }
5394
5395        // Recurse
5396        match expr {
5397            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
5398                left: Box::new(Self::replace_expression(
5399                    *left,
5400                    schema_expr,
5401                    query_var,
5402                    gen_col,
5403                )),
5404                op,
5405                right: Box::new(Self::replace_expression(
5406                    *right,
5407                    schema_expr,
5408                    query_var,
5409                    gen_col,
5410                )),
5411            },
5412            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
5413                op,
5414                expr: Box::new(Self::replace_expression(
5415                    *expr,
5416                    schema_expr,
5417                    query_var,
5418                    gen_col,
5419                )),
5420            },
5421            Expr::FunctionCall {
5422                name,
5423                args,
5424                distinct,
5425                window_spec,
5426            } => Expr::FunctionCall {
5427                name,
5428                args: args
5429                    .into_iter()
5430                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
5431                    .collect(),
5432                distinct,
5433                window_spec,
5434            },
5435            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
5436                *expr,
5437                schema_expr,
5438                query_var,
5439                gen_col,
5440            ))),
5441            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
5442                *expr,
5443                schema_expr,
5444                query_var,
5445                gen_col,
5446            ))),
5447            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
5448                *expr,
5449                schema_expr,
5450                query_var,
5451                gen_col,
5452            ))),
5453            Expr::ArrayIndex {
5454                array: e,
5455                index: idx,
5456            } => Expr::ArrayIndex {
5457                array: Box::new(Self::replace_expression(
5458                    *e,
5459                    schema_expr,
5460                    query_var,
5461                    gen_col,
5462                )),
5463                index: Box::new(Self::replace_expression(
5464                    *idx,
5465                    schema_expr,
5466                    query_var,
5467                    gen_col,
5468                )),
5469            },
5470            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
5471                array: Box::new(Self::replace_expression(
5472                    *array,
5473                    schema_expr,
5474                    query_var,
5475                    gen_col,
5476                )),
5477                start: start.map(|s| {
5478                    Box::new(Self::replace_expression(
5479                        *s,
5480                        schema_expr,
5481                        query_var,
5482                        gen_col,
5483                    ))
5484                }),
5485                end: end.map(|e| {
5486                    Box::new(Self::replace_expression(
5487                        *e,
5488                        schema_expr,
5489                        query_var,
5490                        gen_col,
5491                    ))
5492                }),
5493            },
5494            Expr::List(exprs) => Expr::List(
5495                exprs
5496                    .into_iter()
5497                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
5498                    .collect(),
5499            ),
5500            Expr::Map(entries) => Expr::Map(
5501                entries
5502                    .into_iter()
5503                    .map(|(k, v)| {
5504                        (
5505                            k,
5506                            Self::replace_expression(v, schema_expr, query_var, gen_col),
5507                        )
5508                    })
5509                    .collect(),
5510            ),
5511            Expr::Property(e, prop) => Expr::Property(
5512                Box::new(Self::replace_expression(
5513                    *e,
5514                    schema_expr,
5515                    query_var,
5516                    gen_col,
5517                )),
5518                prop,
5519            ),
5520            Expr::Case {
5521                expr: case_expr,
5522                when_then,
5523                else_expr,
5524            } => Expr::Case {
5525                expr: case_expr.map(|e| {
5526                    Box::new(Self::replace_expression(
5527                        *e,
5528                        schema_expr,
5529                        query_var,
5530                        gen_col,
5531                    ))
5532                }),
5533                when_then: when_then
5534                    .into_iter()
5535                    .map(|(w, t)| {
5536                        (
5537                            Self::replace_expression(w, schema_expr, query_var, gen_col),
5538                            Self::replace_expression(t, schema_expr, query_var, gen_col),
5539                        )
5540                    })
5541                    .collect(),
5542                else_expr: else_expr.map(|e| {
5543                    Box::new(Self::replace_expression(
5544                        *e,
5545                        schema_expr,
5546                        query_var,
5547                        gen_col,
5548                    ))
5549                }),
5550            },
5551            Expr::Reduce {
5552                accumulator,
5553                init,
5554                variable: reduce_var,
5555                list,
5556                expr: reduce_expr,
5557            } => Expr::Reduce {
5558                accumulator,
5559                init: Box::new(Self::replace_expression(
5560                    *init,
5561                    schema_expr,
5562                    query_var,
5563                    gen_col,
5564                )),
5565                variable: reduce_var,
5566                list: Box::new(Self::replace_expression(
5567                    *list,
5568                    schema_expr,
5569                    query_var,
5570                    gen_col,
5571                )),
5572                expr: Box::new(Self::replace_expression(
5573                    *reduce_expr,
5574                    schema_expr,
5575                    query_var,
5576                    gen_col,
5577                )),
5578            },
5579
5580            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
5581            _ => expr,
5582        }
5583    }
5584
5585    /// Check if the variable is the target of a Traverse node
5586    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
5587        match plan {
5588            LogicalPlan::Traverse {
5589                target_variable,
5590                input,
5591                ..
5592            } => target_variable == variable || Self::is_traverse_target(input, variable),
5593            LogicalPlan::Filter { input, .. }
5594            | LogicalPlan::Project { input, .. }
5595            | LogicalPlan::Sort { input, .. }
5596            | LogicalPlan::Limit { input, .. }
5597            | LogicalPlan::Aggregate { input, .. }
5598            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
5599            LogicalPlan::CrossJoin { left, right } => {
5600                Self::is_traverse_target(left, variable)
5601                    || Self::is_traverse_target(right, variable)
5602            }
5603            _ => false,
5604        }
5605    }
5606
5607    /// Push a predicate into a Traverse's target_filter for the specified variable
5608    fn push_predicate_to_traverse(
5609        plan: LogicalPlan,
5610        variable: &str,
5611        predicate: Expr,
5612    ) -> LogicalPlan {
5613        match plan {
5614            LogicalPlan::Traverse {
5615                input,
5616                edge_type_ids,
5617                direction,
5618                source_variable,
5619                target_variable,
5620                target_label_id,
5621                step_variable,
5622                min_hops,
5623                max_hops,
5624                optional,
5625                target_filter,
5626                path_variable,
5627                edge_properties,
5628                is_variable_length,
5629                optional_pattern_vars,
5630                scope_match_variables,
5631                edge_filter_expr,
5632                path_mode,
5633                qpp_steps,
5634            } => {
5635                if target_variable == variable {
5636                    // Found the traverse producing this variable
5637                    let new_filter = match target_filter {
5638                        Some(existing) => Some(Expr::BinaryOp {
5639                            left: Box::new(existing),
5640                            op: BinaryOp::And,
5641                            right: Box::new(predicate),
5642                        }),
5643                        None => Some(predicate),
5644                    };
5645                    LogicalPlan::Traverse {
5646                        input,
5647                        edge_type_ids,
5648                        direction,
5649                        source_variable,
5650                        target_variable,
5651                        target_label_id,
5652                        step_variable,
5653                        min_hops,
5654                        max_hops,
5655                        optional,
5656                        target_filter: new_filter,
5657                        path_variable,
5658                        edge_properties,
5659                        is_variable_length,
5660                        optional_pattern_vars,
5661                        scope_match_variables,
5662                        edge_filter_expr,
5663                        path_mode,
5664                        qpp_steps,
5665                    }
5666                } else {
5667                    // Recurse into input
5668                    LogicalPlan::Traverse {
5669                        input: Box::new(Self::push_predicate_to_traverse(
5670                            *input, variable, predicate,
5671                        )),
5672                        edge_type_ids,
5673                        direction,
5674                        source_variable,
5675                        target_variable,
5676                        target_label_id,
5677                        step_variable,
5678                        min_hops,
5679                        max_hops,
5680                        optional,
5681                        target_filter,
5682                        path_variable,
5683                        edge_properties,
5684                        is_variable_length,
5685                        optional_pattern_vars,
5686                        scope_match_variables,
5687                        edge_filter_expr,
5688                        path_mode,
5689                        qpp_steps,
5690                    }
5691                }
5692            }
5693            LogicalPlan::Filter {
5694                input,
5695                predicate: p,
5696                optional_variables: opt_vars,
5697            } => LogicalPlan::Filter {
5698                input: Box::new(Self::push_predicate_to_traverse(
5699                    *input, variable, predicate,
5700                )),
5701                predicate: p,
5702                optional_variables: opt_vars,
5703            },
5704            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
5705                input: Box::new(Self::push_predicate_to_traverse(
5706                    *input, variable, predicate,
5707                )),
5708                projections,
5709            },
5710            LogicalPlan::CrossJoin { left, right } => {
5711                // Check which side has the variable
5712                if Self::is_traverse_target(&left, variable) {
5713                    LogicalPlan::CrossJoin {
5714                        left: Box::new(Self::push_predicate_to_traverse(
5715                            *left, variable, predicate,
5716                        )),
5717                        right,
5718                    }
5719                } else {
5720                    LogicalPlan::CrossJoin {
5721                        left,
5722                        right: Box::new(Self::push_predicate_to_traverse(
5723                            *right, variable, predicate,
5724                        )),
5725                    }
5726                }
5727            }
5728            other => other,
5729        }
5730    }
5731
5732    /// Plan a WITH clause, handling aggregations and projections.
5733    fn plan_with_clause(
5734        &self,
5735        with_clause: &WithClause,
5736        plan: LogicalPlan,
5737        vars_in_scope: &[VariableInfo],
5738    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
5739        let mut plan = plan;
5740        let mut group_by: Vec<Expr> = Vec::new();
5741        let mut aggregates: Vec<Expr> = Vec::new();
5742        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
5743        let mut has_agg = false;
5744        let mut projections = Vec::new();
5745        let mut new_vars: Vec<VariableInfo> = Vec::new();
5746        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
5747        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
5748        let mut projected_aliases: HashSet<String> = HashSet::new();
5749        let mut has_unaliased_non_variable_expr = false;
5750
5751        for item in &with_clause.items {
5752            match item {
5753                ReturnItem::All => {
5754                    // WITH * - add all variables in scope
5755                    for v in vars_in_scope {
5756                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5757                        projected_aliases.insert(v.name.clone());
5758                        projected_simple_reprs.insert(v.name.clone());
5759                    }
5760                    new_vars.extend(vars_in_scope.iter().cloned());
5761                }
5762                ReturnItem::Expr { expr, alias, .. } => {
5763                    if matches!(expr, Expr::Wildcard) {
5764                        for v in vars_in_scope {
5765                            projections
5766                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5767                            projected_aliases.insert(v.name.clone());
5768                            projected_simple_reprs.insert(v.name.clone());
5769                        }
5770                        new_vars.extend(vars_in_scope.iter().cloned());
5771                    } else {
5772                        // Validate expression variables and syntax
5773                        validate_expression_variables(expr, vars_in_scope)?;
5774                        validate_expression(expr, vars_in_scope)?;
5775                        // Pattern predicates are not allowed in WITH
5776                        if contains_pattern_predicate(expr) {
5777                            return Err(anyhow!(
5778                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
5779                            ));
5780                        }
5781
5782                        projections.push((expr.clone(), alias.clone()));
5783                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
5784                            // Bare aggregate — push directly
5785                            has_agg = true;
5786                            aggregates.push(expr.clone());
5787                            projected_aggregate_reprs.insert(expr.to_string_repr());
5788                        } else if !is_window_function(expr)
5789                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
5790                        {
5791                            // Compound aggregate or expression containing aggregates
5792                            has_agg = true;
5793                            compound_agg_exprs.push(expr.clone());
5794                            for inner in extract_inner_aggregates(expr) {
5795                                let repr = inner.to_string_repr();
5796                                if !projected_aggregate_reprs.contains(&repr) {
5797                                    aggregates.push(inner);
5798                                    projected_aggregate_reprs.insert(repr);
5799                                }
5800                            }
5801                        } else if !group_by.contains(expr) {
5802                            group_by.push(expr.clone());
5803                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
5804                                projected_simple_reprs.insert(expr.to_string_repr());
5805                            }
5806                        }
5807
5808                        // Preserve non-scalar type information when WITH aliases
5809                        // entity/path-capable expressions.
5810                        if let Some(a) = alias {
5811                            if projected_aliases.contains(a) {
5812                                return Err(anyhow!(
5813                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5814                                    a
5815                                ));
5816                            }
5817                            let inferred = infer_with_output_type(expr, vars_in_scope);
5818                            new_vars.push(VariableInfo::new(a.clone(), inferred));
5819                            projected_aliases.insert(a.clone());
5820                        } else if let Expr::Variable(v) = expr {
5821                            if projected_aliases.contains(v) {
5822                                return Err(anyhow!(
5823                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5824                                    v
5825                                ));
5826                            }
5827                            // Preserve the original type if the variable is just passed through
5828                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
5829                                new_vars.push(existing.clone());
5830                            } else {
5831                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
5832                            }
5833                            projected_aliases.insert(v.clone());
5834                        } else {
5835                            has_unaliased_non_variable_expr = true;
5836                        }
5837                    }
5838                }
5839            }
5840        }
5841
5842        // Collect extra variables that need to survive the projection stage
5843        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
5844        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
5845        let mut passthrough_extras: Vec<String> = Vec::new();
5846        let mut seen_passthrough: HashSet<String> = HashSet::new();
5847
5848        if let Some(predicate) = &with_clause.where_clause {
5849            for name in collect_expr_variables(predicate) {
5850                if !projected_names.contains(name.as_str())
5851                    && find_var_in_scope(vars_in_scope, &name).is_some()
5852                    && seen_passthrough.insert(name.clone())
5853                {
5854                    passthrough_extras.push(name);
5855                }
5856            }
5857        }
5858
5859        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
5860        // Carry those variables through the projection so Sort can resolve them.
5861        if !has_agg && let Some(order_by) = &with_clause.order_by {
5862            for item in order_by {
5863                for name in collect_expr_variables(&item.expr) {
5864                    if !projected_names.contains(name.as_str())
5865                        && find_var_in_scope(vars_in_scope, &name).is_some()
5866                        && seen_passthrough.insert(name.clone())
5867                    {
5868                        passthrough_extras.push(name);
5869                    }
5870                }
5871            }
5872        }
5873
5874        let needs_cleanup = !passthrough_extras.is_empty();
5875        for extra in &passthrough_extras {
5876            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
5877        }
5878
5879        // Validate compound aggregate expressions: non-aggregate refs must be
5880        // individually present in the group_by as simple variables or properties.
5881        if has_agg {
5882            let group_by_reprs: HashSet<String> =
5883                group_by.iter().map(|e| e.to_string_repr()).collect();
5884            for expr in &compound_agg_exprs {
5885                let mut refs = Vec::new();
5886                collect_non_aggregate_refs(expr, false, &mut refs);
5887                for r in &refs {
5888                    let is_covered = match r {
5889                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
5890                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
5891                    };
5892                    if !is_covered {
5893                        return Err(anyhow!(
5894                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
5895                        ));
5896                    }
5897                }
5898            }
5899        }
5900
5901        if has_agg {
5902            plan = LogicalPlan::Aggregate {
5903                input: Box::new(plan),
5904                group_by,
5905                aggregates,
5906            };
5907
5908            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
5909            // can reference the WITH aliases instead of raw column names.
5910            let rename_projections: Vec<(Expr, Option<String>)> = projections
5911                .iter()
5912                .map(|(expr, alias)| {
5913                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
5914                        // Bare aggregate — reference by column name
5915                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
5916                    } else if is_compound_aggregate(expr)
5917                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
5918                    {
5919                        // Compound aggregate — replace inner aggregates with
5920                        // column references, keep outer expression
5921                        (replace_aggregates_with_columns(expr), alias.clone())
5922                    } else {
5923                        (Expr::Variable(expr.to_string_repr()), alias.clone())
5924                    }
5925                })
5926                .collect();
5927            plan = LogicalPlan::Project {
5928                input: Box::new(plan),
5929                projections: rename_projections,
5930            };
5931        } else if !projections.is_empty() {
5932            plan = LogicalPlan::Project {
5933                input: Box::new(plan),
5934                projections: projections.clone(),
5935            };
5936        }
5937
5938        // Apply the WHERE filter (post-projection, with extras still visible).
5939        if let Some(predicate) = &with_clause.where_clause {
5940            plan = LogicalPlan::Filter {
5941                input: Box::new(plan),
5942                predicate: predicate.clone(),
5943                optional_variables: HashSet::new(),
5944            };
5945        }
5946
5947        // Validate and apply ORDER BY for WITH clause.
5948        // Keep pre-WITH vars in scope for parser compatibility, then apply
5949        // stricter checks for aggregate-containing ORDER BY items.
5950        if let Some(order_by) = &with_clause.order_by {
5951            // Build a mapping from aliases and projected expression reprs to
5952            // output columns of the preceding Project/Aggregate pipeline.
5953            let with_order_aliases: HashMap<String, Expr> = projections
5954                .iter()
5955                .flat_map(|(expr, alias)| {
5956                    let output_col = if let Some(a) = alias {
5957                        a.clone()
5958                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
5959                        aggregate_column_name(expr)
5960                    } else {
5961                        expr.to_string_repr()
5962                    };
5963
5964                    let mut entries = Vec::new();
5965                    // ORDER BY alias
5966                    if let Some(a) = alias {
5967                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
5968                    }
5969                    // ORDER BY projected expression (e.g. me.age)
5970                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
5971                    entries
5972                })
5973                .collect();
5974
5975            let order_by_scope: Vec<VariableInfo> = {
5976                let mut scope = new_vars.clone();
5977                for v in vars_in_scope {
5978                    if !is_var_in_scope(&scope, &v.name) {
5979                        scope.push(v.clone());
5980                    }
5981                }
5982                scope
5983            };
5984            for item in order_by {
5985                validate_expression_variables(&item.expr, &order_by_scope)?;
5986                validate_expression(&item.expr, &order_by_scope)?;
5987                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
5988                if has_aggregate_in_item && !has_agg {
5989                    return Err(anyhow!(
5990                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
5991                    ));
5992                }
5993                if has_agg && has_aggregate_in_item {
5994                    validate_with_order_by_aggregate_item(
5995                        &item.expr,
5996                        &projected_aggregate_reprs,
5997                        &projected_simple_reprs,
5998                        &projected_aliases,
5999                    )?;
6000                }
6001            }
6002            let rewritten_order_by: Vec<SortItem> = order_by
6003                .iter()
6004                .map(|item| {
6005                    let mut expr =
6006                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
6007                    if has_agg {
6008                        // Rewrite any aggregate calls to the aggregate output
6009                        // columns produced by Aggregate.
6010                        expr = replace_aggregates_with_columns(&expr);
6011                        // Then re-map projected property expressions to aliases
6012                        // from the WITH projection.
6013                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
6014                    }
6015                    SortItem {
6016                        expr,
6017                        ascending: item.ascending,
6018                    }
6019                })
6020                .collect();
6021            plan = LogicalPlan::Sort {
6022                input: Box::new(plan),
6023                order_by: rewritten_order_by,
6024            };
6025        }
6026
6027        // Non-variable expressions in WITH must be aliased.
6028        // This check is intentionally placed after ORDER BY validation so
6029        // higher-priority semantic errors (e.g., ambiguous aggregation in
6030        // ORDER BY) can surface first.
6031        if has_unaliased_non_variable_expr {
6032            return Err(anyhow!(
6033                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
6034            ));
6035        }
6036
6037        // Validate and apply SKIP/LIMIT for WITH clause
6038        let skip = with_clause
6039            .skip
6040            .as_ref()
6041            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
6042            .transpose()?
6043            .flatten();
6044        let fetch = with_clause
6045            .limit
6046            .as_ref()
6047            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
6048            .transpose()?
6049            .flatten();
6050
6051        if skip.is_some() || fetch.is_some() {
6052            plan = LogicalPlan::Limit {
6053                input: Box::new(plan),
6054                skip,
6055                fetch,
6056            };
6057        }
6058
6059        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
6060        if needs_cleanup {
6061            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
6062                .iter()
6063                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
6064                .collect();
6065            plan = LogicalPlan::Project {
6066                input: Box::new(plan),
6067                projections: cleanup_projections,
6068            };
6069        }
6070
6071        if with_clause.distinct {
6072            plan = LogicalPlan::Distinct {
6073                input: Box::new(plan),
6074            };
6075        }
6076
6077        Ok((plan, new_vars))
6078    }
6079
6080    fn plan_with_recursive(
6081        &self,
6082        with_recursive: &WithRecursiveClause,
6083        _prev_plan: LogicalPlan,
6084        vars_in_scope: &[VariableInfo],
6085    ) -> Result<LogicalPlan> {
6086        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
6087        match &*with_recursive.query {
6088            Query::Union { left, right, .. } => {
6089                // Plan the anchor (initial) query with current scope
6090                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
6091
6092                // Plan the recursive query with the CTE name added to scope
6093                // so it can reference itself
6094                let mut recursive_scope = vars_in_scope.to_vec();
6095                recursive_scope.push(VariableInfo::new(
6096                    with_recursive.name.clone(),
6097                    VariableType::Scalar,
6098                ));
6099                let recursive_plan =
6100                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
6101
6102                Ok(LogicalPlan::RecursiveCTE {
6103                    cte_name: with_recursive.name.clone(),
6104                    initial: Box::new(initial_plan),
6105                    recursive: Box::new(recursive_plan),
6106                })
6107            }
6108            _ => Err(anyhow::anyhow!(
6109                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
6110            )),
6111        }
6112    }
6113
6114    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
6115        let entries = match properties {
6116            Some(Expr::Map(entries)) => entries,
6117            _ => return None,
6118        };
6119
6120        if entries.is_empty() {
6121            return None;
6122        }
6123        let mut final_expr = None;
6124        for (prop, val_expr) in entries {
6125            let eq_expr = Expr::BinaryOp {
6126                left: Box::new(Expr::Property(
6127                    Box::new(Expr::Variable(variable.to_string())),
6128                    prop.clone(),
6129                )),
6130                op: BinaryOp::Eq,
6131                right: Box::new(val_expr.clone()),
6132            };
6133
6134            if let Some(e) = final_expr {
6135                final_expr = Some(Expr::BinaryOp {
6136                    left: Box::new(e),
6137                    op: BinaryOp::And,
6138                    right: Box::new(eq_expr),
6139                });
6140            } else {
6141                final_expr = Some(eq_expr);
6142            }
6143        }
6144        final_expr
6145    }
6146
6147    /// Build a filter expression from node properties and labels.
6148    ///
6149    /// This is used for TraverseMainByType where we need to filter target nodes
6150    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
6151    pub fn node_filter_expr(
6152        &self,
6153        variable: &str,
6154        labels: &[String],
6155        properties: &Option<Expr>,
6156    ) -> Option<Expr> {
6157        let mut final_expr = None;
6158
6159        // Add label checks using hasLabel(variable, 'label')
6160        for label in labels {
6161            let label_check = Expr::FunctionCall {
6162                name: "hasLabel".to_string(),
6163                args: vec![
6164                    Expr::Variable(variable.to_string()),
6165                    Expr::Literal(CypherLiteral::String(label.clone())),
6166                ],
6167                distinct: false,
6168                window_spec: None,
6169            };
6170
6171            final_expr = match final_expr {
6172                Some(e) => Some(Expr::BinaryOp {
6173                    left: Box::new(e),
6174                    op: BinaryOp::And,
6175                    right: Box::new(label_check),
6176                }),
6177                None => Some(label_check),
6178            };
6179        }
6180
6181        // Add property checks
6182        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
6183            final_expr = match final_expr {
6184                Some(e) => Some(Expr::BinaryOp {
6185                    left: Box::new(e),
6186                    op: BinaryOp::And,
6187                    right: Box::new(prop_expr),
6188                }),
6189                None => Some(prop_expr),
6190            };
6191        }
6192
6193        final_expr
6194    }
6195
6196    /// Create a filter plan that ensures traversed target matches a bound variable.
6197    ///
6198    /// Used in EXISTS subquery patterns where the target is already bound.
6199    /// Compares the target's VID against the bound variable's VID.
6200    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
6201        // Compare the traverse-discovered target's VID against the bound variable's VID.
6202        // Left side: Property access on the variable from current scope.
6203        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
6204        // We use Variable("{var}._vid") to access the VID column from the traverse output,
6205        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
6206        let bound_check = Expr::BinaryOp {
6207            left: Box::new(Expr::Property(
6208                Box::new(Expr::Variable(target_variable.to_string())),
6209                "_vid".to_string(),
6210            )),
6211            op: BinaryOp::Eq,
6212            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
6213        };
6214        LogicalPlan::Filter {
6215            input: Box::new(plan),
6216            predicate: bound_check,
6217            optional_variables: HashSet::new(),
6218        }
6219    }
6220
6221    /// Replace a Scan node matching the variable with a VectorKnn node
6222    fn replace_scan_with_knn(
6223        plan: LogicalPlan,
6224        variable: &str,
6225        property: &str,
6226        query: Expr,
6227        threshold: Option<f32>,
6228    ) -> LogicalPlan {
6229        match plan {
6230            LogicalPlan::Scan {
6231                label_id,
6232                labels,
6233                variable: scan_var,
6234                filter,
6235                optional,
6236            } => {
6237                if scan_var == variable {
6238                    // Inject any existing scan filter into VectorKnn?
6239                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
6240                    // Typically filter is applied post-Knn or during Knn if supported.
6241                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
6242                    // If `filter` is present on Scan, it must be preserved.
6243                    // We can wrap VectorKnn in Filter if Scan had filter.
6244
6245                    let knn = LogicalPlan::VectorKnn {
6246                        label_id,
6247                        variable: variable.to_string(),
6248                        property: property.to_string(),
6249                        query,
6250                        k: 100, // Default K, should push down LIMIT
6251                        threshold,
6252                    };
6253
6254                    if let Some(f) = filter {
6255                        LogicalPlan::Filter {
6256                            input: Box::new(knn),
6257                            predicate: f,
6258                            optional_variables: HashSet::new(),
6259                        }
6260                    } else {
6261                        knn
6262                    }
6263                } else {
6264                    LogicalPlan::Scan {
6265                        label_id,
6266                        labels,
6267                        variable: scan_var,
6268                        filter,
6269                        optional,
6270                    }
6271                }
6272            }
6273            LogicalPlan::Filter {
6274                input,
6275                predicate,
6276                optional_variables,
6277            } => LogicalPlan::Filter {
6278                input: Box::new(Self::replace_scan_with_knn(
6279                    *input, variable, property, query, threshold,
6280                )),
6281                predicate,
6282                optional_variables,
6283            },
6284            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6285                input: Box::new(Self::replace_scan_with_knn(
6286                    *input, variable, property, query, threshold,
6287                )),
6288                projections,
6289            },
6290            LogicalPlan::Limit { input, skip, fetch } => {
6291                // If we encounter Limit, we should ideally push K down to VectorKnn
6292                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
6293                // Limit comes later.
6294                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
6295                LogicalPlan::Limit {
6296                    input: Box::new(Self::replace_scan_with_knn(
6297                        *input, variable, property, query, threshold,
6298                    )),
6299                    skip,
6300                    fetch,
6301                }
6302            }
6303            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6304                left: Box::new(Self::replace_scan_with_knn(
6305                    *left,
6306                    variable,
6307                    property,
6308                    query.clone(),
6309                    threshold,
6310                )),
6311                right: Box::new(Self::replace_scan_with_knn(
6312                    *right, variable, property, query, threshold,
6313                )),
6314            },
6315            other => other,
6316        }
6317    }
6318
6319    /// Find the label_id for a Scan node matching the given variable
6320    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
6321        match plan {
6322            LogicalPlan::Scan {
6323                label_id,
6324                variable: var,
6325                ..
6326            } if var == variable => Some(*label_id),
6327            LogicalPlan::Filter { input, .. }
6328            | LogicalPlan::Project { input, .. }
6329            | LogicalPlan::Sort { input, .. }
6330            | LogicalPlan::Limit { input, .. }
6331            | LogicalPlan::Aggregate { input, .. }
6332            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
6333            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
6334                .or_else(|| Self::find_scan_label_id(right, variable)),
6335            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
6336            _ => None,
6337        }
6338    }
6339
6340    /// Push a predicate into a Scan's filter for the specified variable
6341    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
6342        match plan {
6343            LogicalPlan::Scan {
6344                label_id,
6345                labels,
6346                variable: var,
6347                filter,
6348                optional,
6349            } if var == variable => {
6350                // Merge the predicate with existing filter
6351                let new_filter = match filter {
6352                    Some(existing) => Some(Expr::BinaryOp {
6353                        left: Box::new(existing),
6354                        op: BinaryOp::And,
6355                        right: Box::new(predicate),
6356                    }),
6357                    None => Some(predicate),
6358                };
6359                LogicalPlan::Scan {
6360                    label_id,
6361                    labels,
6362                    variable: var,
6363                    filter: new_filter,
6364                    optional,
6365                }
6366            }
6367            LogicalPlan::Filter {
6368                input,
6369                predicate: p,
6370                optional_variables: opt_vars,
6371            } => LogicalPlan::Filter {
6372                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6373                predicate: p,
6374                optional_variables: opt_vars,
6375            },
6376            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6377                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6378                projections,
6379            },
6380            LogicalPlan::CrossJoin { left, right } => {
6381                // Check which side has the variable
6382                if Self::find_scan_label_id(&left, variable).is_some() {
6383                    LogicalPlan::CrossJoin {
6384                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
6385                        right,
6386                    }
6387                } else {
6388                    LogicalPlan::CrossJoin {
6389                        left,
6390                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
6391                    }
6392                }
6393            }
6394            LogicalPlan::Traverse {
6395                input,
6396                edge_type_ids,
6397                direction,
6398                source_variable,
6399                target_variable,
6400                target_label_id,
6401                step_variable,
6402                min_hops,
6403                max_hops,
6404                optional,
6405                target_filter,
6406                path_variable,
6407                edge_properties,
6408                is_variable_length,
6409                optional_pattern_vars,
6410                scope_match_variables,
6411                edge_filter_expr,
6412                path_mode,
6413                qpp_steps,
6414            } => LogicalPlan::Traverse {
6415                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6416                edge_type_ids,
6417                direction,
6418                source_variable,
6419                target_variable,
6420                target_label_id,
6421                step_variable,
6422                min_hops,
6423                max_hops,
6424                optional,
6425                target_filter,
6426                path_variable,
6427                edge_properties,
6428                is_variable_length,
6429                optional_pattern_vars,
6430                scope_match_variables,
6431                edge_filter_expr,
6432                path_mode,
6433                qpp_steps,
6434            },
6435            other => other,
6436        }
6437    }
6438
6439    /// Extract predicates that reference only the specified variable
6440    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
6441        let analyzer = PredicateAnalyzer::new();
6442        let analysis = analyzer.analyze(predicate, variable);
6443
6444        // Return pushable predicates and combined residual
6445        let residual = if analysis.residual.is_empty() {
6446            None
6447        } else {
6448            let mut iter = analysis.residual.into_iter();
6449            let first = iter.next().unwrap();
6450            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
6451                left: Box::new(acc),
6452                op: BinaryOp::And,
6453                right: Box::new(e),
6454            }))
6455        };
6456
6457        (analysis.pushable, residual)
6458    }
6459
6460    // =====================================================================
6461    // Apply Predicate Pushdown - Helper Functions
6462    // =====================================================================
6463
6464    /// Split AND-connected predicates into a list.
6465    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
6466        match expr {
6467            Expr::BinaryOp {
6468                left,
6469                op: BinaryOp::And,
6470                right,
6471            } => {
6472                let mut result = Self::split_and_conjuncts(left);
6473                result.extend(Self::split_and_conjuncts(right));
6474                result
6475            }
6476            _ => vec![expr.clone()],
6477        }
6478    }
6479
6480    /// Combine predicates with AND.
6481    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
6482        if predicates.is_empty() {
6483            return None;
6484        }
6485        let mut result = predicates[0].clone();
6486        for pred in predicates.iter().skip(1) {
6487            result = Expr::BinaryOp {
6488                left: Box::new(result),
6489                op: BinaryOp::And,
6490                right: Box::new(pred.clone()),
6491            };
6492        }
6493        Some(result)
6494    }
6495
6496    /// Collect all variable names referenced in an expression.
6497    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
6498        let mut vars = HashSet::new();
6499        Self::collect_expr_variables_impl(expr, &mut vars);
6500        vars
6501    }
6502
6503    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
6504        match expr {
6505            Expr::Variable(name) => {
6506                vars.insert(name.clone());
6507            }
6508            Expr::Property(inner, _) => {
6509                if let Expr::Variable(name) = inner.as_ref() {
6510                    vars.insert(name.clone());
6511                } else {
6512                    Self::collect_expr_variables_impl(inner, vars);
6513                }
6514            }
6515            Expr::BinaryOp { left, right, .. } => {
6516                Self::collect_expr_variables_impl(left, vars);
6517                Self::collect_expr_variables_impl(right, vars);
6518            }
6519            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6520            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
6521            Expr::FunctionCall { args, .. } => {
6522                for arg in args {
6523                    Self::collect_expr_variables_impl(arg, vars);
6524                }
6525            }
6526            Expr::List(items) => {
6527                for item in items {
6528                    Self::collect_expr_variables_impl(item, vars);
6529                }
6530            }
6531            Expr::Case {
6532                expr,
6533                when_then,
6534                else_expr,
6535            } => {
6536                if let Some(e) = expr {
6537                    Self::collect_expr_variables_impl(e, vars);
6538                }
6539                for (w, t) in when_then {
6540                    Self::collect_expr_variables_impl(w, vars);
6541                    Self::collect_expr_variables_impl(t, vars);
6542                }
6543                if let Some(e) = else_expr {
6544                    Self::collect_expr_variables_impl(e, vars);
6545                }
6546            }
6547            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6548            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
6549            // they introduce local variable bindings not in outer scope.
6550            _ => {}
6551        }
6552    }
6553
6554    /// Collect all variables produced by a logical plan.
6555    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
6556        let mut vars = HashSet::new();
6557        Self::collect_plan_variables_impl(plan, &mut vars);
6558        vars
6559    }
6560
6561    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
6562        match plan {
6563            LogicalPlan::Scan { variable, .. } => {
6564                vars.insert(variable.clone());
6565            }
6566            LogicalPlan::Traverse {
6567                target_variable,
6568                step_variable,
6569                input,
6570                path_variable,
6571                ..
6572            } => {
6573                vars.insert(target_variable.clone());
6574                if let Some(sv) = step_variable {
6575                    vars.insert(sv.clone());
6576                }
6577                if let Some(pv) = path_variable {
6578                    vars.insert(pv.clone());
6579                }
6580                Self::collect_plan_variables_impl(input, vars);
6581            }
6582            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
6583            LogicalPlan::Project { input, projections } => {
6584                for (expr, alias) in projections {
6585                    if let Some(a) = alias {
6586                        vars.insert(a.clone());
6587                    } else if let Expr::Variable(v) = expr {
6588                        vars.insert(v.clone());
6589                    }
6590                }
6591                Self::collect_plan_variables_impl(input, vars);
6592            }
6593            LogicalPlan::Apply {
6594                input, subquery, ..
6595            } => {
6596                Self::collect_plan_variables_impl(input, vars);
6597                Self::collect_plan_variables_impl(subquery, vars);
6598            }
6599            LogicalPlan::CrossJoin { left, right } => {
6600                Self::collect_plan_variables_impl(left, vars);
6601                Self::collect_plan_variables_impl(right, vars);
6602            }
6603            LogicalPlan::Unwind {
6604                input, variable, ..
6605            } => {
6606                vars.insert(variable.clone());
6607                Self::collect_plan_variables_impl(input, vars);
6608            }
6609            LogicalPlan::Aggregate { input, .. } => {
6610                Self::collect_plan_variables_impl(input, vars);
6611            }
6612            LogicalPlan::Distinct { input } => {
6613                Self::collect_plan_variables_impl(input, vars);
6614            }
6615            LogicalPlan::Sort { input, .. } => {
6616                Self::collect_plan_variables_impl(input, vars);
6617            }
6618            LogicalPlan::Limit { input, .. } => {
6619                Self::collect_plan_variables_impl(input, vars);
6620            }
6621            LogicalPlan::VectorKnn { variable, .. } => {
6622                vars.insert(variable.clone());
6623            }
6624            LogicalPlan::ProcedureCall { yield_items, .. } => {
6625                for (name, alias) in yield_items {
6626                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
6627                }
6628            }
6629            LogicalPlan::ShortestPath {
6630                input,
6631                path_variable,
6632                ..
6633            } => {
6634                vars.insert(path_variable.clone());
6635                Self::collect_plan_variables_impl(input, vars);
6636            }
6637            LogicalPlan::AllShortestPaths {
6638                input,
6639                path_variable,
6640                ..
6641            } => {
6642                vars.insert(path_variable.clone());
6643                Self::collect_plan_variables_impl(input, vars);
6644            }
6645            LogicalPlan::RecursiveCTE {
6646                initial, recursive, ..
6647            } => {
6648                Self::collect_plan_variables_impl(initial, vars);
6649                Self::collect_plan_variables_impl(recursive, vars);
6650            }
6651            LogicalPlan::SubqueryCall {
6652                input, subquery, ..
6653            } => {
6654                Self::collect_plan_variables_impl(input, vars);
6655                Self::collect_plan_variables_impl(subquery, vars);
6656            }
6657            _ => {}
6658        }
6659    }
6660
6661    /// Extract predicates that only reference variables from Apply's input.
6662    /// Returns (input_only_predicates, remaining_predicates).
6663    fn extract_apply_input_predicates(
6664        predicate: &Expr,
6665        input_variables: &HashSet<String>,
6666        subquery_new_variables: &HashSet<String>,
6667    ) -> (Vec<Expr>, Vec<Expr>) {
6668        let conjuncts = Self::split_and_conjuncts(predicate);
6669        let mut input_preds = Vec::new();
6670        let mut remaining = Vec::new();
6671
6672        for conj in conjuncts {
6673            let vars = Self::collect_expr_variables(&conj);
6674
6675            // Predicate only references input variables (none from subquery)
6676            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
6677            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
6678
6679            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
6680                input_preds.push(conj);
6681            } else {
6682                remaining.push(conj);
6683            }
6684        }
6685
6686        (input_preds, remaining)
6687    }
6688
6689    /// Push eligible predicates into Apply.input_filter.
6690    /// This filters input rows BEFORE executing the correlated subquery.
6691    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
6692        match plan {
6693            LogicalPlan::Apply {
6694                input,
6695                subquery,
6696                input_filter,
6697            } => {
6698                // Collect variables from input plan
6699                let input_vars = Self::collect_plan_variables(&input);
6700
6701                // Collect NEW variables introduced by subquery (not in input)
6702                let subquery_vars = Self::collect_plan_variables(&subquery);
6703                let new_subquery_vars: HashSet<String> =
6704                    subquery_vars.difference(&input_vars).cloned().collect();
6705
6706                // Extract predicates that only reference input variables
6707                let (input_preds, remaining) = Self::extract_apply_input_predicates(
6708                    current_predicate,
6709                    &input_vars,
6710                    &new_subquery_vars,
6711                );
6712
6713                // Update current_predicate to only remaining predicates
6714                *current_predicate = if remaining.is_empty() {
6715                    Expr::TRUE
6716                } else {
6717                    Self::combine_predicates(remaining).unwrap()
6718                };
6719
6720                // Combine extracted predicates with existing input_filter
6721                let new_input_filter = if input_preds.is_empty() {
6722                    input_filter
6723                } else {
6724                    let extracted = Self::combine_predicates(input_preds).unwrap();
6725                    match input_filter {
6726                        Some(existing) => Some(Expr::BinaryOp {
6727                            left: Box::new(existing),
6728                            op: BinaryOp::And,
6729                            right: Box::new(extracted),
6730                        }),
6731                        None => Some(extracted),
6732                    }
6733                };
6734
6735                // Recurse into input plan
6736                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
6737
6738                LogicalPlan::Apply {
6739                    input: Box::new(new_input),
6740                    subquery,
6741                    input_filter: new_input_filter,
6742                }
6743            }
6744            // Recurse into other plan nodes
6745            LogicalPlan::Filter {
6746                input,
6747                predicate,
6748                optional_variables,
6749            } => LogicalPlan::Filter {
6750                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6751                predicate,
6752                optional_variables,
6753            },
6754            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6755                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6756                projections,
6757            },
6758            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
6759                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6760                order_by,
6761            },
6762            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
6763                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6764                skip,
6765                fetch,
6766            },
6767            LogicalPlan::Aggregate {
6768                input,
6769                group_by,
6770                aggregates,
6771            } => LogicalPlan::Aggregate {
6772                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6773                group_by,
6774                aggregates,
6775            },
6776            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6777                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
6778                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
6779            },
6780            LogicalPlan::Traverse {
6781                input,
6782                edge_type_ids,
6783                direction,
6784                source_variable,
6785                target_variable,
6786                target_label_id,
6787                step_variable,
6788                min_hops,
6789                max_hops,
6790                optional,
6791                target_filter,
6792                path_variable,
6793                edge_properties,
6794                is_variable_length,
6795                optional_pattern_vars,
6796                scope_match_variables,
6797                edge_filter_expr,
6798                path_mode,
6799                qpp_steps,
6800            } => LogicalPlan::Traverse {
6801                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6802                edge_type_ids,
6803                direction,
6804                source_variable,
6805                target_variable,
6806                target_label_id,
6807                step_variable,
6808                min_hops,
6809                max_hops,
6810                optional,
6811                target_filter,
6812                path_variable,
6813                edge_properties,
6814                is_variable_length,
6815                optional_pattern_vars,
6816                scope_match_variables,
6817                edge_filter_expr,
6818                path_mode,
6819                qpp_steps,
6820            },
6821            other => other,
6822        }
6823    }
6824
6825    /// Get the column name for an aggregate expression.
6826    /// This must match the logic in executor's build_aggregate_result.
6827    fn get_aggregate_column_name(expr: &Expr) -> String {
6828        aggregate_column_name(expr)
6829    }
6830}
6831
6832/// Get the expected column name for an aggregate expression.
6833///
6834/// This is the single source of truth for aggregate column naming, used by:
6835/// - Logical planner (to create column references)
6836/// - Physical planner (to rename DataFusion's auto-generated column names)
6837/// - Fallback executor (to name result columns)
6838pub fn aggregate_column_name(expr: &Expr) -> String {
6839    expr.to_string_repr()
6840}
6841
6842#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6843pub struct ExplainOutput {
6844    pub plan_text: String,
6845    pub index_usage: Vec<IndexUsage>,
6846    pub cost_estimates: CostEstimates,
6847    pub warnings: Vec<String>,
6848    pub suggestions: Vec<IndexSuggestion>,
6849}
6850
6851/// Suggestion for creating an index to improve query performance.
6852#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6853pub struct IndexSuggestion {
6854    pub label_or_type: String,
6855    pub property: String,
6856    pub index_type: String,
6857    pub reason: String,
6858    pub create_statement: String,
6859}
6860
6861#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6862pub struct IndexUsage {
6863    pub label_or_type: String,
6864    pub property: String,
6865    pub index_type: String,
6866    pub used: bool,
6867    pub reason: Option<String>,
6868}
6869
6870#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6871pub struct CostEstimates {
6872    pub estimated_rows: f64,
6873    pub estimated_cost: f64,
6874}
6875
6876impl QueryPlanner {
6877    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
6878        let plan = self.plan(ast)?;
6879        self.explain_logical_plan(&plan)
6880    }
6881
6882    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
6883        let index_usage = self.analyze_index_usage(plan)?;
6884        let cost_estimates = self.estimate_costs(plan)?;
6885        let suggestions = self.collect_index_suggestions(plan);
6886        let warnings = Vec::new();
6887        let plan_text = format!("{:#?}", plan);
6888
6889        Ok(ExplainOutput {
6890            plan_text,
6891            index_usage,
6892            cost_estimates,
6893            warnings,
6894            suggestions,
6895        })
6896    }
6897
6898    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
6899        let mut usage = Vec::new();
6900        self.collect_index_usage(plan, &mut usage);
6901        Ok(usage)
6902    }
6903
6904    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
6905        match plan {
6906            LogicalPlan::Scan { .. } => {
6907                // Placeholder: Scan might use index if it was optimized
6908                // Ideally LogicalPlan::Scan should store if it uses index.
6909                // But typically Planner converts Scan to specific index scan or we infer it here.
6910            }
6911            LogicalPlan::VectorKnn {
6912                label_id, property, ..
6913            } => {
6914                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
6915                usage.push(IndexUsage {
6916                    label_or_type: label_name.to_string(),
6917                    property: property.clone(),
6918                    index_type: "VECTOR".to_string(),
6919                    used: true,
6920                    reason: None,
6921                });
6922            }
6923            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
6924            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
6925            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
6926            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
6927            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
6928            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
6929            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
6930            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6931                self.collect_index_usage(left, usage);
6932                self.collect_index_usage(right, usage);
6933            }
6934            _ => {}
6935        }
6936    }
6937
6938    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
6939        Ok(CostEstimates {
6940            estimated_rows: 100.0,
6941            estimated_cost: 10.0,
6942        })
6943    }
6944
6945    /// Collect index suggestions based on query patterns.
6946    ///
6947    /// Currently detects:
6948    /// - Temporal predicates from `uni.validAt()` function calls
6949    /// - Temporal predicates from `VALID_AT` macro expansion
6950    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
6951        let mut suggestions = Vec::new();
6952        self.collect_temporal_suggestions(plan, &mut suggestions);
6953        suggestions
6954    }
6955
6956    /// Recursively collect temporal index suggestions from the plan.
6957    fn collect_temporal_suggestions(
6958        &self,
6959        plan: &LogicalPlan,
6960        suggestions: &mut Vec<IndexSuggestion>,
6961    ) {
6962        match plan {
6963            LogicalPlan::Filter {
6964                input, predicate, ..
6965            } => {
6966                // Check for temporal patterns in the predicate
6967                self.detect_temporal_pattern(predicate, suggestions);
6968                // Recurse into input
6969                self.collect_temporal_suggestions(input, suggestions);
6970            }
6971            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
6972            LogicalPlan::Project { input, .. } => {
6973                self.collect_temporal_suggestions(input, suggestions)
6974            }
6975            LogicalPlan::Limit { input, .. } => {
6976                self.collect_temporal_suggestions(input, suggestions)
6977            }
6978            LogicalPlan::Sort { input, .. } => {
6979                self.collect_temporal_suggestions(input, suggestions)
6980            }
6981            LogicalPlan::Aggregate { input, .. } => {
6982                self.collect_temporal_suggestions(input, suggestions)
6983            }
6984            LogicalPlan::Traverse { input, .. } => {
6985                self.collect_temporal_suggestions(input, suggestions)
6986            }
6987            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6988                self.collect_temporal_suggestions(left, suggestions);
6989                self.collect_temporal_suggestions(right, suggestions);
6990            }
6991            _ => {}
6992        }
6993    }
6994
6995    /// Detect temporal predicate patterns and suggest indexes.
6996    ///
6997    /// Detects two patterns:
6998    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
6999    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
7000    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
7001        match expr {
7002            // Pattern 1: uni.temporal.validAt() function call
7003            Expr::FunctionCall { name, args, .. }
7004                if name.eq_ignore_ascii_case("uni.temporal.validAt")
7005                    || name.eq_ignore_ascii_case("validAt") =>
7006            {
7007                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
7008                if args.len() >= 2 {
7009                    let start_prop =
7010                        if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1) {
7011                            s.clone()
7012                        } else {
7013                            "valid_from".to_string()
7014                        };
7015
7016                    // Try to extract label from the node expression
7017                    if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
7018                        self.suggest_temporal_index(&var, &start_prop, suggestions);
7019                    }
7020                }
7021            }
7022
7023            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
7024            Expr::BinaryOp {
7025                left,
7026                op: BinaryOp::And,
7027                right,
7028            } => {
7029                // Check left side for `prop <= time` pattern (temporal start condition)
7030                if let Expr::BinaryOp {
7031                    left: prop_expr,
7032                    op: BinaryOp::LtEq,
7033                    ..
7034                } = left.as_ref()
7035                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
7036                    && (prop_name == "valid_from"
7037                        || prop_name.contains("start")
7038                        || prop_name.contains("from")
7039                        || prop_name.contains("begin"))
7040                    && let Some(var) = base.extract_variable()
7041                {
7042                    self.suggest_temporal_index(&var, prop_name, suggestions);
7043                }
7044
7045                // Recurse into both sides of AND
7046                self.detect_temporal_pattern(left.as_ref(), suggestions);
7047                self.detect_temporal_pattern(right.as_ref(), suggestions);
7048            }
7049
7050            // Recurse into other binary ops
7051            Expr::BinaryOp { left, right, .. } => {
7052                self.detect_temporal_pattern(left.as_ref(), suggestions);
7053                self.detect_temporal_pattern(right.as_ref(), suggestions);
7054            }
7055
7056            _ => {}
7057        }
7058    }
7059
7060    /// Suggest a scalar index for a temporal property if one doesn't already exist.
7061    fn suggest_temporal_index(
7062        &self,
7063        _variable: &str,
7064        property: &str,
7065        suggestions: &mut Vec<IndexSuggestion>,
7066    ) {
7067        // Check if a scalar index already exists for this property
7068        // We need to check all labels since we may not know the exact label from the variable
7069        let mut has_index = false;
7070
7071        for index in &self.schema.indexes {
7072            if let IndexDefinition::Scalar(config) = index
7073                && config.properties.contains(&property.to_string())
7074            {
7075                has_index = true;
7076                break;
7077            }
7078        }
7079
7080        if !has_index {
7081            // Avoid duplicate suggestions
7082            let already_suggested = suggestions.iter().any(|s| s.property == property);
7083            if !already_suggested {
7084                suggestions.push(IndexSuggestion {
7085                    label_or_type: "(detected from temporal query)".to_string(),
7086                    property: property.to_string(),
7087                    index_type: "SCALAR (BTree)".to_string(),
7088                    reason: format!(
7089                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
7090                        property
7091                    ),
7092                    create_statement: format!(
7093                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
7094                        property, property
7095                    ),
7096                });
7097            }
7098        }
7099    }
7100
7101    /// Helper functions for expression normalization
7102    /// Normalize an expression for storage: strip variable prefixes
7103    /// For simple property: u.email -> "email"
7104    /// For expressions: lower(u.email) -> "lower(email)"
7105    fn normalize_expression_for_storage(expr: &Expr) -> String {
7106        match expr {
7107            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
7108            _ => {
7109                // Serialize expression and strip variable prefix
7110                let expr_str = expr.to_string_repr();
7111                Self::strip_variable_prefix(&expr_str)
7112            }
7113        }
7114    }
7115
7116    /// Strip variable references like "u.prop" from expression strings
7117    /// Converts "lower(u.email)" to "lower(email)"
7118    fn strip_variable_prefix(expr_str: &str) -> String {
7119        use regex::Regex;
7120        // Match patterns like "word.property" and replace with just "property"
7121        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
7122        re.replace_all(expr_str, "$1").to_string()
7123    }
7124
7125    /// Plan a schema command from the new AST
7126    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
7127        match cmd {
7128            SchemaCommand::CreateVectorIndex(c) => {
7129                // Parse index type from options (default: IvfPq)
7130                let index_type = if let Some(type_val) = c.options.get("type") {
7131                    match type_val.as_str() {
7132                        Some("hnsw") => VectorIndexType::Hnsw {
7133                            m: 16,
7134                            ef_construction: 200,
7135                            ef_search: 100,
7136                        },
7137                        Some("flat") => VectorIndexType::Flat,
7138                        _ => VectorIndexType::IvfPq {
7139                            num_partitions: 256,
7140                            num_sub_vectors: 16,
7141                            bits_per_subvector: 8,
7142                        },
7143                    }
7144                } else {
7145                    VectorIndexType::IvfPq {
7146                        num_partitions: 256,
7147                        num_sub_vectors: 16,
7148                        bits_per_subvector: 8,
7149                    }
7150                };
7151
7152                // Parse embedding config from options
7153                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
7154                    Self::parse_embedding_config(emb_val)?
7155                } else {
7156                    None
7157                };
7158
7159                let config = VectorIndexConfig {
7160                    name: c.name,
7161                    label: c.label,
7162                    property: c.property,
7163                    metric: DistanceMetric::Cosine,
7164                    index_type,
7165                    embedding_config,
7166                    metadata: Default::default(),
7167                };
7168                Ok(LogicalPlan::CreateVectorIndex {
7169                    config,
7170                    if_not_exists: c.if_not_exists,
7171                })
7172            }
7173            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
7174                config: FullTextIndexConfig {
7175                    name: cfg.name,
7176                    label: cfg.label,
7177                    properties: cfg.properties,
7178                    tokenizer: TokenizerConfig::Standard,
7179                    with_positions: true,
7180                    metadata: Default::default(),
7181                },
7182                if_not_exists: cfg.if_not_exists,
7183            }),
7184            SchemaCommand::CreateScalarIndex(cfg) => {
7185                // Convert expressions to storage strings (strip variable prefix)
7186                let properties: Vec<String> = cfg
7187                    .expressions
7188                    .iter()
7189                    .map(Self::normalize_expression_for_storage)
7190                    .collect();
7191
7192                Ok(LogicalPlan::CreateScalarIndex {
7193                    config: ScalarIndexConfig {
7194                        name: cfg.name,
7195                        label: cfg.label,
7196                        properties,
7197                        index_type: ScalarIndexType::BTree,
7198                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
7199                        metadata: Default::default(),
7200                    },
7201                    if_not_exists: cfg.if_not_exists,
7202                })
7203            }
7204            SchemaCommand::CreateJsonFtsIndex(cfg) => {
7205                let with_positions = cfg
7206                    .options
7207                    .get("with_positions")
7208                    .and_then(|v| v.as_bool())
7209                    .unwrap_or(false);
7210                Ok(LogicalPlan::CreateJsonFtsIndex {
7211                    config: JsonFtsIndexConfig {
7212                        name: cfg.name,
7213                        label: cfg.label,
7214                        column: cfg.column,
7215                        paths: Vec::new(),
7216                        with_positions,
7217                        metadata: Default::default(),
7218                    },
7219                    if_not_exists: cfg.if_not_exists,
7220                })
7221            }
7222            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
7223                name: drop.name,
7224                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
7225            }),
7226            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
7227            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
7228            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
7229            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
7230            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
7231            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
7232            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
7233            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
7234            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
7235            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
7236            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
7237            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
7238            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
7239            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
7240            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
7241            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
7242                destination: path,
7243                options: HashMap::new(),
7244            }),
7245            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
7246                label: cmd.label,
7247                path: cmd.path,
7248                format: cmd.format,
7249                options: cmd.options,
7250            }),
7251            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
7252                label: cmd.label,
7253                path: cmd.path,
7254                format: cmd.format,
7255                options: cmd.options,
7256            }),
7257        }
7258    }
7259
7260    fn plan_transaction_command(
7261        &self,
7262        cmd: uni_cypher::ast::TransactionCommand,
7263    ) -> Result<LogicalPlan> {
7264        use uni_cypher::ast::TransactionCommand;
7265        match cmd {
7266            TransactionCommand::Begin => Ok(LogicalPlan::Begin),
7267            TransactionCommand::Commit => Ok(LogicalPlan::Commit),
7268            TransactionCommand::Rollback => Ok(LogicalPlan::Rollback),
7269        }
7270    }
7271
7272    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
7273        let obj = emb_val
7274            .as_object()
7275            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
7276
7277        // Parse alias (required)
7278        let alias = obj
7279            .get("alias")
7280            .and_then(|v| v.as_str())
7281            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
7282
7283        // Parse source properties (required)
7284        let source_properties = obj
7285            .get("source")
7286            .and_then(|v| v.as_array())
7287            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
7288            .iter()
7289            .filter_map(|v| v.as_str().map(|s| s.to_string()))
7290            .collect::<Vec<_>>();
7291
7292        if source_properties.is_empty() {
7293            return Err(anyhow!(
7294                "embedding.source must contain at least one property"
7295            ));
7296        }
7297
7298        let batch_size = obj
7299            .get("batch_size")
7300            .and_then(|v| v.as_u64())
7301            .map(|v| v as usize)
7302            .unwrap_or(32);
7303
7304        Ok(Some(EmbeddingConfig {
7305            alias: alias.to_string(),
7306            source_properties,
7307            batch_size,
7308        }))
7309    }
7310}
7311
7312/// Collect all properties referenced anywhere in the LogicalPlan tree.
7313///
7314/// This is critical for window functions: properties must be materialized
7315/// at the Scan node so they're available for window operations later.
7316///
7317/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
7318pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
7319    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
7320    collect_properties_recursive(plan, &mut properties);
7321    properties
7322}
7323
7324/// Recursively walk the LogicalPlan tree and collect all property references.
7325fn collect_properties_recursive(
7326    plan: &LogicalPlan,
7327    properties: &mut HashMap<String, HashSet<String>>,
7328) {
7329    match plan {
7330        LogicalPlan::Window {
7331            input,
7332            window_exprs,
7333        } => {
7334            // Collect from window expressions
7335            for expr in window_exprs {
7336                collect_properties_from_expr_into(expr, properties);
7337            }
7338            collect_properties_recursive(input, properties);
7339        }
7340        LogicalPlan::Project { input, projections } => {
7341            for (expr, _alias) in projections {
7342                collect_properties_from_expr_into(expr, properties);
7343            }
7344            collect_properties_recursive(input, properties);
7345        }
7346        LogicalPlan::Sort { input, order_by } => {
7347            for sort_item in order_by {
7348                collect_properties_from_expr_into(&sort_item.expr, properties);
7349            }
7350            collect_properties_recursive(input, properties);
7351        }
7352        LogicalPlan::Filter {
7353            input, predicate, ..
7354        } => {
7355            collect_properties_from_expr_into(predicate, properties);
7356            collect_properties_recursive(input, properties);
7357        }
7358        LogicalPlan::Aggregate {
7359            input,
7360            group_by,
7361            aggregates,
7362        } => {
7363            for expr in group_by {
7364                collect_properties_from_expr_into(expr, properties);
7365            }
7366            for expr in aggregates {
7367                collect_properties_from_expr_into(expr, properties);
7368            }
7369            collect_properties_recursive(input, properties);
7370        }
7371        LogicalPlan::Scan {
7372            filter: Some(expr), ..
7373        } => {
7374            collect_properties_from_expr_into(expr, properties);
7375        }
7376        LogicalPlan::Scan { filter: None, .. } => {}
7377        LogicalPlan::ExtIdLookup {
7378            filter: Some(expr), ..
7379        } => {
7380            collect_properties_from_expr_into(expr, properties);
7381        }
7382        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
7383        LogicalPlan::ScanAll {
7384            filter: Some(expr), ..
7385        } => {
7386            collect_properties_from_expr_into(expr, properties);
7387        }
7388        LogicalPlan::ScanAll { filter: None, .. } => {}
7389        LogicalPlan::ScanMainByLabels {
7390            filter: Some(expr), ..
7391        } => {
7392            collect_properties_from_expr_into(expr, properties);
7393        }
7394        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
7395        LogicalPlan::TraverseMainByType {
7396            input,
7397            target_filter,
7398            ..
7399        } => {
7400            if let Some(expr) = target_filter {
7401                collect_properties_from_expr_into(expr, properties);
7402            }
7403            collect_properties_recursive(input, properties);
7404        }
7405        LogicalPlan::Traverse {
7406            input,
7407            target_filter,
7408            step_variable: _,
7409            ..
7410        } => {
7411            if let Some(expr) = target_filter {
7412                collect_properties_from_expr_into(expr, properties);
7413            }
7414            // Note: Edge properties (step_variable) will be collected from expressions
7415            // that reference them. The edge_properties field in LogicalPlan is populated
7416            // later during physical planning based on this collected map.
7417            collect_properties_recursive(input, properties);
7418        }
7419        LogicalPlan::Unwind { input, expr, .. } => {
7420            collect_properties_from_expr_into(expr, properties);
7421            collect_properties_recursive(input, properties);
7422        }
7423        LogicalPlan::Create { input, pattern } => {
7424            // Mark variables referenced in CREATE patterns with "*" so plan_scan
7425            // adds structural projections (bare entity columns). Without this,
7426            // execute_create_pattern() can't find bound variables and creates
7427            // spurious new nodes instead of using existing MATCH'd ones.
7428            mark_pattern_variables(pattern, properties);
7429            collect_properties_recursive(input, properties);
7430        }
7431        LogicalPlan::CreateBatch { input, patterns } => {
7432            for pattern in patterns {
7433                mark_pattern_variables(pattern, properties);
7434            }
7435            collect_properties_recursive(input, properties);
7436        }
7437        LogicalPlan::Merge {
7438            input,
7439            pattern,
7440            on_match,
7441            on_create,
7442        } => {
7443            mark_pattern_variables(pattern, properties);
7444            if let Some(set_clause) = on_match {
7445                mark_set_item_variables(&set_clause.items, properties);
7446            }
7447            if let Some(set_clause) = on_create {
7448                mark_set_item_variables(&set_clause.items, properties);
7449            }
7450            collect_properties_recursive(input, properties);
7451        }
7452        LogicalPlan::Set { input, items } => {
7453            mark_set_item_variables(items, properties);
7454            collect_properties_recursive(input, properties);
7455        }
7456        LogicalPlan::Remove { input, items } => {
7457            for item in items {
7458                match item {
7459                    RemoveItem::Property(expr) => {
7460                        // REMOVE n.prop — collect the property and mark the variable
7461                        // with "*" so full structural projection is applied.
7462                        collect_properties_from_expr_into(expr, properties);
7463                        if let Expr::Property(base, _) = expr
7464                            && let Expr::Variable(var) = base.as_ref()
7465                        {
7466                            properties
7467                                .entry(var.clone())
7468                                .or_default()
7469                                .insert("*".to_string());
7470                        }
7471                    }
7472                    RemoveItem::Labels { variable, .. } => {
7473                        // REMOVE n:Label — mark n with "*"
7474                        properties
7475                            .entry(variable.clone())
7476                            .or_default()
7477                            .insert("*".to_string());
7478                    }
7479                }
7480            }
7481            collect_properties_recursive(input, properties);
7482        }
7483        LogicalPlan::Delete { input, items, .. } => {
7484            for expr in items {
7485                collect_properties_from_expr_into(expr, properties);
7486            }
7487            collect_properties_recursive(input, properties);
7488        }
7489        LogicalPlan::Foreach {
7490            input, list, body, ..
7491        } => {
7492            collect_properties_from_expr_into(list, properties);
7493            for plan in body {
7494                collect_properties_recursive(plan, properties);
7495            }
7496            collect_properties_recursive(input, properties);
7497        }
7498        LogicalPlan::Limit { input, .. } => {
7499            collect_properties_recursive(input, properties);
7500        }
7501        LogicalPlan::CrossJoin { left, right } => {
7502            collect_properties_recursive(left, properties);
7503            collect_properties_recursive(right, properties);
7504        }
7505        LogicalPlan::Apply {
7506            input,
7507            subquery,
7508            input_filter,
7509        } => {
7510            if let Some(expr) = input_filter {
7511                collect_properties_from_expr_into(expr, properties);
7512            }
7513            collect_properties_recursive(input, properties);
7514            collect_properties_recursive(subquery, properties);
7515        }
7516        LogicalPlan::Union { left, right, .. } => {
7517            collect_properties_recursive(left, properties);
7518            collect_properties_recursive(right, properties);
7519        }
7520        LogicalPlan::RecursiveCTE {
7521            initial, recursive, ..
7522        } => {
7523            collect_properties_recursive(initial, properties);
7524            collect_properties_recursive(recursive, properties);
7525        }
7526        LogicalPlan::ProcedureCall { arguments, .. } => {
7527            for arg in arguments {
7528                collect_properties_from_expr_into(arg, properties);
7529            }
7530        }
7531        LogicalPlan::VectorKnn { query, .. } => {
7532            collect_properties_from_expr_into(query, properties);
7533        }
7534        LogicalPlan::InvertedIndexLookup { terms, .. } => {
7535            collect_properties_from_expr_into(terms, properties);
7536        }
7537        LogicalPlan::ShortestPath { input, .. } => {
7538            collect_properties_recursive(input, properties);
7539        }
7540        LogicalPlan::AllShortestPaths { input, .. } => {
7541            collect_properties_recursive(input, properties);
7542        }
7543        LogicalPlan::Distinct { input } => {
7544            collect_properties_recursive(input, properties);
7545        }
7546        LogicalPlan::QuantifiedPattern {
7547            input,
7548            pattern_plan,
7549            ..
7550        } => {
7551            collect_properties_recursive(input, properties);
7552            collect_properties_recursive(pattern_plan, properties);
7553        }
7554        LogicalPlan::BindZeroLengthPath { input, .. } => {
7555            collect_properties_recursive(input, properties);
7556        }
7557        LogicalPlan::BindPath { input, .. } => {
7558            collect_properties_recursive(input, properties);
7559        }
7560        LogicalPlan::SubqueryCall { input, subquery } => {
7561            collect_properties_recursive(input, properties);
7562            collect_properties_recursive(subquery, properties);
7563        }
7564        LogicalPlan::LocyProject {
7565            input, projections, ..
7566        } => {
7567            for (expr, _alias) in projections {
7568                match expr {
7569                    // Bare variable in LocyProject: only need _vid for node variables
7570                    // (plan_locy_project extracts VID directly). Adding "*" would create
7571                    // a structural Struct column that conflicts with derived scan columns.
7572                    Expr::Variable(name) if !name.contains('.') => {
7573                        properties
7574                            .entry(name.clone())
7575                            .or_default()
7576                            .insert("_vid".to_string());
7577                    }
7578                    _ => collect_properties_from_expr_into(expr, properties),
7579                }
7580            }
7581            collect_properties_recursive(input, properties);
7582        }
7583        LogicalPlan::LocyFold {
7584            input,
7585            fold_bindings,
7586            ..
7587        } => {
7588            for (_name, expr) in fold_bindings {
7589                collect_properties_from_expr_into(expr, properties);
7590            }
7591            collect_properties_recursive(input, properties);
7592        }
7593        LogicalPlan::LocyBestBy {
7594            input, criteria, ..
7595        } => {
7596            for (expr, _asc) in criteria {
7597                collect_properties_from_expr_into(expr, properties);
7598            }
7599            collect_properties_recursive(input, properties);
7600        }
7601        LogicalPlan::LocyPriority { input, .. } => {
7602            collect_properties_recursive(input, properties);
7603        }
7604        // DDL and other plans don't reference properties
7605        _ => {}
7606    }
7607}
7608
7609/// Mark target variables from SET items with "*" and collect value expressions.
7610fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
7611    for item in items {
7612        match item {
7613            SetItem::Property { expr, value } => {
7614                // SET n.prop = val — mark n via the property expr, collect from value.
7615                // Also mark the variable with "*" for full structural projection so
7616                // edge identity fields (_src/_dst) are available for write operations.
7617                collect_properties_from_expr_into(expr, properties);
7618                collect_properties_from_expr_into(value, properties);
7619                if let Expr::Property(base, _) = expr
7620                    && let Expr::Variable(var) = base.as_ref()
7621                {
7622                    properties
7623                        .entry(var.clone())
7624                        .or_default()
7625                        .insert("*".to_string());
7626                }
7627            }
7628            SetItem::Labels { variable, .. } => {
7629                // SET n:Label — need full access to n
7630                properties
7631                    .entry(variable.clone())
7632                    .or_default()
7633                    .insert("*".to_string());
7634            }
7635            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
7636                // SET n = {props} or SET n += {props}
7637                properties
7638                    .entry(variable.clone())
7639                    .or_default()
7640                    .insert("*".to_string());
7641                collect_properties_from_expr_into(value, properties);
7642            }
7643        }
7644    }
7645}
7646
7647/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
7648/// adds structural projections (bare entity Struct columns) for them.
7649/// This is needed so that execute_create_pattern() can find bound variables
7650/// in the row HashMap and reuse existing nodes instead of creating new ones.
7651fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
7652    for path in &pattern.paths {
7653        if let Some(ref v) = path.variable {
7654            properties
7655                .entry(v.clone())
7656                .or_default()
7657                .insert("*".to_string());
7658        }
7659        for element in &path.elements {
7660            match element {
7661                PatternElement::Node(n) => {
7662                    if let Some(ref v) = n.variable {
7663                        properties
7664                            .entry(v.clone())
7665                            .or_default()
7666                            .insert("*".to_string());
7667                    }
7668                    // Also collect properties from inline property expressions
7669                    if let Some(ref props) = n.properties {
7670                        collect_properties_from_expr_into(props, properties);
7671                    }
7672                }
7673                PatternElement::Relationship(r) => {
7674                    if let Some(ref v) = r.variable {
7675                        properties
7676                            .entry(v.clone())
7677                            .or_default()
7678                            .insert("*".to_string());
7679                    }
7680                    if let Some(ref props) = r.properties {
7681                        collect_properties_from_expr_into(props, properties);
7682                    }
7683                }
7684                PatternElement::Parenthesized { pattern, .. } => {
7685                    let sub = Pattern {
7686                        paths: vec![pattern.as_ref().clone()],
7687                    };
7688                    mark_pattern_variables(&sub, properties);
7689                }
7690            }
7691        }
7692    }
7693}
7694
7695/// Collect properties from an expression into a HashMap.
7696fn collect_properties_from_expr_into(
7697    expr: &Expr,
7698    properties: &mut HashMap<String, HashSet<String>>,
7699) {
7700    match expr {
7701        Expr::PatternComprehension {
7702            where_clause,
7703            map_expr,
7704            ..
7705        } => {
7706            // Collect properties from the WHERE clause and map expression.
7707            // The pattern itself creates local bindings that don't need
7708            // property collection from the outer scope.
7709            if let Some(where_expr) = where_clause {
7710                collect_properties_from_expr_into(where_expr, properties);
7711            }
7712            collect_properties_from_expr_into(map_expr, properties);
7713        }
7714        Expr::Variable(name) => {
7715            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
7716            if let Some((var, prop)) = name.split_once('.') {
7717                properties
7718                    .entry(var.to_string())
7719                    .or_default()
7720                    .insert(prop.to_string());
7721            } else {
7722                // Bare variable (e.g., RETURN n) — needs all properties materialized
7723                properties
7724                    .entry(name.clone())
7725                    .or_default()
7726                    .insert("*".to_string());
7727            }
7728        }
7729        Expr::Property(base, name) => {
7730            // Extract variable name from the base expression
7731            if let Expr::Variable(var) = base.as_ref() {
7732                properties
7733                    .entry(var.clone())
7734                    .or_default()
7735                    .insert(name.clone());
7736                // Don't recurse into Variable — that would mark it as a bare
7737                // variable reference (adding "*") when it's just a property base.
7738            } else {
7739                // Recurse for complex base expressions (nested property, function call, etc.)
7740                collect_properties_from_expr_into(base, properties);
7741            }
7742        }
7743        Expr::BinaryOp { left, right, .. } => {
7744            collect_properties_from_expr_into(left, properties);
7745            collect_properties_from_expr_into(right, properties);
7746        }
7747        Expr::FunctionCall {
7748            name,
7749            args,
7750            window_spec,
7751            ..
7752        } => {
7753            // Analyze function for property requirements (pushdown hydration)
7754            analyze_function_property_requirements(name, args, properties);
7755
7756            // Collect from arguments
7757            for arg in args {
7758                collect_properties_from_expr_into(arg, properties);
7759            }
7760
7761            // Collect from window spec (PARTITION BY, ORDER BY)
7762            if let Some(spec) = window_spec {
7763                for part_expr in &spec.partition_by {
7764                    collect_properties_from_expr_into(part_expr, properties);
7765                }
7766                for sort_item in &spec.order_by {
7767                    collect_properties_from_expr_into(&sort_item.expr, properties);
7768                }
7769            }
7770        }
7771        Expr::UnaryOp { expr, .. } => {
7772            collect_properties_from_expr_into(expr, properties);
7773        }
7774        Expr::List(items) => {
7775            for item in items {
7776                collect_properties_from_expr_into(item, properties);
7777            }
7778        }
7779        Expr::Map(entries) => {
7780            for (_key, value) in entries {
7781                collect_properties_from_expr_into(value, properties);
7782            }
7783        }
7784        Expr::ListComprehension {
7785            list,
7786            where_clause,
7787            map_expr,
7788            ..
7789        } => {
7790            collect_properties_from_expr_into(list, properties);
7791            if let Some(where_expr) = where_clause {
7792                collect_properties_from_expr_into(where_expr, properties);
7793            }
7794            collect_properties_from_expr_into(map_expr, properties);
7795        }
7796        Expr::Case {
7797            expr,
7798            when_then,
7799            else_expr,
7800        } => {
7801            if let Some(scrutinee_expr) = expr {
7802                collect_properties_from_expr_into(scrutinee_expr, properties);
7803            }
7804            for (when, then) in when_then {
7805                collect_properties_from_expr_into(when, properties);
7806                collect_properties_from_expr_into(then, properties);
7807            }
7808            if let Some(default_expr) = else_expr {
7809                collect_properties_from_expr_into(default_expr, properties);
7810            }
7811        }
7812        Expr::Quantifier {
7813            list, predicate, ..
7814        } => {
7815            collect_properties_from_expr_into(list, properties);
7816            collect_properties_from_expr_into(predicate, properties);
7817        }
7818        Expr::Reduce {
7819            init, list, expr, ..
7820        } => {
7821            collect_properties_from_expr_into(init, properties);
7822            collect_properties_from_expr_into(list, properties);
7823            collect_properties_from_expr_into(expr, properties);
7824        }
7825        Expr::Exists { query, .. } => {
7826            // Walk into EXISTS body to collect property references for outer-scope variables.
7827            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
7828            // are included in the outer scan's property list. Extra properties collected for
7829            // inner-only variables are harmless — the outer scan ignores unknown variable names.
7830            collect_properties_from_subquery(query, properties);
7831        }
7832        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
7833            collect_properties_from_subquery(query, properties);
7834        }
7835        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
7836            collect_properties_from_expr_into(expr, properties);
7837        }
7838        Expr::In { expr, list } => {
7839            collect_properties_from_expr_into(expr, properties);
7840            collect_properties_from_expr_into(list, properties);
7841        }
7842        Expr::ArrayIndex { array, index } => {
7843            if let Expr::Variable(var) = array.as_ref() {
7844                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
7845                    // Static string key: e['name'] → only need that specific property
7846                    properties
7847                        .entry(var.clone())
7848                        .or_default()
7849                        .insert(prop_name.clone());
7850                } else {
7851                    // Dynamic property access: e[prop] → need all properties
7852                    properties
7853                        .entry(var.clone())
7854                        .or_default()
7855                        .insert("*".to_string());
7856                }
7857            }
7858            collect_properties_from_expr_into(array, properties);
7859            collect_properties_from_expr_into(index, properties);
7860        }
7861        Expr::ArraySlice { array, start, end } => {
7862            collect_properties_from_expr_into(array, properties);
7863            if let Some(start_expr) = start {
7864                collect_properties_from_expr_into(start_expr, properties);
7865            }
7866            if let Some(end_expr) = end {
7867                collect_properties_from_expr_into(end_expr, properties);
7868            }
7869        }
7870        Expr::ValidAt {
7871            entity,
7872            timestamp,
7873            start_prop,
7874            end_prop,
7875        } => {
7876            // Extract property requirements from ValidAt expression
7877            if let Expr::Variable(var) = entity.as_ref() {
7878                if let Some(prop) = start_prop {
7879                    properties
7880                        .entry(var.clone())
7881                        .or_default()
7882                        .insert(prop.clone());
7883                }
7884                if let Some(prop) = end_prop {
7885                    properties
7886                        .entry(var.clone())
7887                        .or_default()
7888                        .insert(prop.clone());
7889                }
7890            }
7891            collect_properties_from_expr_into(entity, properties);
7892            collect_properties_from_expr_into(timestamp, properties);
7893        }
7894        Expr::MapProjection { base, items } => {
7895            collect_properties_from_expr_into(base, properties);
7896            for item in items {
7897                match item {
7898                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
7899                        if let Expr::Variable(var) = base.as_ref() {
7900                            properties
7901                                .entry(var.clone())
7902                                .or_default()
7903                                .insert(prop.clone());
7904                        }
7905                    }
7906                    uni_cypher::ast::MapProjectionItem::AllProperties => {
7907                        if let Expr::Variable(var) = base.as_ref() {
7908                            properties
7909                                .entry(var.clone())
7910                                .or_default()
7911                                .insert("*".to_string());
7912                        }
7913                    }
7914                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
7915                        collect_properties_from_expr_into(expr, properties);
7916                    }
7917                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
7918                }
7919            }
7920        }
7921        Expr::LabelCheck { expr, .. } => {
7922            collect_properties_from_expr_into(expr, properties);
7923        }
7924        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
7925        // Mark them with "*" so the outer scan produces structural projections that
7926        // extract_row_params can resolve.
7927        Expr::Parameter(name) => {
7928            properties
7929                .entry(name.clone())
7930                .or_default()
7931                .insert("*".to_string());
7932        }
7933        // Literals and wildcard don't reference properties
7934        Expr::Literal(_) | Expr::Wildcard => {}
7935    }
7936}
7937
7938/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
7939///
7940/// This is needed so that correlated property accesses like `a.city` inside
7941/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
7942/// scan to include `a.city` in its projected columns.
7943fn collect_properties_from_subquery(
7944    query: &Query,
7945    properties: &mut HashMap<String, HashSet<String>>,
7946) {
7947    match query {
7948        Query::Single(stmt) => {
7949            for clause in &stmt.clauses {
7950                match clause {
7951                    Clause::Match(m) => {
7952                        if let Some(ref wc) = m.where_clause {
7953                            collect_properties_from_expr_into(wc, properties);
7954                        }
7955                    }
7956                    Clause::With(w) => {
7957                        for item in &w.items {
7958                            if let ReturnItem::Expr { expr, .. } = item {
7959                                collect_properties_from_expr_into(expr, properties);
7960                            }
7961                        }
7962                        if let Some(ref wc) = w.where_clause {
7963                            collect_properties_from_expr_into(wc, properties);
7964                        }
7965                    }
7966                    Clause::Return(r) => {
7967                        for item in &r.items {
7968                            if let ReturnItem::Expr { expr, .. } = item {
7969                                collect_properties_from_expr_into(expr, properties);
7970                            }
7971                        }
7972                    }
7973                    _ => {}
7974                }
7975            }
7976        }
7977        Query::Union { left, right, .. } => {
7978            collect_properties_from_subquery(left, properties);
7979            collect_properties_from_subquery(right, properties);
7980        }
7981        _ => {}
7982    }
7983}
7984
7985/// Analyze function calls to extract property requirements for pushdown hydration
7986///
7987/// This function examines function calls and their arguments to determine which properties
7988/// need to be loaded for entity arguments. For example:
7989/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
7990/// - keys(n) -> n needs all properties (*)
7991///
7992/// The extracted requirements are added to the properties map for later use during
7993/// scan planning.
7994fn analyze_function_property_requirements(
7995    name: &str,
7996    args: &[Expr],
7997    properties: &mut HashMap<String, HashSet<String>>,
7998) {
7999    use crate::query::function_props::get_function_spec;
8000
8001    /// Helper to mark a variable as needing all properties.
8002    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
8003        properties
8004            .entry(var.to_string())
8005            .or_default()
8006            .insert("*".to_string());
8007    }
8008
8009    let Some(spec) = get_function_spec(name) else {
8010        // Unknown function: conservatively require all properties for variable args
8011        for arg in args {
8012            if let Expr::Variable(var) = arg {
8013                mark_wildcard(var, properties);
8014            }
8015        }
8016        return;
8017    };
8018
8019    // Extract property names from string literal arguments
8020    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
8021        let entity_arg = args.get(entity_arg_idx);
8022        let prop_arg = args.get(prop_arg_idx);
8023
8024        match (entity_arg, prop_arg) {
8025            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
8026                properties
8027                    .entry(var.clone())
8028                    .or_default()
8029                    .insert(prop.clone());
8030            }
8031            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
8032                // Parameter property name: need all properties
8033                mark_wildcard(var, properties);
8034            }
8035            _ => {}
8036        }
8037    }
8038
8039    // Handle full entity requirement (keys(), properties())
8040    if spec.needs_full_entity {
8041        for &idx in spec.entity_args {
8042            if let Some(Expr::Variable(var)) = args.get(idx) {
8043                mark_wildcard(var, properties);
8044            }
8045        }
8046    }
8047}
8048
#[cfg(test)]
mod pushdown_tests {
    //! Unit tests for property-requirement extraction from function calls and
    //! expressions.

    use super::*;

    /// Builds a bare variable reference.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Builds a string literal expression.
    fn string_lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Builds a parameter reference.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Builds a set of property names for comparison against collected results.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|&name| name.to_owned()).collect()
    }

    /// Runs the function-call analysis against an empty map and returns the result.
    fn requirements_for_call(function: &str, args: &[Expr]) -> HashMap<String, HashSet<String>> {
        let mut collected = HashMap::new();
        analyze_function_property_requirements(function, args, &mut collected);
        collected
    }

    /// Runs the expression walker against an empty map and returns the result.
    fn requirements_for_expr(expr: &Expr) -> HashMap<String, HashSet<String>> {
        let mut collected = HashMap::new();
        collect_properties_from_expr_into(expr, &mut collected);
        collected
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let call_args = [var("e"), string_lit("start"), string_lit("end"), var("ts")];

        let collected = requirements_for_call("uni.temporal.validAt", &call_args);

        assert!(collected.contains_key("e"));
        assert_eq!(collected.get("e").unwrap(), &prop_set(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let collected = requirements_for_call("keys", &[var("n")]);

        assert!(collected.contains_key("n"));
        assert_eq!(collected.get("n").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let collected = requirements_for_call("properties", &[var("n")]);

        assert!(collected.contains_key("n"));
        assert_eq!(collected.get("n").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let collected = requirements_for_call("customUdf", &[var("e")]);

        assert!(collected.contains_key("e"));
        assert_eq!(collected.get("e").unwrap(), &prop_set(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let call_args = [var("e"), param("start"), param("end"), var("ts")];

        let collected = requirements_for_call("uni.temporal.validAt", &call_args);

        assert!(collected.contains_key("e"));
        assert!(collected.get("e").unwrap().contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt carries its property names directly on the node.
        let valid_at = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };

        let collected = requirements_for_expr(&valid_at);

        assert!(collected.contains_key("e"));
        let e_props = collected.get("e").unwrap();
        assert!(e_props.contains("valid_from"));
        assert!(e_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let dynamic_index = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };

        let collected = requirements_for_expr(&dynamic_index);

        assert!(collected.contains_key("e"));
        assert!(collected.get("e").unwrap().contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let access = Expr::Property(Box::new(var("e")), "name".to_string());

        let collected = requirements_for_expr(&access);

        assert!(collected.contains_key("e"));
        assert!(collected.get("e").unwrap().contains("name"));
    }
}