Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::PredicateAnalyzer;
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Semantic category of a variable that is in scope during query validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding (e.g. from `MATCH (n)` or `CREATE (n)`).
    Node,
    /// An edge/relationship binding (e.g. from `MATCH ()-[r]->()`).
    Edge,
    /// A path binding (e.g. from `MATCH p = (a)-[*]->(b)`).
    Path,
    /// A scalar binding (e.g. `WITH expr AS x`, `UNWIND list AS item`).
    /// It may hold a map or a dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to originate from a non-graph literal
    /// (int, float, bool, string, list).
    /// Property access on these is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown
    /// (string variables passed to `plan_with_scope`).
    /// It is compatible with every concrete type, which lets subqueries re-bind it.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of type `self` may be used where `expected` is required.
    ///
    /// Rules, in order:
    /// * `Imported` is accepted everywhere because its real type is unknown at plan time.
    /// * `ScalarLiteral` is accepted wherever `Scalar` is expected.
    /// * Otherwise the two types must be identical.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match (self, expected) {
            (VariableType::Imported, _) => true,
            (VariableType::ScalarLiteral, VariableType::Scalar) => true,
            (actual, wanted) => actual == wanted,
        }
    }
}
57
58/// Information about a variable in scope during planning.
59#[derive(Debug, Clone)]
60pub struct VariableInfo {
61    /// Variable name as written in the query.
62    pub name: String,
63    /// Semantic type of the variable.
64    pub var_type: VariableType,
65    /// True if this is a variable-length path (VLP) step variable.
66    ///
67    /// VLP step variables are typed as Edge but semantically hold edge lists.
68    pub is_vlp: bool,
69}
70
71impl VariableInfo {
72    pub fn new(name: String, var_type: VariableType) -> Self {
73        Self {
74            name,
75            var_type,
76            is_vlp: false,
77        }
78    }
79}
80
81/// Find a variable in scope by name.
82fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
83    vars.iter().find(|v| v.name == name)
84}
85
86/// Check if a variable is in scope.
87fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
88    find_var_in_scope(vars, name).is_some()
89}
90
91/// Check if an expression contains a pattern predicate.
92fn contains_pattern_predicate(expr: &Expr) -> bool {
93    if matches!(
94        expr,
95        Expr::Exists {
96            from_pattern_predicate: true,
97            ..
98        }
99    ) {
100        return true;
101    }
102    let mut found = false;
103    expr.for_each_child(&mut |child| {
104        if !found {
105            found = contains_pattern_predicate(child);
106        }
107    });
108    found
109}
110
111/// Add a variable to scope with type conflict validation.
112/// Returns an error if the variable already exists with a different type.
113fn add_var_to_scope(
114    vars: &mut Vec<VariableInfo>,
115    name: &str,
116    var_type: VariableType,
117) -> Result<()> {
118    if name.is_empty() {
119        return Ok(());
120    }
121
122    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
123        if existing.var_type == VariableType::Imported {
124            // Imported vars upgrade to the concrete type
125            existing.var_type = var_type;
126        } else if var_type == VariableType::Imported || existing.var_type == var_type {
127            // New type is Imported (keep existing) or same type — no conflict
128        } else if matches!(
129            existing.var_type,
130            VariableType::Scalar | VariableType::ScalarLiteral
131        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
132        {
133            // Scalar can be used as Node/Edge in CREATE context — a scalar
134            // holding a node/edge reference is valid for pattern use
135            existing.var_type = var_type;
136        } else {
137            return Err(anyhow!(
138                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
139                name,
140                existing.var_type,
141                var_type
142            ));
143        }
144    } else {
145        vars.push(VariableInfo::new(name.to_string(), var_type));
146    }
147    Ok(())
148}
149
150/// Convert VariableInfo vec to String vec for backward compatibility
151fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
152    vars.iter().map(|v| v.name.clone()).collect()
153}
154
155fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
156    match expr {
157        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
158            .map(|info| info.var_type)
159            .unwrap_or(VariableType::Scalar),
160        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
161        // Known non-graph literals: property access is NOT valid on these.
162        Expr::Literal(CypherLiteral::Integer(_))
163        | Expr::Literal(CypherLiteral::Float(_))
164        | Expr::Literal(CypherLiteral::String(_))
165        | Expr::Literal(CypherLiteral::Bool(_))
166        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
167        Expr::FunctionCall { name, args, .. } => {
168            let lower = name.to_lowercase();
169            if lower == "coalesce" {
170                infer_coalesce_type(args, vars_in_scope)
171            } else if lower == "collect" && !args.is_empty() {
172                let collected = infer_with_output_type(&args[0], vars_in_scope);
173                if matches!(
174                    collected,
175                    VariableType::Node
176                        | VariableType::Edge
177                        | VariableType::Path
178                        | VariableType::Imported
179                ) {
180                    collected
181                } else {
182                    VariableType::Scalar
183                }
184            } else {
185                VariableType::Scalar
186            }
187        }
188        // WITH list literals/expressions produce scalar list values. Preserving
189        // entity typing here causes invalid node/edge reuse in later MATCH clauses
190        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
191        // Lists are ScalarLiteral since property access is not valid on them.
192        Expr::List(_) => VariableType::ScalarLiteral,
193        _ => VariableType::Scalar,
194    }
195}
196
197fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
198    let mut resolved: Option<VariableType> = None;
199    let mut saw_imported = false;
200    for arg in args {
201        let t = infer_with_output_type(arg, vars_in_scope);
202        match t {
203            VariableType::Node | VariableType::Edge | VariableType::Path => {
204                if let Some(existing) = resolved {
205                    if existing != t {
206                        return VariableType::Scalar;
207                    }
208                } else {
209                    resolved = Some(t);
210                }
211            }
212            VariableType::Imported => saw_imported = true,
213            VariableType::Scalar | VariableType::ScalarLiteral => {}
214        }
215    }
216    if let Some(t) = resolved {
217        t
218    } else if saw_imported {
219        VariableType::Imported
220    } else {
221        VariableType::Scalar
222    }
223}
224
225fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
226    match expr {
227        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
228            .map(|info| info.var_type)
229            .unwrap_or(VariableType::Scalar),
230        Expr::FunctionCall { name, args, .. }
231            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
232        {
233            infer_with_output_type(&args[0], vars_in_scope)
234        }
235        Expr::List(items) => {
236            let mut inferred: Option<VariableType> = None;
237            for item in items {
238                let t = infer_with_output_type(item, vars_in_scope);
239                if !matches!(
240                    t,
241                    VariableType::Node
242                        | VariableType::Edge
243                        | VariableType::Path
244                        | VariableType::Imported
245                ) {
246                    return VariableType::Scalar;
247                }
248                if let Some(existing) = inferred {
249                    if existing != t
250                        && t != VariableType::Imported
251                        && existing != VariableType::Imported
252                    {
253                        return VariableType::Scalar;
254                    }
255                    if existing == VariableType::Imported && t != VariableType::Imported {
256                        inferred = Some(t);
257                    }
258                } else {
259                    inferred = Some(t);
260                }
261            }
262            inferred.unwrap_or(VariableType::Scalar)
263        }
264        _ => VariableType::Scalar,
265    }
266}
267
268/// Collect all variable names referenced in an expression
269fn collect_expr_variables(expr: &Expr) -> Vec<String> {
270    let mut vars = Vec::new();
271    collect_expr_variables_inner(expr, &mut vars);
272    vars
273}
274
275fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
276    let mut add_var = |name: &String| {
277        if !vars.contains(name) {
278            vars.push(name.clone());
279        }
280    };
281
282    match expr {
283        Expr::Variable(name) => add_var(name),
284        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
285        Expr::BinaryOp { left, right, .. } => {
286            collect_expr_variables_inner(left, vars);
287            collect_expr_variables_inner(right, vars);
288        }
289        Expr::UnaryOp { expr: e, .. }
290        | Expr::IsNull(e)
291        | Expr::IsNotNull(e)
292        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
293        Expr::FunctionCall { args, .. } => {
294            for a in args {
295                collect_expr_variables_inner(a, vars);
296            }
297        }
298        Expr::List(items) => {
299            for item in items {
300                collect_expr_variables_inner(item, vars);
301            }
302        }
303        Expr::In { expr: e, list } => {
304            collect_expr_variables_inner(e, vars);
305            collect_expr_variables_inner(list, vars);
306        }
307        Expr::Case {
308            expr: case_expr,
309            when_then,
310            else_expr,
311        } => {
312            if let Some(e) = case_expr {
313                collect_expr_variables_inner(e, vars);
314            }
315            for (w, t) in when_then {
316                collect_expr_variables_inner(w, vars);
317                collect_expr_variables_inner(t, vars);
318            }
319            if let Some(e) = else_expr {
320                collect_expr_variables_inner(e, vars);
321            }
322        }
323        Expr::Map(entries) => {
324            for (_, v) in entries {
325                collect_expr_variables_inner(v, vars);
326            }
327        }
328        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
329        Expr::ArrayIndex { array, index } => {
330            collect_expr_variables_inner(array, vars);
331            collect_expr_variables_inner(index, vars);
332        }
333        Expr::ArraySlice { array, start, end } => {
334            collect_expr_variables_inner(array, vars);
335            if let Some(s) = start {
336                collect_expr_variables_inner(s, vars);
337            }
338            if let Some(e) = end {
339                collect_expr_variables_inner(e, vars);
340            }
341        }
342        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
343        // they introduce local variable bindings not in outer scope.
344        _ => {}
345    }
346}
347
348/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
349///
350/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
351/// before the final RETURN projection without losing alias semantics.
352fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
353    let repr = expr.to_string_repr();
354    if let Some(rewritten) = aliases.get(&repr) {
355        return rewritten.clone();
356    }
357
358    match expr {
359        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
360        Expr::Property(base, prop) => Expr::Property(
361            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
362            prop.clone(),
363        ),
364        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
365            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
366            op: *op,
367            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
368        },
369        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
370            op: *op,
371            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
372        },
373        Expr::FunctionCall {
374            name,
375            args,
376            distinct,
377            window_spec,
378        } => Expr::FunctionCall {
379            name: name.clone(),
380            args: args
381                .iter()
382                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
383                .collect(),
384            distinct: *distinct,
385            window_spec: window_spec.clone(),
386        },
387        Expr::List(items) => Expr::List(
388            items
389                .iter()
390                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
391                .collect(),
392        ),
393        Expr::Map(entries) => Expr::Map(
394            entries
395                .iter()
396                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
397                .collect(),
398        ),
399        Expr::Case {
400            expr: case_expr,
401            when_then,
402            else_expr,
403        } => Expr::Case {
404            expr: case_expr
405                .as_ref()
406                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
407            when_then: when_then
408                .iter()
409                .map(|(w, t)| {
410                    (
411                        rewrite_order_by_expr_with_aliases(w, aliases),
412                        rewrite_order_by_expr_with_aliases(t, aliases),
413                    )
414                })
415                .collect(),
416            else_expr: else_expr
417                .as_ref()
418                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
419        },
420        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
421        // they introduce local variable bindings that could shadow aliases.
422        _ => expr.clone(),
423    }
424}
425
426/// Validate function call argument types.
427/// Returns error if type constraints are violated.
428fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
429    let name_lower = name.to_lowercase();
430
431    // labels() requires Node
432    if name_lower == "labels"
433        && let Some(Expr::Variable(var_name)) = args.first()
434        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
435        && !info.var_type.is_compatible_with(VariableType::Node)
436    {
437        return Err(anyhow!(
438            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
439        ));
440    }
441
442    // type() requires Edge
443    if name_lower == "type"
444        && let Some(Expr::Variable(var_name)) = args.first()
445        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
446        && !info.var_type.is_compatible_with(VariableType::Edge)
447    {
448        return Err(anyhow!(
449            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
450        ));
451    }
452
453    // properties() requires Node/Edge/Map (not scalar literals)
454    if name_lower == "properties"
455        && let Some(arg) = args.first()
456    {
457        match arg {
458            Expr::Literal(CypherLiteral::Integer(_))
459            | Expr::Literal(CypherLiteral::Float(_))
460            | Expr::Literal(CypherLiteral::String(_))
461            | Expr::Literal(CypherLiteral::Bool(_))
462            | Expr::List(_) => {
463                return Err(anyhow!(
464                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
465                ));
466            }
467            Expr::Variable(var_name) => {
468                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
469                    && matches!(
470                        info.var_type,
471                        VariableType::Scalar | VariableType::ScalarLiteral
472                    )
473                {
474                    return Err(anyhow!(
475                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
476                    ));
477                }
478            }
479            _ => {}
480        }
481    }
482
483    // nodes()/relationships() require Path
484    if (name_lower == "nodes" || name_lower == "relationships")
485        && let Some(Expr::Variable(var_name)) = args.first()
486        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
487        && !info.var_type.is_compatible_with(VariableType::Path)
488    {
489        return Err(anyhow!(
490            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
491            name_lower
492        ));
493    }
494
495    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
496    if name_lower == "size"
497        && let Some(Expr::Variable(var_name)) = args.first()
498        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
499        && info.var_type == VariableType::Path
500    {
501        return Err(anyhow!(
502            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
503        ));
504    }
505
506    // length()/size() do NOT accept Node or single-Edge arguments.
507    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
508    // but are actually edge lists — size()/length() is valid on those.
509    if (name_lower == "length" || name_lower == "size")
510        && let Some(Expr::Variable(var_name)) = args.first()
511        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
512        && (info.var_type == VariableType::Node
513            || (info.var_type == VariableType::Edge && !info.is_vlp))
514    {
515        return Err(anyhow!(
516            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
517            name_lower
518        ));
519    }
520
521    Ok(())
522}
523
524/// Check if an expression is a non-boolean literal.
525fn is_non_boolean_literal(expr: &Expr) -> bool {
526    matches!(
527        expr,
528        Expr::Literal(CypherLiteral::Integer(_))
529            | Expr::Literal(CypherLiteral::Float(_))
530            | Expr::Literal(CypherLiteral::String(_))
531            | Expr::List(_)
532            | Expr::Map(_)
533    )
534}
535
536/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
537fn validate_boolean_expression(expr: &Expr) -> Result<()> {
538    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
539    if let Expr::BinaryOp { left, op, right } = expr
540        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
541    {
542        let op_name = format!("{op:?}").to_uppercase();
543        for operand in [left.as_ref(), right.as_ref()] {
544            if is_non_boolean_literal(operand) {
545                return Err(anyhow!(
546                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
547                    op_name
548                ));
549            }
550        }
551    }
552    if let Expr::UnaryOp {
553        op: uni_cypher::ast::UnaryOp::Not,
554        expr: inner,
555    } = expr
556        && is_non_boolean_literal(inner)
557    {
558        return Err(anyhow!(
559            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
560        ));
561    }
562    let mut result = Ok(());
563    expr.for_each_child(&mut |child| {
564        if result.is_ok() {
565            result = validate_boolean_expression(child);
566        }
567    });
568    result
569}
570
571/// Validate that all variables used in an expression are in scope.
572fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
573    let used_vars = collect_expr_variables(expr);
574    for var in used_vars {
575        if !is_var_in_scope(vars_in_scope, &var) {
576            return Err(anyhow!(
577                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
578                var
579            ));
580        }
581    }
582    Ok(())
583}
584
/// Returns `true` if `name` is one of the built-in aggregate function names.
///
/// The comparison is case-insensitive, so callers may pass the name in any
/// casing (`count`, `COUNT`, `Count`, ...). Recognized names: `count`, `sum`,
/// `avg`, `min`, `max`, `collect`, `stdev`, `stdevp`, `percentiledisc`,
/// `percentilecont`.
fn is_aggregate_function_name(name: &str) -> bool {
    const AGGREGATE_FUNCTIONS: [&str; 10] = [
        "count",
        "sum",
        "avg",
        "min",
        "max",
        "collect",
        "stdev",
        "stdevp",
        "percentiledisc",
        "percentilecont",
    ];

    // ASCII case-insensitive comparison avoids allocating a lowered copy of
    // `name` on every call; this function runs inside recursive expression
    // walks. The accepted names are pure ASCII and none contains a letter that
    // a non-ASCII character lowercases to, so the result is the same as
    // comparing `name.to_lowercase()`.
    AGGREGATE_FUNCTIONS
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
601
602/// Returns true if the expression is a window function (FunctionCall with window_spec).
603fn is_window_function(expr: &Expr) -> bool {
604    matches!(
605        expr,
606        Expr::FunctionCall {
607            window_spec: Some(_),
608            ..
609        }
610    )
611}
612
613/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
614/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
615/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
616/// size() call, BinaryOp, etc.).
617fn is_compound_aggregate(expr: &Expr) -> bool {
618    if !expr.is_aggregate() {
619        return false;
620    }
621    match expr {
622        Expr::FunctionCall {
623            name, window_spec, ..
624        } => {
625            // A bare aggregate FunctionCall is NOT compound
626            if window_spec.is_some() {
627                return true; // window wrapping an aggregate — treat as compound
628            }
629            !is_aggregate_function_name(name)
630        }
631        // Subquery aggregates are "bare" (not compound)
632        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
633        // Everything else (ListComprehension, BinaryOp, etc.) is compound
634        _ => true,
635    }
636}
637
638/// Recursively collect all bare aggregate FunctionCall sub-expressions from
639/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
640/// the outermost aggregate boundaries).
641///
642/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
643/// is searched because the body (`map_expr`, `predicate`, `expr`) references
644/// the loop variable, not outer-scope aggregates.
645fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
646    let mut out = Vec::new();
647    extract_inner_aggregates_rec(expr, &mut out);
648    out
649}
650
651fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
652    match expr {
653        Expr::FunctionCall {
654            name, window_spec, ..
655        } if window_spec.is_none() && is_aggregate_function_name(name) => {
656            // Found a bare aggregate — collect it and stop recursing
657            out.push(expr.clone());
658        }
659        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
660            out.push(expr.clone());
661        }
662        // For list comprehension, only search the `list` source for aggregates
663        Expr::ListComprehension { list, .. } => {
664            extract_inner_aggregates_rec(list, out);
665        }
666        // For quantifier, only search the `list` source
667        Expr::Quantifier { list, .. } => {
668            extract_inner_aggregates_rec(list, out);
669        }
670        // For reduce, search `init` and `list` (not the body `expr`)
671        Expr::Reduce { init, list, .. } => {
672            extract_inner_aggregates_rec(init, out);
673            extract_inner_aggregates_rec(list, out);
674        }
675        // Standard recursive cases
676        Expr::FunctionCall { args, .. } => {
677            for arg in args {
678                extract_inner_aggregates_rec(arg, out);
679            }
680        }
681        Expr::BinaryOp { left, right, .. } => {
682            extract_inner_aggregates_rec(left, out);
683            extract_inner_aggregates_rec(right, out);
684        }
685        Expr::UnaryOp { expr: e, .. }
686        | Expr::IsNull(e)
687        | Expr::IsNotNull(e)
688        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
689        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
690        Expr::List(items) => {
691            for item in items {
692                extract_inner_aggregates_rec(item, out);
693            }
694        }
695        Expr::Case {
696            expr: case_expr,
697            when_then,
698            else_expr,
699        } => {
700            if let Some(e) = case_expr {
701                extract_inner_aggregates_rec(e, out);
702            }
703            for (w, t) in when_then {
704                extract_inner_aggregates_rec(w, out);
705                extract_inner_aggregates_rec(t, out);
706            }
707            if let Some(e) = else_expr {
708                extract_inner_aggregates_rec(e, out);
709            }
710        }
711        Expr::In {
712            expr: in_expr,
713            list,
714        } => {
715            extract_inner_aggregates_rec(in_expr, out);
716            extract_inner_aggregates_rec(list, out);
717        }
718        Expr::ArrayIndex { array, index } => {
719            extract_inner_aggregates_rec(array, out);
720            extract_inner_aggregates_rec(index, out);
721        }
722        Expr::ArraySlice { array, start, end } => {
723            extract_inner_aggregates_rec(array, out);
724            if let Some(s) = start {
725                extract_inner_aggregates_rec(s, out);
726            }
727            if let Some(e) = end {
728                extract_inner_aggregates_rec(e, out);
729            }
730        }
731        Expr::Map(entries) => {
732            for (_, v) in entries {
733                extract_inner_aggregates_rec(v, out);
734            }
735        }
736        _ => {}
737    }
738}
739
740/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
741/// `Expr::Variable(aggregate_column_name(agg))`.
742///
743/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
744/// rewritten (the body references the loop variable, not outer-scope columns).
745fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
746    match expr {
747        Expr::FunctionCall {
748            name, window_spec, ..
749        } if window_spec.is_none() && is_aggregate_function_name(name) => {
750            // Replace bare aggregate with column reference
751            Expr::Variable(aggregate_column_name(expr))
752        }
753        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
754            Expr::Variable(aggregate_column_name(expr))
755        }
756        Expr::ListComprehension {
757            variable,
758            list,
759            where_clause,
760            map_expr,
761        } => Expr::ListComprehension {
762            variable: variable.clone(),
763            list: Box::new(replace_aggregates_with_columns(list)),
764            where_clause: where_clause.clone(), // don't touch — references loop var
765            map_expr: map_expr.clone(),         // don't touch — references loop var
766        },
767        Expr::Quantifier {
768            quantifier,
769            variable,
770            list,
771            predicate,
772        } => Expr::Quantifier {
773            quantifier: *quantifier,
774            variable: variable.clone(),
775            list: Box::new(replace_aggregates_with_columns(list)),
776            predicate: predicate.clone(), // don't touch — references loop var
777        },
778        Expr::Reduce {
779            accumulator,
780            init,
781            variable,
782            list,
783            expr: body,
784        } => Expr::Reduce {
785            accumulator: accumulator.clone(),
786            init: Box::new(replace_aggregates_with_columns(init)),
787            variable: variable.clone(),
788            list: Box::new(replace_aggregates_with_columns(list)),
789            expr: body.clone(), // don't touch — references loop var
790        },
791        Expr::FunctionCall {
792            name,
793            args,
794            distinct,
795            window_spec,
796        } => Expr::FunctionCall {
797            name: name.clone(),
798            args: args.iter().map(replace_aggregates_with_columns).collect(),
799            distinct: *distinct,
800            window_spec: window_spec.clone(),
801        },
802        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
803            left: Box::new(replace_aggregates_with_columns(left)),
804            op: *op,
805            right: Box::new(replace_aggregates_with_columns(right)),
806        },
807        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
808            op: *op,
809            expr: Box::new(replace_aggregates_with_columns(e)),
810        },
811        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
812        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
813        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
814        Expr::Property(base, prop) => Expr::Property(
815            Box::new(replace_aggregates_with_columns(base)),
816            prop.clone(),
817        ),
818        Expr::List(items) => {
819            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
820        }
821        Expr::Case {
822            expr: case_expr,
823            when_then,
824            else_expr,
825        } => Expr::Case {
826            expr: case_expr
827                .as_ref()
828                .map(|e| Box::new(replace_aggregates_with_columns(e))),
829            when_then: when_then
830                .iter()
831                .map(|(w, t)| {
832                    (
833                        replace_aggregates_with_columns(w),
834                        replace_aggregates_with_columns(t),
835                    )
836                })
837                .collect(),
838            else_expr: else_expr
839                .as_ref()
840                .map(|e| Box::new(replace_aggregates_with_columns(e))),
841        },
842        Expr::In {
843            expr: in_expr,
844            list,
845        } => Expr::In {
846            expr: Box::new(replace_aggregates_with_columns(in_expr)),
847            list: Box::new(replace_aggregates_with_columns(list)),
848        },
849        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
850            array: Box::new(replace_aggregates_with_columns(array)),
851            index: Box::new(replace_aggregates_with_columns(index)),
852        },
853        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
854            array: Box::new(replace_aggregates_with_columns(array)),
855            start: start
856                .as_ref()
857                .map(|e| Box::new(replace_aggregates_with_columns(e))),
858            end: end
859                .as_ref()
860                .map(|e| Box::new(replace_aggregates_with_columns(e))),
861        },
862        Expr::Map(entries) => Expr::Map(
863            entries
864                .iter()
865                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
866                .collect(),
867        ),
868        // Leaf expressions — return as-is
869        other => other.clone(),
870    }
871}
872
873/// Check if an expression contains any aggregate function (recursively).
874fn contains_aggregate_recursive(expr: &Expr) -> bool {
875    match expr {
876        Expr::FunctionCall { name, args, .. } => {
877            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
878        }
879        Expr::BinaryOp { left, right, .. } => {
880            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
881        }
882        Expr::UnaryOp { expr: e, .. }
883        | Expr::IsNull(e)
884        | Expr::IsNotNull(e)
885        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
886        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
887        Expr::Case {
888            expr,
889            when_then,
890            else_expr,
891        } => {
892            expr.as_deref().is_some_and(contains_aggregate_recursive)
893                || when_then.iter().any(|(w, t)| {
894                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
895                })
896                || else_expr
897                    .as_deref()
898                    .is_some_and(contains_aggregate_recursive)
899        }
900        Expr::In { expr, list } => {
901            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
902        }
903        Expr::Property(base, _) => contains_aggregate_recursive(base),
904        Expr::ListComprehension { list, .. } => {
905            // Only check the list source — where_clause/map_expr reference the loop variable
906            contains_aggregate_recursive(list)
907        }
908        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
909        Expr::Reduce { init, list, .. } => {
910            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
911        }
912        Expr::ArrayIndex { array, index } => {
913            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
914        }
915        Expr::ArraySlice { array, start, end } => {
916            contains_aggregate_recursive(array)
917                || start.as_deref().is_some_and(contains_aggregate_recursive)
918                || end.as_deref().is_some_and(contains_aggregate_recursive)
919        }
920        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
921        _ => false,
922    }
923}
924
925/// Check if an expression contains a non-deterministic function (e.g. rand()).
926fn contains_non_deterministic(expr: &Expr) -> bool {
927    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
928        return true;
929    }
930    let mut found = false;
931    expr.for_each_child(&mut |child| {
932        if !found {
933            found = contains_non_deterministic(child);
934        }
935    });
936    found
937}
938
939fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
940    match expr {
941        Expr::FunctionCall { name, args, .. } => {
942            if is_aggregate_function_name(name) {
943                out.insert(expr.to_string_repr());
944                return;
945            }
946            for arg in args {
947                collect_aggregate_reprs(arg, out);
948            }
949        }
950        Expr::BinaryOp { left, right, .. } => {
951            collect_aggregate_reprs(left, out);
952            collect_aggregate_reprs(right, out);
953        }
954        Expr::UnaryOp { expr, .. }
955        | Expr::IsNull(expr)
956        | Expr::IsNotNull(expr)
957        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
958        Expr::List(items) => {
959            for item in items {
960                collect_aggregate_reprs(item, out);
961            }
962        }
963        Expr::Case {
964            expr,
965            when_then,
966            else_expr,
967        } => {
968            if let Some(e) = expr {
969                collect_aggregate_reprs(e, out);
970            }
971            for (w, t) in when_then {
972                collect_aggregate_reprs(w, out);
973                collect_aggregate_reprs(t, out);
974            }
975            if let Some(e) = else_expr {
976                collect_aggregate_reprs(e, out);
977            }
978        }
979        Expr::In { expr, list } => {
980            collect_aggregate_reprs(expr, out);
981            collect_aggregate_reprs(list, out);
982        }
983        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
984        Expr::ListComprehension { list, .. } => {
985            collect_aggregate_reprs(list, out);
986        }
987        Expr::Quantifier { list, .. } => {
988            collect_aggregate_reprs(list, out);
989        }
990        Expr::Reduce { init, list, .. } => {
991            collect_aggregate_reprs(init, out);
992            collect_aggregate_reprs(list, out);
993        }
994        Expr::ArrayIndex { array, index } => {
995            collect_aggregate_reprs(array, out);
996            collect_aggregate_reprs(index, out);
997        }
998        Expr::ArraySlice { array, start, end } => {
999            collect_aggregate_reprs(array, out);
1000            if let Some(s) = start {
1001                collect_aggregate_reprs(s, out);
1002            }
1003            if let Some(e) = end {
1004                collect_aggregate_reprs(e, out);
1005            }
1006        }
1007        _ => {}
1008    }
1009}
1010
/// A reference found outside of any aggregate call while scanning an
/// expression (see `collect_non_aggregate_refs`).
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A plain variable reference, identified by its name.
    Var(String),
    /// A property access expression.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the base variable when the property base is a plain
        /// variable; `None` for any other kind of base expression.
        base_var: Option<String>,
    },
}
1019
1020fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1021    match expr {
1022        Expr::FunctionCall { name, args, .. } => {
1023            if is_aggregate_function_name(name) {
1024                return;
1025            }
1026            for arg in args {
1027                collect_non_aggregate_refs(arg, inside_agg, out);
1028            }
1029        }
1030        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1031        Expr::Property(base, _) if !inside_agg => {
1032            let base_var = if let Expr::Variable(v) = base.as_ref() {
1033                Some(v.clone())
1034            } else {
1035                None
1036            };
1037            out.push(NonAggregateRef::Property {
1038                repr: expr.to_string_repr(),
1039                base_var,
1040            });
1041        }
1042        Expr::BinaryOp { left, right, .. } => {
1043            collect_non_aggregate_refs(left, inside_agg, out);
1044            collect_non_aggregate_refs(right, inside_agg, out);
1045        }
1046        Expr::UnaryOp { expr, .. }
1047        | Expr::IsNull(expr)
1048        | Expr::IsNotNull(expr)
1049        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1050        Expr::List(items) => {
1051            for item in items {
1052                collect_non_aggregate_refs(item, inside_agg, out);
1053            }
1054        }
1055        Expr::Case {
1056            expr,
1057            when_then,
1058            else_expr,
1059        } => {
1060            if let Some(e) = expr {
1061                collect_non_aggregate_refs(e, inside_agg, out);
1062            }
1063            for (w, t) in when_then {
1064                collect_non_aggregate_refs(w, inside_agg, out);
1065                collect_non_aggregate_refs(t, inside_agg, out);
1066            }
1067            if let Some(e) = else_expr {
1068                collect_non_aggregate_refs(e, inside_agg, out);
1069            }
1070        }
1071        Expr::In { expr, list } => {
1072            collect_non_aggregate_refs(expr, inside_agg, out);
1073            collect_non_aggregate_refs(list, inside_agg, out);
1074        }
1075        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1076        // source. The body references the loop variable, not outer-scope vars.
1077        Expr::ListComprehension { list, .. } => {
1078            collect_non_aggregate_refs(list, inside_agg, out);
1079        }
1080        Expr::Quantifier { list, .. } => {
1081            collect_non_aggregate_refs(list, inside_agg, out);
1082        }
1083        Expr::Reduce { init, list, .. } => {
1084            collect_non_aggregate_refs(init, inside_agg, out);
1085            collect_non_aggregate_refs(list, inside_agg, out);
1086        }
1087        _ => {}
1088    }
1089}
1090
1091fn validate_with_order_by_aggregate_item(
1092    expr: &Expr,
1093    projected_aggregate_reprs: &HashSet<String>,
1094    projected_simple_reprs: &HashSet<String>,
1095    projected_aliases: &HashSet<String>,
1096) -> Result<()> {
1097    let mut aggregate_reprs = HashSet::new();
1098    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1099    for agg in aggregate_reprs {
1100        if !projected_aggregate_reprs.contains(&agg) {
1101            return Err(anyhow!(
1102                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1103                agg
1104            ));
1105        }
1106    }
1107
1108    let mut refs = Vec::new();
1109    collect_non_aggregate_refs(expr, false, &mut refs);
1110    refs.retain(|r| match r {
1111        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1112        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1113    });
1114
1115    let mut dedup = HashSet::new();
1116    refs.retain(|r| {
1117        let key = match r {
1118            NonAggregateRef::Var(v) => format!("v:{v}"),
1119            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1120        };
1121        dedup.insert(key)
1122    });
1123
1124    if refs.len() > 1 {
1125        return Err(anyhow!(
1126            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1127        ));
1128    }
1129
1130    if let Some(r) = refs.first() {
1131        return match r {
1132            NonAggregateRef::Var(v) => Err(anyhow!(
1133                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1134                v
1135            )),
1136            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1137                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1138                base_var
1139                    .clone()
1140                    .unwrap_or_else(|| "<property-base>".to_string())
1141            )),
1142        };
1143    }
1144
1145    Ok(())
1146}
1147
1148/// Validate that no aggregation functions appear in WHERE clause.
1149fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1150    if contains_aggregate_recursive(predicate) {
1151        return Err(anyhow!(
1152            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1153        ));
1154    }
1155    Ok(())
1156}
1157
/// A constant numeric value: either a 64-bit integer or a 64-bit float.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// Signed 64-bit integer value.
    Int(i64),
    /// 64-bit floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as an `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            ConstNumber::Float(float) => float,
            ConstNumber::Int(int) => int as f64,
        }
    }
}
1172
1173fn eval_const_numeric_expr(
1174    expr: &Expr,
1175    params: &HashMap<String, uni_common::Value>,
1176) -> Result<ConstNumber> {
1177    match expr {
1178        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1179        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1180        Expr::Parameter(name) => match params.get(name) {
1181            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1182            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1183            Some(uni_common::Value::Null) => Err(anyhow!(
1184                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1185                name
1186            )),
1187            Some(other) => Err(anyhow!(
1188                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1189                name,
1190                other
1191            )),
1192            None => Err(anyhow!(
1193                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1194            )),
1195        },
1196        Expr::UnaryOp {
1197            op: uni_cypher::ast::UnaryOp::Neg,
1198            expr,
1199        } => match eval_const_numeric_expr(expr, params)? {
1200            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1201            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1202        },
1203        Expr::BinaryOp { left, op, right } => {
1204            let l = eval_const_numeric_expr(left, params)?;
1205            let r = eval_const_numeric_expr(right, params)?;
1206            match op {
1207                BinaryOp::Add => match (l, r) {
1208                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1209                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1210                },
1211                BinaryOp::Sub => match (l, r) {
1212                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1213                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1214                },
1215                BinaryOp::Mul => match (l, r) {
1216                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1217                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1218                },
1219                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1220                BinaryOp::Mod => match (l, r) {
1221                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1222                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1223                },
1224                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1225                _ => Err(anyhow!(
1226                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1227                )),
1228            }
1229        }
1230        Expr::FunctionCall { name, args, .. } => {
1231            let lower = name.to_lowercase();
1232            match lower.as_str() {
1233                "rand" if args.is_empty() => {
1234                    use rand::Rng;
1235                    let mut rng = rand::thread_rng();
1236                    Ok(ConstNumber::Float(rng.r#gen::<f64>()))
1237                }
1238                "tointeger" | "toint" if args.len() == 1 => {
1239                    match eval_const_numeric_expr(&args[0], params)? {
1240                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1241                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1242                    }
1243                }
1244                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1245                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1246                )),
1247                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1248                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1249                )),
1250                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1251                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1252                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1253                },
1254                _ => Err(anyhow!(
1255                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1256                )),
1257            }
1258        }
1259        _ => Err(anyhow!(
1260            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1261        )),
1262    }
1263}
1264
1265/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1266/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1267fn parse_non_negative_integer(
1268    expr: &Expr,
1269    clause_name: &str,
1270    params: &HashMap<String, uni_common::Value>,
1271) -> Result<Option<usize>> {
1272    let referenced_vars = collect_expr_variables(expr);
1273    if !referenced_vars.is_empty() {
1274        return Err(anyhow!(
1275            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1276            clause_name
1277        ));
1278    }
1279
1280    let value = eval_const_numeric_expr(expr, params)?;
1281    let as_int = match value {
1282        ConstNumber::Int(v) => v,
1283        ConstNumber::Float(v) => {
1284            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1285                return Err(anyhow!(
1286                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1287                    clause_name
1288                ));
1289            }
1290            v as i64
1291        }
1292    };
1293    if as_int < 0 {
1294        return Err(anyhow!(
1295            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1296            clause_name
1297        ));
1298    }
1299    Ok(Some(as_int as usize))
1300}
1301
1302/// Validate that aggregation functions are not nested.
1303fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1304    if let Expr::FunctionCall { name, args, .. } = expr
1305        && is_aggregate_function_name(name)
1306    {
1307        for arg in args {
1308            if contains_aggregate_recursive(arg) {
1309                return Err(anyhow!(
1310                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1311                ));
1312            }
1313            if contains_non_deterministic(arg) {
1314                return Err(anyhow!(
1315                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1316                ));
1317            }
1318        }
1319    }
1320    let mut result = Ok(());
1321    expr.for_each_child(&mut |child| {
1322        if result.is_ok() {
1323            result = validate_no_nested_aggregation(child);
1324        }
1325    });
1326    result
1327}
1328
1329/// Validate that an expression does not access properties or labels of
1330/// deleted entities. `type(r)` on a deleted relationship is allowed per
1331/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1332fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1333    // Check n.prop on a deleted variable
1334    if let Expr::Property(inner, _) = expr
1335        && let Expr::Variable(name) = inner.as_ref()
1336        && deleted_vars.contains(name)
1337    {
1338        return Err(anyhow!(
1339            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1340            name
1341        ));
1342    }
1343    // Check labels(n) or keys(n) on a deleted variable
1344    if let Expr::FunctionCall { name, args, .. } = expr
1345        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1346        && args.len() == 1
1347        && let Expr::Variable(var) = &args[0]
1348        && deleted_vars.contains(var)
1349    {
1350        return Err(anyhow!(
1351            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1352            name.to_lowercase(),
1353            var
1354        ));
1355    }
1356    let mut result = Ok(());
1357    expr.for_each_child(&mut |child| {
1358        if result.is_ok() {
1359            result = validate_no_deleted_entity_access(child, deleted_vars);
1360        }
1361    });
1362    result
1363}
1364
1365/// Validate that all variables referenced in properties are defined,
1366/// either in scope or in the local CREATE variable list.
1367fn validate_property_variables(
1368    properties: &Option<Expr>,
1369    vars_in_scope: &[VariableInfo],
1370    create_vars: &[&str],
1371) -> Result<()> {
1372    if let Some(props) = properties {
1373        for var in collect_expr_variables(props) {
1374            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1375                return Err(anyhow!(
1376                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1377                    var
1378                ));
1379            }
1380        }
1381    }
1382    Ok(())
1383}
1384
1385/// Check that a variable name is not already bound in scope or in the local CREATE list.
1386/// Used to prevent rebinding in CREATE clauses.
1387fn check_not_already_bound(
1388    name: &str,
1389    vars_in_scope: &[VariableInfo],
1390    create_vars: &[&str],
1391) -> Result<()> {
1392    if is_var_in_scope(vars_in_scope, name) {
1393        return Err(anyhow!(
1394            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1395            name
1396        ));
1397    }
1398    if create_vars.contains(&name) {
1399        return Err(anyhow!(
1400            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1401            name
1402        ));
1403    }
1404    Ok(())
1405}
1406
1407fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1408    let mut scope = vars_in_scope.to_vec();
1409
1410    for path in &pattern.paths {
1411        if let Some(path_var) = &path.variable
1412            && !path_var.is_empty()
1413            && !is_var_in_scope(&scope, path_var)
1414        {
1415            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1416        }
1417        for element in &path.elements {
1418            match element {
1419                PatternElement::Node(n) => {
1420                    if let Some(v) = &n.variable
1421                        && !v.is_empty()
1422                        && !is_var_in_scope(&scope, v)
1423                    {
1424                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1425                    }
1426                }
1427                PatternElement::Relationship(r) => {
1428                    if let Some(v) = &r.variable
1429                        && !v.is_empty()
1430                        && !is_var_in_scope(&scope, v)
1431                    {
1432                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1433                    }
1434                }
1435                PatternElement::Parenthesized { .. } => {}
1436            }
1437        }
1438    }
1439
1440    scope
1441}
1442
1443fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1444    match item {
1445        SetItem::Property { expr, value } => {
1446            validate_expression_variables(expr, vars_in_scope)?;
1447            validate_expression(expr, vars_in_scope)?;
1448            validate_expression_variables(value, vars_in_scope)?;
1449            validate_expression(value, vars_in_scope)?;
1450            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1451                return Err(anyhow!(
1452                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1453                ));
1454            }
1455        }
1456        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1457            if !is_var_in_scope(vars_in_scope, variable) {
1458                return Err(anyhow!(
1459                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1460                    variable
1461                ));
1462            }
1463            validate_expression_variables(value, vars_in_scope)?;
1464            validate_expression(value, vars_in_scope)?;
1465            if contains_pattern_predicate(value) {
1466                return Err(anyhow!(
1467                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1468                ));
1469            }
1470        }
1471        SetItem::Labels { variable, .. } => {
1472            if !is_var_in_scope(vars_in_scope, variable) {
1473                return Err(anyhow!(
1474                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1475                    variable
1476                ));
1477            }
1478        }
1479    }
1480
1481    Ok(())
1482}
1483
1484/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1485/// The OpenCypher spec requires all property values in MERGE to be non-null.
1486fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1487    if let Some(Expr::Map(entries)) = properties {
1488        for (key, value) in entries {
1489            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1490                return Err(anyhow!(
1491                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1492                    key
1493                ));
1494            }
1495        }
1496    }
1497    Ok(())
1498}
1499
1500fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1501    for path in &merge_clause.pattern.paths {
1502        for element in &path.elements {
1503            match element {
1504                PatternElement::Node(n) => {
1505                    if let Some(Expr::Parameter(_)) = &n.properties {
1506                        return Err(anyhow!(
1507                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1508                        ));
1509                    }
1510                    reject_null_merge_properties(&n.properties)?;
1511                    // VariableAlreadyBound: reject if a bound variable is used
1512                    // as a standalone MERGE node or introduces new labels/properties.
1513                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1514                    if let Some(variable) = &n.variable
1515                        && !variable.is_empty()
1516                        && is_var_in_scope(vars_in_scope, variable)
1517                    {
1518                        let is_standalone = path.elements.len() == 1;
1519                        let has_new_labels = !n.labels.is_empty();
1520                        let has_new_properties = n.properties.is_some();
1521                        if is_standalone || has_new_labels || has_new_properties {
1522                            return Err(anyhow!(
1523                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1524                                variable
1525                            ));
1526                        }
1527                    }
1528                }
1529                PatternElement::Relationship(r) => {
1530                    if let Some(variable) = &r.variable
1531                        && !variable.is_empty()
1532                        && is_var_in_scope(vars_in_scope, variable)
1533                    {
1534                        return Err(anyhow!(
1535                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1536                            variable
1537                        ));
1538                    }
1539                    if r.types.len() != 1 {
1540                        return Err(anyhow!(
1541                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1542                        ));
1543                    }
1544                    if r.range.is_some() {
1545                        return Err(anyhow!(
1546                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1547                        ));
1548                    }
1549                    if let Some(Expr::Parameter(_)) = &r.properties {
1550                        return Err(anyhow!(
1551                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1552                        ));
1553                    }
1554                    reject_null_merge_properties(&r.properties)?;
1555                }
1556                PatternElement::Parenthesized { .. } => {}
1557            }
1558        }
1559    }
1560
1561    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1562    for item in &merge_clause.on_create {
1563        validate_merge_set_item(item, &merge_scope)?;
1564    }
1565    for item in &merge_clause.on_match {
1566        validate_merge_set_item(item, &merge_scope)?;
1567    }
1568
1569    Ok(())
1570}
1571
1572/// Recursively validate an expression for type errors, undefined variables, etc.
1573fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1574    // Validate boolean operators and nested aggregation first
1575    validate_boolean_expression(expr)?;
1576    validate_no_nested_aggregation(expr)?;
1577
1578    // Helper to validate multiple expressions
1579    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1580        for e in exprs {
1581            validate_expression(e, vars)?;
1582        }
1583        Ok(())
1584    }
1585
1586    match expr {
1587        Expr::FunctionCall { name, args, .. } => {
1588            validate_function_call(name, args, vars_in_scope)?;
1589            validate_all(args, vars_in_scope)
1590        }
1591        Expr::BinaryOp { left, right, .. } => {
1592            validate_expression(left, vars_in_scope)?;
1593            validate_expression(right, vars_in_scope)
1594        }
1595        Expr::UnaryOp { expr: e, .. }
1596        | Expr::IsNull(e)
1597        | Expr::IsNotNull(e)
1598        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1599        Expr::Property(base, prop) => {
1600            if let Expr::Variable(var_name) = base.as_ref()
1601                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1602            {
1603                // Paths don't have properties
1604                if var_info.var_type == VariableType::Path {
1605                    return Err(anyhow!(
1606                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1607                        var_name,
1608                        prop
1609                    ));
1610                }
1611                // Known non-graph literals (int, float, bool, string, list) don't have properties
1612                if var_info.var_type == VariableType::ScalarLiteral {
1613                    return Err(anyhow!(
1614                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1615                    ));
1616                }
1617            }
1618            validate_expression(base, vars_in_scope)
1619        }
1620        Expr::List(items) => validate_all(items, vars_in_scope),
1621        Expr::Case {
1622            expr: case_expr,
1623            when_then,
1624            else_expr,
1625        } => {
1626            if let Some(e) = case_expr {
1627                validate_expression(e, vars_in_scope)?;
1628            }
1629            for (w, t) in when_then {
1630                validate_expression(w, vars_in_scope)?;
1631                validate_expression(t, vars_in_scope)?;
1632            }
1633            if let Some(e) = else_expr {
1634                validate_expression(e, vars_in_scope)?;
1635            }
1636            Ok(())
1637        }
1638        Expr::In { expr: e, list } => {
1639            validate_expression(e, vars_in_scope)?;
1640            validate_expression(list, vars_in_scope)
1641        }
1642        Expr::Exists {
1643            query,
1644            from_pattern_predicate: true,
1645        } => {
1646            // Pattern predicates cannot introduce new named variables.
1647            // Extract named vars from inner MATCH pattern, check each is in scope.
1648            if let Query::Single(stmt) = query.as_ref() {
1649                for clause in &stmt.clauses {
1650                    if let Clause::Match(m) = clause {
1651                        for path in &m.pattern.paths {
1652                            for elem in &path.elements {
1653                                match elem {
1654                                    PatternElement::Node(n) => {
1655                                        if let Some(var) = &n.variable
1656                                            && !is_var_in_scope(vars_in_scope, var)
1657                                        {
1658                                            return Err(anyhow!(
1659                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1660                                                var
1661                                            ));
1662                                        }
1663                                    }
1664                                    PatternElement::Relationship(r) => {
1665                                        if let Some(var) = &r.variable
1666                                            && !is_var_in_scope(vars_in_scope, var)
1667                                        {
1668                                            return Err(anyhow!(
1669                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1670                                                var
1671                                            ));
1672                                        }
1673                                    }
1674                                    _ => {}
1675                                }
1676                            }
1677                        }
1678                    }
1679                }
1680            }
1681            Ok(())
1682        }
1683        _ => Ok(()),
1684    }
1685}
1686
/// One step (hop) in a Quantified Path Pattern sub-pattern.
///
/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`; the traverse
/// node then carries one `QppStepInfo` per step of the repeated sub-pattern.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node (`None` = unconstrained).
    pub target_label: Option<String>,
}
1699
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
///
/// Variants that consume rows hold their upstream plan in a boxed `input`
/// (or `left`/`right`) field; variants without such a field are leaves.
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan the main vertices table filtering by label name(s) (MATCH (n:Unknown)).
    /// Used for labels not defined in schema (schemaless support).
    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Edge traversal addressed by edge type IDs (`edge_type_ids`), as opposed
    /// to [`LogicalPlan::TraverseMainByType`], which addresses edge types by name.
    /// Covers both single-hop and variable-length patterns (see `is_variable_length`).
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Row filter over `input` using `predicate`.
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// CREATE of a single pattern (see [`LogicalPlan::CreateBatch`] for the batched form).
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with the optional ON MATCH / ON CREATE set clauses.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    /// SET clause carrying its list of set items.
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    /// REMOVE clause carrying its list of remove items.
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    /// DELETE of the listed expressions.
    /// NOTE(review): `detach` presumably marks DETACH DELETE — confirm against the executor.
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    /// Ordering of `input` by the given sort items.
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    /// Row-window over `input`; both `skip` and `fetch` are optional.
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    /// Aggregation with grouping expressions and aggregate expressions.
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    /// Distinct over `input` (carries no parameters of its own).
    Distinct {
        input: Box<LogicalPlan>,
    },
    /// Window expressions evaluated over `input`.
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection; each entry is an expression with an optional alias.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    /// Join of two sub-plans without a join predicate.
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    /// Pairs `input` with a `subquery` plan and an optional filter on the input.
    /// NOTE(review): exact per-row evaluation semantics are defined by the executor — confirm there.
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    /// Recursive CTE named `cte_name`, split into its initial and recursive plans.
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    /// Procedure invocation by name with argument expressions.
    /// Each yield item is a `(String, Option<String>)` pair — presumably
    /// (yielded name, optional alias); confirm against the planner code that builds it.
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    /// Subquery plan attached to an `input` plan.
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    /// Vector nearest-neighbour lookup on `variable.property` for label `label_id`,
    /// with query expression, result count `k`, and an optional threshold.
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    /// Inverted-index lookup on `variable.property` for label `label_id` using `terms`.
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// Shortest-path search between `source_variable` and `target_variable`.
    /// Note that hop bounds are `u32` here, unlike the `usize` bounds on `Traverse`.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Quantified pattern: `pattern_plan` describes one iteration, repeated
    /// between `min_iterations` and `max_iterations` times.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    /// Create a vector index from `config`.
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    /// Create a full-text index from `config`.
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    /// Create a scalar index from `config`.
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    /// Create a JSON full-text-search index from `config`.
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    /// Drop the index called `name`.
    DropIndex {
        name: String,
        if_exists: bool,
    },
    /// List indexes, with an optional filter string.
    ShowIndexes {
        filter: Option<String>,
    },
    /// Copy between `source` and `target`; `is_export` distinguishes the direction.
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    /// Backup to `destination` with free-form options.
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around the plan of the explained query.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    /// Copy data for `label` to `path` in the given `format`.
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    /// Copy data for `label` from `path` in the given `format`.
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
}
2097
/// Extracted vector similarity predicate info for optimization.
///
/// Built by `extract_simple_vector_similarity` from either a
/// `vector_similarity(var.prop, query)` comparison against a numeric literal
/// or a `var.prop ~= query` expression.
struct VectorSimilarityPredicate {
    /// Name of the variable whose property is being compared (`var` in `var.prop`).
    variable: String,
    /// Name of the accessed property (`prop` in `var.prop`).
    property: String,
    /// The query-side expression: second function argument, or the right-hand side of `~=`.
    query: Expr,
    /// Literal bound taken from the comparison; `None` for the `~=` form.
    threshold: Option<f32>,
}
2105
/// Result of extracting vector_similarity from a predicate.
///
/// Returned by `extract_vector_similarity`, which splits an expression into
/// the recognised similarity predicate and whatever is left over.
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole expression was the similarity predicate.
    residual: Option<Expr>,
}
2113
2114/// Try to extract a vector_similarity predicate from an expression.
2115/// Matches patterns like:
2116/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2117/// - n.embedding ~= $query
2118///
2119/// Also handles AND predicates.
2120fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2121    match expr {
2122        Expr::BinaryOp { left, op, right } => {
2123            // Handle AND: check both sides for vector_similarity
2124            if matches!(op, BinaryOp::And) {
2125                // Try left side first
2126                if let Some(vs) = extract_simple_vector_similarity(left) {
2127                    return Some(VectorSimilarityExtraction {
2128                        predicate: vs,
2129                        residual: Some(right.as_ref().clone()),
2130                    });
2131                }
2132                // Try right side
2133                if let Some(vs) = extract_simple_vector_similarity(right) {
2134                    return Some(VectorSimilarityExtraction {
2135                        predicate: vs,
2136                        residual: Some(left.as_ref().clone()),
2137                    });
2138                }
2139                // Recursively check within left/right for nested ANDs
2140                if let Some(mut extraction) = extract_vector_similarity(left) {
2141                    extraction.residual = Some(combine_with_and(
2142                        extraction.residual,
2143                        right.as_ref().clone(),
2144                    ));
2145                    return Some(extraction);
2146                }
2147                if let Some(mut extraction) = extract_vector_similarity(right) {
2148                    extraction.residual =
2149                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2150                    return Some(extraction);
2151                }
2152                return None;
2153            }
2154
2155            // Simple case: direct vector_similarity comparison
2156            if let Some(vs) = extract_simple_vector_similarity(expr) {
2157                return Some(VectorSimilarityExtraction {
2158                    predicate: vs,
2159                    residual: None,
2160                });
2161            }
2162            None
2163        }
2164        _ => None,
2165    }
2166}
2167
2168/// Helper to combine an optional expression with another using AND
2169fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2170    match opt_expr {
2171        Some(e) => Expr::BinaryOp {
2172            left: Box::new(e),
2173            op: BinaryOp::And,
2174            right: Box::new(other),
2175        },
2176        None => other,
2177    }
2178}
2179
2180/// Extract a simple vector_similarity comparison (no AND)
2181fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2182    match expr {
2183        Expr::BinaryOp { left, op, right } => {
2184            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2185            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2186                && let (Some(vs), Some(thresh)) = (
2187                    extract_vector_similarity_call(left),
2188                    extract_float_literal(right),
2189                )
2190            {
2191                return Some(VectorSimilarityPredicate {
2192                    variable: vs.0,
2193                    property: vs.1,
2194                    query: vs.2,
2195                    threshold: Some(thresh),
2196                });
2197            }
2198            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2199            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2200                && let (Some(thresh), Some(vs)) = (
2201                    extract_float_literal(left),
2202                    extract_vector_similarity_call(right),
2203                )
2204            {
2205                return Some(VectorSimilarityPredicate {
2206                    variable: vs.0,
2207                    property: vs.1,
2208                    query: vs.2,
2209                    threshold: Some(thresh),
2210                });
2211            }
2212            // Pattern: n.embedding ~= query
2213            if matches!(op, BinaryOp::ApproxEq)
2214                && let Expr::Property(var_expr, prop) = left.as_ref()
2215                && let Expr::Variable(var) = var_expr.as_ref()
2216            {
2217                return Some(VectorSimilarityPredicate {
2218                    variable: var.clone(),
2219                    property: prop.clone(),
2220                    query: right.as_ref().clone(),
2221                    threshold: None,
2222                });
2223            }
2224            None
2225        }
2226        _ => None,
2227    }
2228}
2229
2230/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2231fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2232    if let Expr::FunctionCall { name, args, .. } = expr
2233        && name.eq_ignore_ascii_case("vector_similarity")
2234        && args.len() == 2
2235    {
2236        // First arg should be Property(Identifier(var), prop)
2237        if let Expr::Property(var_expr, prop) = &args[0]
2238            && let Expr::Variable(var) = var_expr.as_ref()
2239        {
2240            // Second arg is query
2241            return Some((var.clone(), prop.clone(), args[1].clone()));
2242        }
2243    }
2244    None
2245}
2246
2247/// Extract a float value from a literal expression
2248fn extract_float_literal(expr: &Expr) -> Option<f32> {
2249    match expr {
2250        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2251        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2252        _ => None,
2253    }
2254}
2255
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Schema the planner was created for; also the source of the generation
    /// expressions pre-parsed into `gen_expr_cache`.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    /// Atomic so that `next_anon_var` can bump it through `&self`.
    anon_counter: std::sync::atomic::AtomicUsize,
    /// Query parameters for resolving $param in SKIP/LIMIT.
    /// Empty unless supplied via `with_params`.
    params: HashMap<String, uni_common::Value>,
}
2271
/// Bundle of borrowed pattern pieces and flags describing one traversal hop.
///
/// NOTE(review): presumably passed to the traverse-planning helper(s) of
/// `QueryPlanner` to keep their argument lists short — confirm at the call sites.
struct TraverseParams<'a> {
    /// Relationship pattern for this hop.
    rel: &'a RelationshipPattern,
    /// Node pattern the hop ends on.
    target_node: &'a NodePattern,
    /// Optional flag for the hop (presumably set for OPTIONAL MATCH — confirm).
    optional: bool,
    /// Name of the path variable, if any.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2281
2282impl QueryPlanner {
2283    /// Create a new planner for the given schema.
2284    ///
2285    /// Pre-parses all generation expressions defined in the schema so that
2286    /// repeated plan calls avoid redundant parsing.
2287    pub fn new(schema: Arc<Schema>) -> Self {
2288        // Pre-parse all generation expressions for caching
2289        let mut gen_expr_cache = HashMap::new();
2290        for (label, props) in &schema.properties {
2291            for (gen_col, meta) in props {
2292                if let Some(expr_str) = &meta.generation_expression
2293                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2294                {
2295                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2296                }
2297            }
2298        }
2299        Self {
2300            schema,
2301            gen_expr_cache,
2302            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2303            params: HashMap::new(),
2304        }
2305    }
2306
2307    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2308    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2309        self.params = params;
2310        self
2311    }
2312
2313    /// Plan a Cypher query with no pre-bound variables.
2314    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2315        self.plan_with_scope(query, Vec::new())
2316    }
2317
2318    /// Plan a Cypher query with a set of externally pre-bound variable names.
2319    ///
2320    /// `vars` lists variable names already in scope before this query executes
2321    /// (e.g., from an enclosing Locy rule body).
2322    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2323        // Apply query rewrites before planning
2324        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2325        if Self::has_mixed_union_modes(&rewritten_query) {
2326            return Err(anyhow!(
2327                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2328            ));
2329        }
2330
2331        match rewritten_query {
2332            Query::Single(stmt) => self.plan_single(stmt, vars),
2333            Query::Union { left, right, all } => {
2334                let l = self.plan_with_scope(*left, vars.clone())?;
2335                let r = self.plan_with_scope(*right, vars)?;
2336
2337                // Validate that both sides have the same column names
2338                let left_cols = Self::extract_projection_columns(&l);
2339                let right_cols = Self::extract_projection_columns(&r);
2340
2341                if left_cols != right_cols {
2342                    return Err(anyhow!(
2343                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2344                    ));
2345                }
2346
2347                Ok(LogicalPlan::Union {
2348                    left: Box::new(l),
2349                    right: Box::new(r),
2350                    all,
2351                })
2352            }
2353            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2354            Query::Explain(inner) => {
2355                let inner_plan = self.plan_with_scope(*inner, vars)?;
2356                Ok(LogicalPlan::Explain {
2357                    plan: Box::new(inner_plan),
2358                })
2359            }
2360            Query::TimeTravel { .. } => {
2361                unreachable!("TimeTravel should be resolved at API layer before planning")
2362            }
2363        }
2364    }
2365
2366    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2367        match query {
2368            Query::Union { left, right, all } => {
2369                out.insert(*all);
2370                Self::collect_union_modes(left, out);
2371                Self::collect_union_modes(right, out);
2372            }
2373            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2374            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2375            Query::Single(_) | Query::Schema(_) => {}
2376        }
2377    }
2378
2379    fn has_mixed_union_modes(query: &Query) -> bool {
2380        let mut modes = HashSet::new();
2381        Self::collect_union_modes(query, &mut modes);
2382        modes.len() > 1
2383    }
2384
2385    fn next_anon_var(&self) -> String {
2386        let id = self
2387            .anon_counter
2388            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2389        format!("_anon_{}", id)
2390    }
2391
2392    /// Extract projection column names from a logical plan.
2393    /// Used for UNION column validation.
2394    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2395        match plan {
2396            LogicalPlan::Project { projections, .. } => projections
2397                .iter()
2398                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2399                .collect(),
2400            LogicalPlan::Limit { input, .. }
2401            | LogicalPlan::Sort { input, .. }
2402            | LogicalPlan::Distinct { input, .. }
2403            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2404            LogicalPlan::Union { left, right, .. } => {
2405                let left_cols = Self::extract_projection_columns(left);
2406                if left_cols.is_empty() {
2407                    Self::extract_projection_columns(right)
2408                } else {
2409                    left_cols
2410                }
2411            }
2412            LogicalPlan::Aggregate {
2413                group_by,
2414                aggregates,
2415                ..
2416            } => {
2417                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2418                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2419                cols
2420            }
2421            _ => Vec::new(),
2422        }
2423    }
2424
2425    fn plan_return_clause(
2426        &self,
2427        return_clause: &ReturnClause,
2428        plan: LogicalPlan,
2429        vars_in_scope: &[VariableInfo],
2430    ) -> Result<LogicalPlan> {
2431        let mut plan = plan;
2432        let mut group_by = Vec::new();
2433        let mut aggregates = Vec::new();
2434        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2435        let mut has_agg = false;
2436        let mut projections = Vec::new();
2437        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2438        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2439        let mut projected_aliases: HashSet<String> = HashSet::new();
2440
2441        for item in &return_clause.items {
2442            match item {
2443                ReturnItem::All => {
2444                    // RETURN * - add all user-named variables in scope
2445                    // (anonymous variables like _anon_0 are excluded)
2446                    let user_vars: Vec<_> = vars_in_scope
2447                        .iter()
2448                        .filter(|v| !v.name.starts_with("_anon_"))
2449                        .collect();
2450                    if user_vars.is_empty() {
2451                        return Err(anyhow!(
2452                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2453                        ));
2454                    }
2455                    for v in user_vars {
2456                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2457                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2458                            group_by.push(Expr::Variable(v.name.clone()));
2459                        }
2460                        projected_aliases.insert(v.name.clone());
2461                        projected_simple_reprs.insert(v.name.clone());
2462                    }
2463                }
2464                ReturnItem::Expr {
2465                    expr,
2466                    alias,
2467                    source_text,
2468                } => {
2469                    if matches!(expr, Expr::Wildcard) {
2470                        for v in vars_in_scope {
2471                            projections
2472                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2473                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2474                                group_by.push(Expr::Variable(v.name.clone()));
2475                            }
2476                            projected_aliases.insert(v.name.clone());
2477                            projected_simple_reprs.insert(v.name.clone());
2478                        }
2479                    } else {
2480                        // Validate expression variables are defined
2481                        validate_expression_variables(expr, vars_in_scope)?;
2482                        // Validate function argument types and boolean operators
2483                        validate_expression(expr, vars_in_scope)?;
2484                        // Pattern predicates are not allowed in RETURN
2485                        if contains_pattern_predicate(expr) {
2486                            return Err(anyhow!(
2487                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2488                            ));
2489                        }
2490
2491                        // Use source text as column name when no explicit alias
2492                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2493                        projections.push((expr.clone(), effective_alias));
2494                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2495                            // Bare aggregate — push directly
2496                            has_agg = true;
2497                            aggregates.push(expr.clone());
2498                            projected_aggregate_reprs.insert(expr.to_string_repr());
2499                        } else if !is_window_function(expr)
2500                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2501                        {
2502                            // Compound aggregate or expression containing aggregates —
2503                            // extract the inner bare aggregates for the Aggregate node
2504                            has_agg = true;
2505                            compound_agg_exprs.push(expr.clone());
2506                            for inner in extract_inner_aggregates(expr) {
2507                                let repr = inner.to_string_repr();
2508                                if !projected_aggregate_reprs.contains(&repr) {
2509                                    aggregates.push(inner);
2510                                    projected_aggregate_reprs.insert(repr);
2511                                }
2512                            }
2513                        } else if !group_by.contains(expr) {
2514                            group_by.push(expr.clone());
2515                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2516                                projected_simple_reprs.insert(expr.to_string_repr());
2517                            }
2518                        }
2519
2520                        if let Some(a) = alias {
2521                            if projected_aliases.contains(a) {
2522                                return Err(anyhow!(
2523                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2524                                    a
2525                                ));
2526                            }
2527                            projected_aliases.insert(a.clone());
2528                        } else if let Expr::Variable(v) = expr {
2529                            if projected_aliases.contains(v) {
2530                                return Err(anyhow!(
2531                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2532                                    v
2533                                ));
2534                            }
2535                            projected_aliases.insert(v.clone());
2536                        }
2537                    }
2538                }
2539            }
2540        }
2541
2542        // Validate compound aggregate expressions: non-aggregate refs must be
2543        // individually present in the group_by as simple variables or properties.
2544        if has_agg {
2545            let group_by_reprs: HashSet<String> =
2546                group_by.iter().map(|e| e.to_string_repr()).collect();
2547            for expr in &compound_agg_exprs {
2548                let mut refs = Vec::new();
2549                collect_non_aggregate_refs(expr, false, &mut refs);
2550                for r in &refs {
2551                    let is_covered = match r {
2552                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
2553                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
2554                    };
2555                    if !is_covered {
2556                        return Err(anyhow!(
2557                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
2558                        ));
2559                    }
2560                }
2561            }
2562        }
2563
2564        if has_agg {
2565            plan = LogicalPlan::Aggregate {
2566                input: Box::new(plan),
2567                group_by,
2568                aggregates,
2569            };
2570        }
2571
2572        let mut window_exprs = Vec::new();
2573        for (expr, _) in &projections {
2574            Self::collect_window_functions(expr, &mut window_exprs);
2575        }
2576
2577        if let Some(order_by) = &return_clause.order_by {
2578            for item in order_by {
2579                Self::collect_window_functions(&item.expr, &mut window_exprs);
2580            }
2581        }
2582
2583        let has_window_exprs = !window_exprs.is_empty();
2584
2585        if has_window_exprs {
2586            // Before creating the Window node, we need to ensure all properties
2587            // referenced by window functions are available. Create a Project node
2588            // that loads these properties.
2589            let mut props_needed_for_window: Vec<Expr> = Vec::new();
2590            for window_expr in &window_exprs {
2591                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
2592            }
2593
2594            // Also include non-window expressions from projections that might be needed
2595            // Preserve qualified names (e.g., "e.salary") as aliases for properties
2596            let non_window_projections: Vec<_> = projections
2597                .iter()
2598                .filter_map(|(expr, alias)| {
2599                    // Keep expressions that don't have window_spec
2600                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
2601                        window_spec.is_none()
2602                    } else {
2603                        true
2604                    };
2605
2606                    if keep {
2607                        // For property references, use the qualified name as alias
2608                        let new_alias = if matches!(expr, Expr::Property(..)) {
2609                            Some(expr.to_string_repr())
2610                        } else {
2611                            alias.clone()
2612                        };
2613                        Some((expr.clone(), new_alias))
2614                    } else {
2615                        None
2616                    }
2617                })
2618                .collect();
2619
2620            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
2621                let mut intermediate_projections = non_window_projections;
2622                // Add any additional property references needed by window functions
2623                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
2624                for prop in &props_needed_for_window {
2625                    if !intermediate_projections
2626                        .iter()
2627                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
2628                    {
2629                        let qualified_name = prop.to_string_repr();
2630                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
2631                    }
2632                }
2633
2634                if !intermediate_projections.is_empty() {
2635                    plan = LogicalPlan::Project {
2636                        input: Box::new(plan),
2637                        projections: intermediate_projections,
2638                    };
2639                }
2640            }
2641
2642            // Transform property expressions in window functions to use qualified variable names
2643            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
2644            let transformed_window_exprs: Vec<Expr> = window_exprs
2645                .into_iter()
2646                .map(Self::transform_window_expr_properties)
2647                .collect();
2648
2649            plan = LogicalPlan::Window {
2650                input: Box::new(plan),
2651                window_exprs: transformed_window_exprs,
2652            };
2653        }
2654
2655        if let Some(order_by) = &return_clause.order_by {
2656            let alias_exprs: HashMap<String, Expr> = projections
2657                .iter()
2658                .filter_map(|(expr, alias)| {
2659                    alias.as_ref().map(|a| {
2660                        // ORDER BY is planned before the final RETURN projection.
2661                        // In aggregate contexts, aliases must resolve to the
2662                        // post-aggregate output columns, not raw aggregate calls.
2663                        let rewritten = if has_agg && !has_window_exprs {
2664                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
2665                                Expr::Variable(aggregate_column_name(expr))
2666                            } else if is_compound_aggregate(expr)
2667                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
2668                            {
2669                                replace_aggregates_with_columns(expr)
2670                            } else {
2671                                Expr::Variable(expr.to_string_repr())
2672                            }
2673                        } else {
2674                            expr.clone()
2675                        };
2676                        (a.clone(), rewritten)
2677                    })
2678                })
2679                .collect();
2680
2681            // Build an extended scope that includes RETURN aliases so ORDER BY
2682            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
2683            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
2684                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
2685                // Keep aliases and directly returned variables in scope.
2686                let mut scope = Vec::new();
2687                for (expr, alias) in &projections {
2688                    if let Some(a) = alias
2689                        && !is_var_in_scope(&scope, a)
2690                    {
2691                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2692                    }
2693                    if let Expr::Variable(v) = expr
2694                        && !is_var_in_scope(&scope, v)
2695                    {
2696                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2697                    }
2698                }
2699                scope
2700            } else {
2701                let mut scope = vars_in_scope.to_vec();
2702                for (expr, alias) in &projections {
2703                    if let Some(a) = alias
2704                        && !is_var_in_scope(&scope, a)
2705                    {
2706                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2707                    } else if let Expr::Variable(v) = expr
2708                        && !is_var_in_scope(&scope, v)
2709                    {
2710                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2711                    }
2712                }
2713                scope
2714            };
2715            // Validate ORDER BY expressions against the extended scope
2716            for item in order_by {
2717                // DISTINCT allows ORDER BY on the same projected expression
2718                // even when underlying variables are not otherwise visible.
2719                let matches_projected_expr = return_clause.distinct
2720                    && projections
2721                        .iter()
2722                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
2723                if !matches_projected_expr {
2724                    validate_expression_variables(&item.expr, &order_by_scope)?;
2725                    validate_expression(&item.expr, &order_by_scope)?;
2726                }
2727                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
2728                if has_aggregate_in_item && !has_agg {
2729                    return Err(anyhow!(
2730                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
2731                    ));
2732                }
2733                if has_agg && has_aggregate_in_item {
2734                    validate_with_order_by_aggregate_item(
2735                        &item.expr,
2736                        &projected_aggregate_reprs,
2737                        &projected_simple_reprs,
2738                        &projected_aliases,
2739                    )?;
2740                }
2741            }
2742            let rewritten_order_by: Vec<SortItem> = order_by
2743                .iter()
2744                .map(|item| SortItem {
2745                    expr: {
2746                        let mut rewritten =
2747                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
2748                        if has_agg && !has_window_exprs {
2749                            rewritten = replace_aggregates_with_columns(&rewritten);
2750                        }
2751                        rewritten
2752                    },
2753                    ascending: item.ascending,
2754                })
2755                .collect();
2756            plan = LogicalPlan::Sort {
2757                input: Box::new(plan),
2758                order_by: rewritten_order_by,
2759            };
2760        }
2761
2762        if return_clause.skip.is_some() || return_clause.limit.is_some() {
2763            let skip = return_clause
2764                .skip
2765                .as_ref()
2766                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
2767                .transpose()?
2768                .flatten();
2769            let fetch = return_clause
2770                .limit
2771                .as_ref()
2772                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
2773                .transpose()?
2774                .flatten();
2775
2776            plan = LogicalPlan::Limit {
2777                input: Box::new(plan),
2778                skip,
2779                fetch,
2780            };
2781        }
2782
2783        if !projections.is_empty() {
2784            // If we created an Aggregate or Window node, we need to adjust the final projections
2785            // to reference aggregate/window function results as columns instead of re-evaluating them
2786            let final_projections = if has_agg || has_window_exprs {
2787                projections
2788                    .into_iter()
2789                    .map(|(expr, alias)| {
2790                        // Check if this expression is an aggregate function
2791                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
2792                        {
2793                            // Bare aggregate — replace with column reference
2794                            let col_name = aggregate_column_name(&expr);
2795                            (Expr::Variable(col_name), alias)
2796                        } else if !has_window_exprs
2797                            && (is_compound_aggregate(&expr)
2798                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
2799                        {
2800                            // Compound aggregate — replace inner aggregates with
2801                            // column references, keep outer expression for Project
2802                            (replace_aggregates_with_columns(&expr), alias)
2803                        }
2804                        // For grouped RETURN projections, reference the pre-computed
2805                        // group-by output column instead of re-evaluating the expression
2806                        // against the aggregate schema (which no longer has original vars).
2807                        else if has_agg
2808                            && !has_window_exprs
2809                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
2810                        {
2811                            (Expr::Variable(expr.to_string_repr()), alias)
2812                        }
2813                        // Check if this expression is a window function
2814                        else if let Expr::FunctionCall {
2815                            window_spec: Some(_),
2816                            ..
2817                        } = &expr
2818                        {
2819                            // Replace window function with a column reference to its result
2820                            // The column name in the Window output is the full expression string
2821                            let window_col_name = expr.to_string_repr();
2822                            // Keep the original alias for the final output
2823                            (Expr::Variable(window_col_name), alias)
2824                        } else {
2825                            (expr, alias)
2826                        }
2827                    })
2828                    .collect()
2829            } else {
2830                projections
2831            };
2832
2833            plan = LogicalPlan::Project {
2834                input: Box::new(plan),
2835                projections: final_projections,
2836            };
2837        }
2838
2839        if return_clause.distinct {
2840            plan = LogicalPlan::Distinct {
2841                input: Box::new(plan),
2842            };
2843        }
2844
2845        Ok(plan)
2846    }
2847
2848    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
2849        let typed_vars: Vec<VariableInfo> = initial_vars
2850            .into_iter()
2851            .map(|name| VariableInfo::new(name, VariableType::Imported))
2852            .collect();
2853        self.plan_single_typed(query, typed_vars)
2854    }
2855
2856    /// Rewrite a query then plan it, preserving typed variable scope when possible.
2857    ///
2858    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
2859    /// variable info through and avoid false type-conflict errors in subqueries.
2860    /// For unions and other compound queries, falls back to `plan_with_scope`.
2861    fn rewrite_and_plan_typed(
2862        &self,
2863        query: Query,
2864        typed_vars: &[VariableInfo],
2865    ) -> Result<LogicalPlan> {
2866        let rewritten = crate::query::rewrite::rewrite_query(query)?;
2867        match rewritten {
2868            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
2869            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
2870        }
2871    }
2872
2873    fn plan_single_typed(
2874        &self,
2875        query: Statement,
2876        initial_vars: Vec<VariableInfo>,
2877    ) -> Result<LogicalPlan> {
2878        let mut plan = LogicalPlan::Empty;
2879
2880        if !initial_vars.is_empty() {
2881            // Project bound variables from outer scope as parameters.
2882            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
2883            // Use Parameter expressions to read from params, not Variable which would read from input row.
2884            let projections = initial_vars
2885                .iter()
2886                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
2887                .collect();
2888            plan = LogicalPlan::Project {
2889                input: Box::new(plan),
2890                projections,
2891            };
2892        }
2893
2894        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
2895        // Track variables introduced by CREATE clauses so we can distinguish
2896        // MATCH-introduced variables (which cannot be re-created as bare nodes)
2897        // from CREATE-introduced variables (which can be referenced as bare nodes).
2898        let mut create_introduced_vars: HashSet<String> = HashSet::new();
2899        // Track variables targeted by DELETE so we can reject property/label
2900        // access on deleted entities in subsequent RETURN clauses.
2901        let mut deleted_vars: HashSet<String> = HashSet::new();
2902
2903        let clause_count = query.clauses.len();
2904        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
2905            match clause {
2906                Clause::Match(match_clause) => {
2907                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
2908                }
2909                Clause::Unwind(unwind) => {
2910                    plan = LogicalPlan::Unwind {
2911                        input: Box::new(plan),
2912                        expr: unwind.expr.clone(),
2913                        variable: unwind.variable.clone(),
2914                    };
2915                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
2916                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
2917                }
2918                Clause::Call(call_clause) => {
2919                    match &call_clause.kind {
2920                        CallKind::Procedure {
2921                            procedure,
2922                            arguments,
2923                        } => {
2924                            // Validate that procedure arguments don't contain aggregation functions
2925                            for arg in arguments {
2926                                if contains_aggregate_recursive(arg) {
2927                                    return Err(anyhow!(
2928                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
2929                                    ));
2930                                }
2931                            }
2932
2933                            let has_yield_star = call_clause.yield_items.len() == 1
2934                                && call_clause.yield_items[0].name == "*"
2935                                && call_clause.yield_items[0].alias.is_none();
2936                            if has_yield_star && clause_idx + 1 < clause_count {
2937                                return Err(anyhow!(
2938                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
2939                                ));
2940                            }
2941
2942                            // Validate for duplicate yield names (VariableAlreadyBound)
2943                            let mut yield_names = Vec::new();
2944                            for item in &call_clause.yield_items {
2945                                if item.name == "*" {
2946                                    continue;
2947                                }
2948                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
2949                                if yield_names.contains(output_name) {
2950                                    return Err(anyhow!(
2951                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
2952                                        output_name
2953                                    ));
2954                                }
2955                                // Check against existing scope (in-query CALL must not shadow)
2956                                if clause_idx > 0
2957                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
2958                                {
2959                                    return Err(anyhow!(
2960                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
2961                                        output_name
2962                                    ));
2963                                }
2964                                yield_names.push(output_name.clone());
2965                            }
2966
2967                            let mut yields = Vec::new();
2968                            for item in &call_clause.yield_items {
2969                                if item.name == "*" {
2970                                    continue;
2971                                }
2972                                yields.push((item.name.clone(), item.alias.clone()));
2973                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
2974                                // Use Imported because procedure return types are unknown
2975                                // at plan time (could be nodes, edges, or scalars)
2976                                add_var_to_scope(
2977                                    &mut vars_in_scope,
2978                                    var_name,
2979                                    VariableType::Imported,
2980                                )?;
2981                            }
2982                            let proc_plan = LogicalPlan::ProcedureCall {
2983                                procedure_name: procedure.clone(),
2984                                arguments: arguments.clone(),
2985                                yield_items: yields.clone(),
2986                            };
2987
2988                            if matches!(plan, LogicalPlan::Empty) {
2989                                // Standalone CALL (first clause) — use directly
2990                                plan = proc_plan;
2991                            } else if yields.is_empty() {
2992                                // In-query CALL with no YIELD (void procedure):
2993                                // preserve the input rows unchanged
2994                            } else {
2995                                // In-query CALL with YIELD: cross-join input × procedure output
2996                                plan = LogicalPlan::Apply {
2997                                    input: Box::new(plan),
2998                                    subquery: Box::new(proc_plan),
2999                                    input_filter: None,
3000                                };
3001                            }
3002                        }
3003                        CallKind::Subquery(query) => {
3004                            let subquery_plan =
3005                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3006
3007                            // Extract variables from subquery RETURN clause
3008                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3009
3010                            // Add new variables to scope (as Scalar since they come from subquery projection)
3011                            for var in subquery_vars {
3012                                if !is_var_in_scope(&vars_in_scope, &var) {
3013                                    add_var_to_scope(
3014                                        &mut vars_in_scope,
3015                                        &var,
3016                                        VariableType::Scalar,
3017                                    )?;
3018                                }
3019                            }
3020
3021                            plan = LogicalPlan::SubqueryCall {
3022                                input: Box::new(plan),
3023                                subquery: Box::new(subquery_plan),
3024                            };
3025                        }
3026                    }
3027                }
3028                Clause::Merge(merge_clause) => {
3029                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3030
3031                    plan = LogicalPlan::Merge {
3032                        input: Box::new(plan),
3033                        pattern: merge_clause.pattern.clone(),
3034                        on_match: Some(SetClause {
3035                            items: merge_clause.on_match.clone(),
3036                        }),
3037                        on_create: Some(SetClause {
3038                            items: merge_clause.on_create.clone(),
3039                        }),
3040                    };
3041
3042                    for path in &merge_clause.pattern.paths {
3043                        if let Some(path_var) = &path.variable
3044                            && !path_var.is_empty()
3045                            && !is_var_in_scope(&vars_in_scope, path_var)
3046                        {
3047                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3048                        }
3049                        for element in &path.elements {
3050                            if let PatternElement::Node(n) = element {
3051                                if let Some(v) = &n.variable
3052                                    && !is_var_in_scope(&vars_in_scope, v)
3053                                {
3054                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3055                                }
3056                            } else if let PatternElement::Relationship(r) = element
3057                                && let Some(v) = &r.variable
3058                                && !is_var_in_scope(&vars_in_scope, v)
3059                            {
3060                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3061                            }
3062                        }
3063                    }
3064                }
3065                Clause::Create(create_clause) => {
3066                    // Validate CREATE patterns:
3067                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3068                    // - Bare nodes (v) are "references" if bound, "creations" if not
3069                    // - Relationships are always creations - can't rebind
3070                    // - Within CREATE, each new variable can only be defined once
3071                    // - Variables used in properties must be defined
3072                    let mut create_vars: Vec<&str> = Vec::new();
3073                    for path in &create_clause.pattern.paths {
3074                        let is_standalone_node = path.elements.len() == 1;
3075                        for element in &path.elements {
3076                            match element {
3077                                PatternElement::Node(n) => {
3078                                    validate_property_variables(
3079                                        &n.properties,
3080                                        &vars_in_scope,
3081                                        &create_vars,
3082                                    )?;
3083
3084                                    if let Some(v) = n.variable.as_deref()
3085                                        && !v.is_empty()
3086                                    {
3087                                        // A node is a "creation" if it has labels or properties
3088                                        let is_creation =
3089                                            !n.labels.is_empty() || n.properties.is_some();
3090
3091                                        if is_creation {
3092                                            check_not_already_bound(
3093                                                v,
3094                                                &vars_in_scope,
3095                                                &create_vars,
3096                                            )?;
3097                                            create_vars.push(v);
3098                                        } else if is_standalone_node
3099                                            && is_var_in_scope(&vars_in_scope, v)
3100                                            && !create_introduced_vars.contains(v)
3101                                        {
3102                                            // Standalone bare node referencing a variable from a
3103                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3104                                            // Bare nodes used as relationship endpoints
3105                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3106                                            return Err(anyhow!(
3107                                                "SyntaxError: VariableAlreadyBound - '{}'",
3108                                                v
3109                                            ));
3110                                        } else if !create_vars.contains(&v) {
3111                                            // New bare variable — register it
3112                                            create_vars.push(v);
3113                                        }
3114                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3115                                    }
3116                                }
3117                                PatternElement::Relationship(r) => {
3118                                    validate_property_variables(
3119                                        &r.properties,
3120                                        &vars_in_scope,
3121                                        &create_vars,
3122                                    )?;
3123
3124                                    if let Some(v) = r.variable.as_deref()
3125                                        && !v.is_empty()
3126                                    {
3127                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3128                                        create_vars.push(v);
3129                                    }
3130
3131                                    // Validate relationship constraints for CREATE
3132                                    if r.types.len() != 1 {
3133                                        return Err(anyhow!(
3134                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3135                                        ));
3136                                    }
3137                                    if r.direction == Direction::Both {
3138                                        return Err(anyhow!(
3139                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3140                                        ));
3141                                    }
3142                                    if r.range.is_some() {
3143                                        return Err(anyhow!(
3144                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3145                                        ));
3146                                    }
3147                                }
3148                                PatternElement::Parenthesized { .. } => {}
3149                            }
3150                        }
3151                    }
3152
3153                    // Batch consecutive CREATEs to avoid deep recursion
3154                    match &mut plan {
3155                        LogicalPlan::CreateBatch { patterns, .. } => {
3156                            // Append to existing batch
3157                            patterns.push(create_clause.pattern.clone());
3158                        }
3159                        LogicalPlan::Create { input, pattern } => {
3160                            // Convert single Create to CreateBatch with both patterns
3161                            let first_pattern = pattern.clone();
3162                            plan = LogicalPlan::CreateBatch {
3163                                input: input.clone(),
3164                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3165                            };
3166                        }
3167                        _ => {
3168                            // Start new Create (may become batch if more CREATEs follow)
3169                            plan = LogicalPlan::Create {
3170                                input: Box::new(plan),
3171                                pattern: create_clause.pattern.clone(),
3172                            };
3173                        }
3174                    }
3175                    // Add variables from created nodes and relationships to scope
3176                    for path in &create_clause.pattern.paths {
3177                        for element in &path.elements {
3178                            match element {
3179                                PatternElement::Node(n) => {
3180                                    if let Some(var) = &n.variable
3181                                        && !var.is_empty()
3182                                    {
3183                                        create_introduced_vars.insert(var.clone());
3184                                        add_var_to_scope(
3185                                            &mut vars_in_scope,
3186                                            var,
3187                                            VariableType::Node,
3188                                        )?;
3189                                    }
3190                                }
3191                                PatternElement::Relationship(r) => {
3192                                    if let Some(var) = &r.variable
3193                                        && !var.is_empty()
3194                                    {
3195                                        create_introduced_vars.insert(var.clone());
3196                                        add_var_to_scope(
3197                                            &mut vars_in_scope,
3198                                            var,
3199                                            VariableType::Edge,
3200                                        )?;
3201                                    }
3202                                }
3203                                PatternElement::Parenthesized { .. } => {
3204                                    // Skip for now - not commonly used in CREATE
3205                                }
3206                            }
3207                        }
3208                    }
3209                }
3210                Clause::Set(set_clause) => {
3211                    // Validate SET value expressions
3212                    for item in &set_clause.items {
3213                        match item {
3214                            SetItem::Property { value, .. }
3215                            | SetItem::Variable { value, .. }
3216                            | SetItem::VariablePlus { value, .. } => {
3217                                validate_expression_variables(value, &vars_in_scope)?;
3218                                validate_expression(value, &vars_in_scope)?;
3219                                if contains_pattern_predicate(value) {
3220                                    return Err(anyhow!(
3221                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3222                                    ));
3223                                }
3224                            }
3225                            SetItem::Labels { .. } => {}
3226                        }
3227                    }
3228                    plan = LogicalPlan::Set {
3229                        input: Box::new(plan),
3230                        items: set_clause.items.clone(),
3231                    };
3232                }
3233                Clause::Remove(remove_clause) => {
3234                    plan = LogicalPlan::Remove {
3235                        input: Box::new(plan),
3236                        items: remove_clause.items.clone(),
3237                    };
3238                }
3239                Clause::Delete(delete_clause) => {
3240                    // Validate DELETE targets
3241                    for item in &delete_clause.items {
3242                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3243                        if matches!(item, Expr::LabelCheck { .. }) {
3244                            return Err(anyhow!(
3245                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3246                            ));
3247                        }
3248                        let vars_used = collect_expr_variables(item);
3249                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3250                        if vars_used.is_empty() {
3251                            return Err(anyhow!(
3252                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3253                            ));
3254                        }
3255                        for var in &vars_used {
3256                            // Check if variable is defined
3257                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3258                                return Err(anyhow!(
3259                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3260                                    var
3261                                ));
3262                            }
3263                        }
3264                        // Strict type check only for simple variable references —
3265                        // complex expressions (property access, array index, etc.)
3266                        // may resolve to a node/edge at runtime even if the base
3267                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3268                        if let Expr::Variable(name) = item
3269                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3270                            && matches!(
3271                                info.var_type,
3272                                VariableType::Scalar | VariableType::ScalarLiteral
3273                            )
3274                        {
3275                            return Err(anyhow!(
3276                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3277                                name
3278                            ));
3279                        }
3280                    }
3281                    // Track deleted variables for later validation
3282                    for item in &delete_clause.items {
3283                        if let Expr::Variable(name) = item {
3284                            deleted_vars.insert(name.clone());
3285                        }
3286                    }
3287                    plan = LogicalPlan::Delete {
3288                        input: Box::new(plan),
3289                        items: delete_clause.items.clone(),
3290                        detach: delete_clause.detach,
3291                    };
3292                }
3293                Clause::With(with_clause) => {
3294                    let (new_plan, new_vars) =
3295                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3296                    plan = new_plan;
3297                    vars_in_scope = new_vars;
3298                }
3299                Clause::WithRecursive(with_recursive) => {
3300                    // Plan the recursive CTE
3301                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3302                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3303                    add_var_to_scope(
3304                        &mut vars_in_scope,
3305                        &with_recursive.name,
3306                        VariableType::Scalar,
3307                    )?;
3308                }
3309                Clause::Return(return_clause) => {
3310                    // Check for property/label access on deleted entities
3311                    if !deleted_vars.is_empty() {
3312                        for item in &return_clause.items {
3313                            if let ReturnItem::Expr { expr, .. } = item {
3314                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3315                            }
3316                        }
3317                    }
3318                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3319                } // All Clause variants are handled above - no catch-all needed
3320            }
3321        }
3322
3323        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3324        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3325        // If RETURN was used, the plan will have been wrapped in Project, so we only
3326        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3327        let plan = match &plan {
3328            LogicalPlan::Create { .. }
3329            | LogicalPlan::CreateBatch { .. }
3330            | LogicalPlan::Delete { .. }
3331            | LogicalPlan::Set { .. }
3332            | LogicalPlan::Remove { .. }
3333            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3334                input: Box::new(plan),
3335                skip: None,
3336                fetch: Some(0),
3337            },
3338            _ => plan,
3339        };
3340
3341        Ok(plan)
3342    }
3343
3344    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3345        match expr {
3346            Expr::Property(_, _) => {
3347                if !collected
3348                    .iter()
3349                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3350                {
3351                    collected.push(expr.clone());
3352                }
3353            }
3354            Expr::Variable(_) => {
3355                // Variables are already available, don't need to project them
3356            }
3357            Expr::BinaryOp { left, right, .. } => {
3358                Self::collect_properties_from_expr(left, collected);
3359                Self::collect_properties_from_expr(right, collected);
3360            }
3361            Expr::FunctionCall {
3362                args, window_spec, ..
3363            } => {
3364                for arg in args {
3365                    Self::collect_properties_from_expr(arg, collected);
3366                }
3367                if let Some(spec) = window_spec {
3368                    for partition_expr in &spec.partition_by {
3369                        Self::collect_properties_from_expr(partition_expr, collected);
3370                    }
3371                    for sort_item in &spec.order_by {
3372                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3373                    }
3374                }
3375            }
3376            Expr::List(items) => {
3377                for item in items {
3378                    Self::collect_properties_from_expr(item, collected);
3379                }
3380            }
3381            Expr::UnaryOp { expr: e, .. }
3382            | Expr::IsNull(e)
3383            | Expr::IsNotNull(e)
3384            | Expr::IsUnique(e) => {
3385                Self::collect_properties_from_expr(e, collected);
3386            }
3387            Expr::Case {
3388                expr,
3389                when_then,
3390                else_expr,
3391            } => {
3392                if let Some(e) = expr {
3393                    Self::collect_properties_from_expr(e, collected);
3394                }
3395                for (w, t) in when_then {
3396                    Self::collect_properties_from_expr(w, collected);
3397                    Self::collect_properties_from_expr(t, collected);
3398                }
3399                if let Some(e) = else_expr {
3400                    Self::collect_properties_from_expr(e, collected);
3401                }
3402            }
3403            Expr::In { expr, list } => {
3404                Self::collect_properties_from_expr(expr, collected);
3405                Self::collect_properties_from_expr(list, collected);
3406            }
3407            Expr::ArrayIndex { array, index } => {
3408                Self::collect_properties_from_expr(array, collected);
3409                Self::collect_properties_from_expr(index, collected);
3410            }
3411            Expr::ArraySlice { array, start, end } => {
3412                Self::collect_properties_from_expr(array, collected);
3413                if let Some(s) = start {
3414                    Self::collect_properties_from_expr(s, collected);
3415                }
3416                if let Some(e) = end {
3417                    Self::collect_properties_from_expr(e, collected);
3418                }
3419            }
3420            _ => {}
3421        }
3422    }
3423
3424    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3425        if let Expr::FunctionCall { window_spec, .. } = expr {
3426            // Collect any function with a window spec (OVER clause)
3427            if window_spec.is_some() {
3428                if !collected
3429                    .iter()
3430                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3431                {
3432                    collected.push(expr.clone());
3433                }
3434                return;
3435            }
3436        }
3437
3438        match expr {
3439            Expr::BinaryOp { left, right, .. } => {
3440                Self::collect_window_functions(left, collected);
3441                Self::collect_window_functions(right, collected);
3442            }
3443            Expr::FunctionCall { args, .. } => {
3444                for arg in args {
3445                    Self::collect_window_functions(arg, collected);
3446                }
3447            }
3448            Expr::List(items) => {
3449                for i in items {
3450                    Self::collect_window_functions(i, collected);
3451                }
3452            }
3453            Expr::Map(items) => {
3454                for (_, i) in items {
3455                    Self::collect_window_functions(i, collected);
3456                }
3457            }
3458            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3459                Self::collect_window_functions(e, collected);
3460            }
3461            Expr::Case {
3462                expr,
3463                when_then,
3464                else_expr,
3465            } => {
3466                if let Some(e) = expr {
3467                    Self::collect_window_functions(e, collected);
3468                }
3469                for (w, t) in when_then {
3470                    Self::collect_window_functions(w, collected);
3471                    Self::collect_window_functions(t, collected);
3472                }
3473                if let Some(e) = else_expr {
3474                    Self::collect_window_functions(e, collected);
3475                }
3476            }
3477            Expr::Reduce {
3478                init, list, expr, ..
3479            } => {
3480                Self::collect_window_functions(init, collected);
3481                Self::collect_window_functions(list, collected);
3482                Self::collect_window_functions(expr, collected);
3483            }
3484            Expr::Quantifier {
3485                list, predicate, ..
3486            } => {
3487                Self::collect_window_functions(list, collected);
3488                Self::collect_window_functions(predicate, collected);
3489            }
3490            Expr::In { expr, list } => {
3491                Self::collect_window_functions(expr, collected);
3492                Self::collect_window_functions(list, collected);
3493            }
3494            Expr::ArrayIndex { array, index } => {
3495                Self::collect_window_functions(array, collected);
3496                Self::collect_window_functions(index, collected);
3497            }
3498            Expr::ArraySlice { array, start, end } => {
3499                Self::collect_window_functions(array, collected);
3500                if let Some(s) = start {
3501                    Self::collect_window_functions(s, collected);
3502                }
3503                if let Some(e) = end {
3504                    Self::collect_window_functions(e, collected);
3505                }
3506            }
3507            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3508            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3509            _ => {}
3510        }
3511    }
3512
3513    /// Transform property expressions in manual window functions to use qualified variable names.
3514    ///
3515    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3516    /// so the executor can look up values directly from the row HashMap after the
3517    /// intermediate projection has materialized these properties with qualified names.
3518    ///
3519    /// Transforms ALL window functions (both manual and aggregate).
3520    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3521    fn transform_window_expr_properties(expr: Expr) -> Expr {
3522        let Expr::FunctionCall {
3523            name,
3524            args,
3525            window_spec: Some(spec),
3526            distinct,
3527        } = expr
3528        else {
3529            return expr;
3530        };
3531
3532        // Transform arguments for ALL window functions
3533        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3534        let transformed_args = args
3535            .into_iter()
3536            .map(Self::transform_property_to_variable)
3537            .collect();
3538
3539        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3540        let transformed_partition_by = spec
3541            .partition_by
3542            .into_iter()
3543            .map(Self::transform_property_to_variable)
3544            .collect();
3545
3546        let transformed_order_by = spec
3547            .order_by
3548            .into_iter()
3549            .map(|item| SortItem {
3550                expr: Self::transform_property_to_variable(item.expr),
3551                ascending: item.ascending,
3552            })
3553            .collect();
3554
3555        Expr::FunctionCall {
3556            name,
3557            args: transformed_args,
3558            window_spec: Some(WindowSpec {
3559                partition_by: transformed_partition_by,
3560                order_by: transformed_order_by,
3561            }),
3562            distinct,
3563        }
3564    }
3565
3566    /// Transform a property expression to a variable expression with qualified name.
3567    ///
3568    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
3569    fn transform_property_to_variable(expr: Expr) -> Expr {
3570        let Expr::Property(base, prop) = expr else {
3571            return expr;
3572        };
3573
3574        match *base {
3575            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
3576            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
3577        }
3578    }
3579
3580    /// Transform VALID_AT macro into function call
3581    ///
3582    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
3583    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
3584    fn transform_valid_at_to_function(expr: Expr) -> Expr {
3585        match expr {
3586            Expr::ValidAt {
3587                entity,
3588                timestamp,
3589                start_prop,
3590                end_prop,
3591            } => {
3592                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
3593                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
3594
3595                Expr::FunctionCall {
3596                    name: "uni.temporal.validAt".to_string(),
3597                    args: vec![
3598                        Self::transform_valid_at_to_function(*entity),
3599                        Expr::Literal(CypherLiteral::String(start)),
3600                        Expr::Literal(CypherLiteral::String(end)),
3601                        Self::transform_valid_at_to_function(*timestamp),
3602                    ],
3603                    distinct: false,
3604                    window_spec: None,
3605                }
3606            }
3607            // Recursively transform nested expressions
3608            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
3609                left: Box::new(Self::transform_valid_at_to_function(*left)),
3610                op,
3611                right: Box::new(Self::transform_valid_at_to_function(*right)),
3612            },
3613            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
3614                op,
3615                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3616            },
3617            Expr::FunctionCall {
3618                name,
3619                args,
3620                distinct,
3621                window_spec,
3622            } => Expr::FunctionCall {
3623                name,
3624                args: args
3625                    .into_iter()
3626                    .map(Self::transform_valid_at_to_function)
3627                    .collect(),
3628                distinct,
3629                window_spec,
3630            },
3631            Expr::Property(base, prop) => {
3632                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
3633            }
3634            Expr::List(items) => Expr::List(
3635                items
3636                    .into_iter()
3637                    .map(Self::transform_valid_at_to_function)
3638                    .collect(),
3639            ),
3640            Expr::In { expr, list } => Expr::In {
3641                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3642                list: Box::new(Self::transform_valid_at_to_function(*list)),
3643            },
3644            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
3645            Expr::IsNotNull(e) => {
3646                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
3647            }
3648            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
3649            // Other cases: return as-is
3650            other => other,
3651        }
3652    }
3653
3654    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
3655    fn plan_match_clause(
3656        &self,
3657        match_clause: &MatchClause,
3658        plan: LogicalPlan,
3659        vars_in_scope: &mut Vec<VariableInfo>,
3660    ) -> Result<LogicalPlan> {
3661        let mut plan = plan;
3662
3663        if match_clause.pattern.paths.is_empty() {
3664            return Err(anyhow!("Empty pattern"));
3665        }
3666
3667        // Track variables introduced by this OPTIONAL MATCH
3668        let vars_before_pattern = vars_in_scope.len();
3669
3670        for path in &match_clause.pattern.paths {
3671            if let Some(mode) = &path.shortest_path_mode {
3672                plan =
3673                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
3674            } else {
3675                plan = self.plan_path(
3676                    path,
3677                    plan,
3678                    vars_in_scope,
3679                    match_clause.optional,
3680                    vars_before_pattern,
3681                )?;
3682            }
3683        }
3684
3685        // Collect variables introduced by this OPTIONAL MATCH pattern
3686        let optional_vars: HashSet<String> = if match_clause.optional {
3687            vars_in_scope[vars_before_pattern..]
3688                .iter()
3689                .map(|v| v.name.clone())
3690                .collect()
3691        } else {
3692            HashSet::new()
3693        };
3694
3695        // Handle WHERE clause with vector_similarity and predicate pushdown
3696        if let Some(predicate) = &match_clause.where_clause {
3697            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
3698        }
3699
3700        Ok(plan)
3701    }
3702
3703    /// Plan a shortestPath pattern.
3704    fn plan_shortest_path(
3705        &self,
3706        path: &PathPattern,
3707        plan: LogicalPlan,
3708        vars_in_scope: &mut Vec<VariableInfo>,
3709        mode: &ShortestPathMode,
3710        _vars_before_pattern: usize,
3711    ) -> Result<LogicalPlan> {
3712        let mut plan = plan;
3713        let elements = &path.elements;
3714
3715        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
3716        if elements.len() < 3 || elements.len().is_multiple_of(2) {
3717            return Err(anyhow!(
3718                "shortestPath requires at least one relationship: (a)-[*]->(b)"
3719            ));
3720        }
3721
3722        let source_node = match &elements[0] {
3723            PatternElement::Node(n) => n,
3724            _ => return Err(anyhow!("ShortestPath must start with a node")),
3725        };
3726        let rel = match &elements[1] {
3727            PatternElement::Relationship(r) => r,
3728            _ => {
3729                return Err(anyhow!(
3730                    "ShortestPath middle element must be a relationship"
3731                ));
3732            }
3733        };
3734        let target_node = match &elements[2] {
3735            PatternElement::Node(n) => n,
3736            _ => return Err(anyhow!("ShortestPath must end with a node")),
3737        };
3738
3739        let source_var = source_node
3740            .variable
3741            .clone()
3742            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
3743        let target_var = target_node
3744            .variable
3745            .clone()
3746            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
3747        let path_var = path
3748            .variable
3749            .clone()
3750            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
3751
3752        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
3753        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
3754
3755        // Plan source node if not bound
3756        if !source_bound {
3757            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
3758        } else if let Some(prop_filter) =
3759            self.properties_to_expr(&source_var, &source_node.properties)
3760        {
3761            plan = LogicalPlan::Filter {
3762                input: Box::new(plan),
3763                predicate: prop_filter,
3764                optional_variables: HashSet::new(),
3765            };
3766        }
3767
3768        // Plan target node if not bound
3769        let target_label_id = if !target_bound {
3770            // Use first label for target_label_id
3771            let target_label_name = target_node
3772                .labels
3773                .first()
3774                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
3775            let target_label_meta = self
3776                .schema
3777                .get_label_case_insensitive(target_label_name)
3778                .ok_or_else(|| anyhow!("Label {} not found", target_label_name))?;
3779
3780            let target_scan = LogicalPlan::Scan {
3781                label_id: target_label_meta.id,
3782                labels: target_node.labels.clone(),
3783                variable: target_var.clone(),
3784                filter: self.properties_to_expr(&target_var, &target_node.properties),
3785                optional: false,
3786            };
3787
3788            plan = Self::join_with_plan(plan, target_scan);
3789            target_label_meta.id
3790        } else {
3791            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
3792            {
3793                plan = LogicalPlan::Filter {
3794                    input: Box::new(plan),
3795                    predicate: prop_filter,
3796                    optional_variables: HashSet::new(),
3797                };
3798            }
3799            0 // Wildcard for already-bound target
3800        };
3801
3802        // Add ShortestPath operator
3803        let edge_type_ids = if rel.types.is_empty() {
3804            // If no type specified, fetch all edge types (both schema and schemaless)
3805            self.schema.all_edge_type_ids()
3806        } else {
3807            let mut ids = Vec::new();
3808            for type_name in &rel.types {
3809                let edge_meta = self
3810                    .schema
3811                    .edge_types
3812                    .get(type_name)
3813                    .ok_or_else(|| anyhow!("Edge type {} not found", type_name))?;
3814                ids.push(edge_meta.id);
3815            }
3816            ids
3817        };
3818
3819        // Extract hop constraints from relationship pattern
3820        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
3821        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
3822
3823        let sp_plan = match mode {
3824            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
3825                input: Box::new(plan),
3826                edge_type_ids,
3827                direction: rel.direction.clone(),
3828                source_variable: source_var.clone(),
3829                target_variable: target_var.clone(),
3830                target_label_id,
3831                path_variable: path_var.clone(),
3832                min_hops,
3833                max_hops,
3834            },
3835            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
3836                input: Box::new(plan),
3837                edge_type_ids,
3838                direction: rel.direction.clone(),
3839                source_variable: source_var.clone(),
3840                target_variable: target_var.clone(),
3841                target_label_id,
3842                path_variable: path_var.clone(),
3843                min_hops,
3844                max_hops,
3845            },
3846        };
3847
3848        if !source_bound {
3849            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
3850        }
3851        if !target_bound {
3852            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
3853        }
3854        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
3855
3856        Ok(sp_plan)
3857    }
3858    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
3859    ///
3860    /// This is a public entry point for the Locy plan builder to reuse the
3861    /// existing pattern-planning logic for clause bodies.
3862    pub fn plan_pattern(
3863        &self,
3864        pattern: &Pattern,
3865        initial_vars: &[VariableInfo],
3866    ) -> Result<LogicalPlan> {
3867        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
3868        let vars_before_pattern = vars_in_scope.len();
3869        let mut plan = LogicalPlan::Empty;
3870        for path in &pattern.paths {
3871            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
3872        }
3873        Ok(plan)
3874    }
3875
3876    /// Plan a regular MATCH path (not shortestPath).
3877    fn plan_path(
3878        &self,
3879        path: &PathPattern,
3880        plan: LogicalPlan,
3881        vars_in_scope: &mut Vec<VariableInfo>,
3882        optional: bool,
3883        vars_before_pattern: usize,
3884    ) -> Result<LogicalPlan> {
3885        let mut plan = plan;
3886        let elements = &path.elements;
3887        let mut i = 0;
3888
3889        let path_variable = path.variable.clone();
3890
3891        // Check for VariableAlreadyBound: path variable already in scope
3892        if let Some(pv) = &path_variable
3893            && !pv.is_empty()
3894            && is_var_in_scope(vars_in_scope, pv)
3895        {
3896            return Err(anyhow!(
3897                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3898                pv
3899            ));
3900        }
3901
3902        // Check for VariableAlreadyBound: path variable conflicts with element variables
3903        if let Some(pv) = &path_variable
3904            && !pv.is_empty()
3905        {
3906            for element in elements {
3907                match element {
3908                    PatternElement::Node(n) => {
3909                        if let Some(v) = &n.variable
3910                            && v == pv
3911                        {
3912                            return Err(anyhow!(
3913                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3914                                pv
3915                            ));
3916                        }
3917                    }
3918                    PatternElement::Relationship(r) => {
3919                        if let Some(v) = &r.variable
3920                            && v == pv
3921                        {
3922                            return Err(anyhow!(
3923                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3924                                pv
3925                            ));
3926                        }
3927                    }
3928                    PatternElement::Parenthesized { .. } => {}
3929                }
3930            }
3931        }
3932
3933        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
3934        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
3935        let mut optional_pattern_vars: HashSet<String> = if optional {
3936            let mut vars = HashSet::new();
3937            for element in elements {
3938                match element {
3939                    PatternElement::Node(n) => {
3940                        if let Some(v) = &n.variable
3941                            && !v.is_empty()
3942                            && !is_var_in_scope(vars_in_scope, v)
3943                        {
3944                            vars.insert(v.clone());
3945                        }
3946                    }
3947                    PatternElement::Relationship(r) => {
3948                        if let Some(v) = &r.variable
3949                            && !v.is_empty()
3950                            && !is_var_in_scope(vars_in_scope, v)
3951                        {
3952                            vars.insert(v.clone());
3953                        }
3954                    }
3955                    PatternElement::Parenthesized { pattern, .. } => {
3956                        // Also check nested patterns
3957                        for nested_elem in &pattern.elements {
3958                            match nested_elem {
3959                                PatternElement::Node(n) => {
3960                                    if let Some(v) = &n.variable
3961                                        && !v.is_empty()
3962                                        && !is_var_in_scope(vars_in_scope, v)
3963                                    {
3964                                        vars.insert(v.clone());
3965                                    }
3966                                }
3967                                PatternElement::Relationship(r) => {
3968                                    if let Some(v) = &r.variable
3969                                        && !v.is_empty()
3970                                        && !is_var_in_scope(vars_in_scope, v)
3971                                    {
3972                                        vars.insert(v.clone());
3973                                    }
3974                                }
3975                                _ => {}
3976                            }
3977                        }
3978                    }
3979                }
3980            }
3981            // Include path variable if present
3982            if let Some(pv) = &path_variable
3983                && !pv.is_empty()
3984            {
3985                vars.insert(pv.clone());
3986            }
3987            vars
3988        } else {
3989            HashSet::new()
3990        };
3991
3992        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
3993        // These must participate in Trail mode (relationship uniqueness) enforcement
3994        // across ALL segments in this path, so that VLP segments like [*0..1] don't
3995        // traverse through edges already claimed by a bound relationship [r].
3996        let path_bound_edge_vars: HashSet<String> = {
3997            let mut bound = HashSet::new();
3998            for element in elements {
3999                if let PatternElement::Relationship(rel) = element
4000                    && let Some(ref var_name) = rel.variable
4001                    && !var_name.is_empty()
4002                    && vars_in_scope[..vars_before_pattern]
4003                        .iter()
4004                        .any(|v| v.name == *var_name)
4005                {
4006                    bound.insert(var_name.clone());
4007                }
4008            }
4009            bound
4010        };
4011
4012        // Track if any traverses were added (for zero-length path detection)
4013        let mut had_traverses = false;
4014        // Track the node variable for zero-length path binding
4015        let mut single_node_variable: Option<String> = None;
4016        // Collect node/edge variables for BindPath (fixed-length path binding)
4017        let mut path_node_vars: Vec<String> = Vec::new();
4018        let mut path_edge_vars: Vec<String> = Vec::new();
4019        // Track the last processed outer node variable for QPP source binding.
4020        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4021        let mut last_outer_node_var: Option<String> = None;
4022
4023        // Multi-hop path variables are now supported - path is accumulated across hops
4024        while i < elements.len() {
4025            let element = &elements[i];
4026            match element {
4027                PatternElement::Node(n) => {
4028                    let mut variable = n.variable.clone().unwrap_or_default();
4029                    if variable.is_empty() {
4030                        variable = self.next_anon_var();
4031                    }
4032                    // Track first node variable for zero-length path
4033                    if single_node_variable.is_none() {
4034                        single_node_variable = Some(variable.clone());
4035                    }
4036                    let is_bound =
4037                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4038                    if optional && !is_bound {
4039                        optional_pattern_vars.insert(variable.clone());
4040                    }
4041
4042                    if is_bound {
4043                        // Check for type conflict - can't use an Edge/Path as a Node
4044                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4045                            && !info.var_type.is_compatible_with(VariableType::Node)
4046                        {
4047                            return Err(anyhow!(
4048                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4049                                variable,
4050                                info.var_type
4051                            ));
4052                        }
4053                        if let Some(node_filter) =
4054                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4055                        {
4056                            plan = LogicalPlan::Filter {
4057                                input: Box::new(plan),
4058                                predicate: node_filter,
4059                                optional_variables: HashSet::new(),
4060                            };
4061                        }
4062                    } else {
4063                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4064                        if !variable.is_empty() {
4065                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4066                        }
4067                    }
4068
4069                    // Track source node for BindPath
4070                    if path_variable.is_some() && path_node_vars.is_empty() {
4071                        path_node_vars.push(variable.clone());
4072                    }
4073
4074                    // Look ahead for relationships
4075                    let mut current_source_var = variable;
4076                    last_outer_node_var = Some(current_source_var.clone());
4077                    i += 1;
4078                    while i < elements.len() {
4079                        if let PatternElement::Relationship(r) = &elements[i] {
4080                            if i + 1 < elements.len() {
4081                                let target_node_part = &elements[i + 1];
4082                                if let PatternElement::Node(n_target) = target_node_part {
4083                                    // For VLP traversals, pass path_variable through
4084                                    // For fixed-length, we use BindPath instead
4085                                    let is_vlp = r.range.is_some();
4086                                    let traverse_path_var =
4087                                        if is_vlp { path_variable.clone() } else { None };
4088
4089                                    // If we're about to start a VLP segment and there are
4090                                    // collected fixed-hop path vars, create an intermediate
4091                                    // BindPath for the fixed prefix first. The VLP will then
4092                                    // extend this existing path.
4093                                    if is_vlp
4094                                        && let Some(pv) = path_variable.as_ref()
4095                                        && !path_node_vars.is_empty()
4096                                    {
4097                                        plan = LogicalPlan::BindPath {
4098                                            input: Box::new(plan),
4099                                            node_variables: std::mem::take(&mut path_node_vars),
4100                                            edge_variables: std::mem::take(&mut path_edge_vars),
4101                                            path_variable: pv.clone(),
4102                                        };
4103                                        if !is_var_in_scope(vars_in_scope, pv) {
4104                                            add_var_to_scope(
4105                                                vars_in_scope,
4106                                                pv,
4107                                                VariableType::Path,
4108                                            )?;
4109                                        }
4110                                    }
4111
4112                                    // Plan the traverse from the current source node
4113                                    let target_was_bound =
4114                                        n_target.variable.as_ref().is_some_and(|v| {
4115                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4116                                        });
4117                                    let (new_plan, target_var, effective_target) = self
4118                                        .plan_traverse_with_source(
4119                                            plan,
4120                                            vars_in_scope,
4121                                            TraverseParams {
4122                                                rel: r,
4123                                                target_node: n_target,
4124                                                optional,
4125                                                path_variable: traverse_path_var,
4126                                                optional_pattern_vars: optional_pattern_vars
4127                                                    .clone(),
4128                                            },
4129                                            &current_source_var,
4130                                            vars_before_pattern,
4131                                            &path_bound_edge_vars,
4132                                        )?;
4133                                    plan = new_plan;
4134                                    if optional && !target_was_bound {
4135                                        optional_pattern_vars.insert(target_var.clone());
4136                                    }
4137
4138                                    // Track edge/target node for BindPath
4139                                    if path_variable.is_some() && !is_vlp {
4140                                        // Use the edge variable if given, otherwise use
4141                                        // the internal tracking column pattern.
4142                                        // Use effective_target (which may be __rebound_x
4143                                        // for bound-target traversals) to match the actual
4144                                        // column name produced by GraphTraverseExec.
4145                                        if let Some(ev) = &r.variable {
4146                                            path_edge_vars.push(ev.clone());
4147                                        } else {
4148                                            path_edge_vars
4149                                                .push(format!("__eid_to_{}", effective_target));
4150                                        }
4151                                        path_node_vars.push(target_var.clone());
4152                                    }
4153
4154                                    current_source_var = target_var;
4155                                    last_outer_node_var = Some(current_source_var.clone());
4156                                    had_traverses = true;
4157                                    i += 2;
4158                                } else {
4159                                    return Err(anyhow!("Relationship must be followed by a node"));
4160                                }
4161                            } else {
4162                                return Err(anyhow!("Relationship cannot be the last element"));
4163                            }
4164                        } else {
4165                            break;
4166                        }
4167                    }
4168                }
4169                PatternElement::Relationship(_) => {
4170                    return Err(anyhow!("Pattern must start with a node"));
4171                }
4172                PatternElement::Parenthesized { pattern, range } => {
4173                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4174                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4175                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4176                        return Err(anyhow!(
4177                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4178                        ));
4179                    }
4180
4181                    let source_node = match &pattern.elements[0] {
4182                        PatternElement::Node(n) => n,
4183                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4184                    };
4185
4186                    // Extract all relationship-node pairs (QPP steps)
4187                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4188                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4189                        let rel = match &pattern.elements[pair_idx] {
4190                            PatternElement::Relationship(r) => r,
4191                            _ => {
4192                                return Err(anyhow!(
4193                                    "Quantified pattern element at position {} must be a relationship",
4194                                    pair_idx
4195                                ));
4196                            }
4197                        };
4198                        let node = match &pattern.elements[pair_idx + 1] {
4199                            PatternElement::Node(n) => n,
4200                            _ => {
4201                                return Err(anyhow!(
4202                                    "Quantified pattern element at position {} must be a node",
4203                                    pair_idx + 1
4204                                ));
4205                            }
4206                        };
4207                        // Reject nested quantifiers
4208                        if rel.range.is_some() {
4209                            return Err(anyhow!(
4210                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4211                            ));
4212                        }
4213                        qpp_rels.push((rel, node));
4214                    }
4215
4216                    // Check if there's an outer target node after the Parenthesized element.
4217                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4218                    // target that should receive the traversal result.
4219                    let inner_target_node = qpp_rels.last().unwrap().1;
4220                    let outer_target_node = if i + 1 < elements.len() {
4221                        match &elements[i + 1] {
4222                            PatternElement::Node(n) => Some(n),
4223                            _ => None,
4224                        }
4225                    } else {
4226                        None
4227                    };
4228                    // Use the outer target for variable binding and filters; inner target
4229                    // labels are used for state constraints within the NFA.
4230                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4231
4232                    // For simple 3-element single-hop QPP without intermediate label constraints,
4233                    // fall back to existing VLP behavior (copy range to relationship).
4234                    let use_simple_vlp = qpp_rels.len() == 1
4235                        && inner_target_node
4236                            .labels
4237                            .first()
4238                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4239                            .is_none();
4240
4241                    // Plan source node.
4242                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4243                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4244                    // node variable, use it; otherwise fall back to the inner source.
4245                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4246                        // The preceding outer node is already bound and in scope
4247                        // Apply any property filters from the inner source node
4248                        if let Some(prop_filter) =
4249                            self.properties_to_expr(outer_src, &source_node.properties)
4250                        {
4251                            plan = LogicalPlan::Filter {
4252                                input: Box::new(plan),
4253                                predicate: prop_filter,
4254                                optional_variables: HashSet::new(),
4255                            };
4256                        }
4257                        outer_src.clone()
4258                    } else {
4259                        let sv = source_node
4260                            .variable
4261                            .clone()
4262                            .filter(|v| !v.is_empty())
4263                            .unwrap_or_else(|| self.next_anon_var());
4264
4265                        if is_var_in_scope(vars_in_scope, &sv) {
4266                            // Source is already bound, apply property filter if needed
4267                            if let Some(prop_filter) =
4268                                self.properties_to_expr(&sv, &source_node.properties)
4269                            {
4270                                plan = LogicalPlan::Filter {
4271                                    input: Box::new(plan),
4272                                    predicate: prop_filter,
4273                                    optional_variables: HashSet::new(),
4274                                };
4275                            }
4276                        } else {
4277                            // Source is unbound, scan it
4278                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4279                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4280                            if optional {
4281                                optional_pattern_vars.insert(sv.clone());
4282                            }
4283                        }
4284                        sv
4285                    };
4286
4287                    if use_simple_vlp {
4288                        // Simple single-hop QPP: apply range to relationship and use VLP path
4289                        let mut relationship = qpp_rels[0].0.clone();
4290                        relationship.range = range.clone();
4291
4292                        let target_was_bound = target_node
4293                            .variable
4294                            .as_ref()
4295                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4296                        let (new_plan, target_var, _effective_target) = self
4297                            .plan_traverse_with_source(
4298                                plan,
4299                                vars_in_scope,
4300                                TraverseParams {
4301                                    rel: &relationship,
4302                                    target_node,
4303                                    optional,
4304                                    path_variable: path_variable.clone(),
4305                                    optional_pattern_vars: optional_pattern_vars.clone(),
4306                                },
4307                                &source_variable,
4308                                vars_before_pattern,
4309                                &path_bound_edge_vars,
4310                            )?;
4311                        plan = new_plan;
4312                        if optional && !target_was_bound {
4313                            optional_pattern_vars.insert(target_var);
4314                        }
4315                    } else {
4316                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4317                        let mut qpp_step_infos = Vec::new();
4318                        let mut all_edge_type_ids = Vec::new();
4319
4320                        for (rel, node) in &qpp_rels {
4321                            let mut step_edge_type_ids = Vec::new();
4322                            if rel.types.is_empty() {
4323                                step_edge_type_ids = self.schema.all_edge_type_ids();
4324                            } else {
4325                                for type_name in &rel.types {
4326                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4327                                        step_edge_type_ids.push(edge_meta.id);
4328                                    }
4329                                }
4330                            }
4331                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4332
4333                            let target_label = node.labels.first().and_then(|l| {
4334                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4335                            });
4336
4337                            qpp_step_infos.push(QppStepInfo {
4338                                edge_type_ids: step_edge_type_ids,
4339                                direction: rel.direction.clone(),
4340                                target_label,
4341                            });
4342                        }
4343
4344                        // Deduplicate edge type IDs for adjacency warming
4345                        all_edge_type_ids.sort_unstable();
4346                        all_edge_type_ids.dedup();
4347
4348                        // Compute iteration bounds from range
4349                        let hops_per_iter = qpp_step_infos.len();
4350                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4351                        let (min_iter, max_iter) = if let Some(range) = range {
4352                            let min = range.min.unwrap_or(1) as usize;
4353                            let max = range
4354                                .max
4355                                .map(|m| m as usize)
4356                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4357                            (min, max)
4358                        } else {
4359                            (1, 1)
4360                        };
4361                        let min_hops = min_iter * hops_per_iter;
4362                        let max_hops = max_iter * hops_per_iter;
4363
4364                        // Target variable from the last node in the QPP sub-pattern
4365                        let target_variable = target_node
4366                            .variable
4367                            .clone()
4368                            .filter(|v| !v.is_empty())
4369                            .unwrap_or_else(|| self.next_anon_var());
4370
4371                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4372
4373                        // Determine target label for the final node
4374                        let target_label_meta = target_node
4375                            .labels
4376                            .first()
4377                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4378
4379                        // Collect scope match variables
4380                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4381                            [vars_before_pattern..]
4382                            .iter()
4383                            .map(|v| v.name.clone())
4384                            .collect();
4385                        scope_match_variables.insert(target_variable.clone());
4386
4387                        // Handle bound target: use rebound variable for traverse
4388                        let rebound_target_var = if target_is_bound {
4389                            Some(target_variable.clone())
4390                        } else {
4391                            None
4392                        };
4393                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4394                            format!("__rebound_{}", bv)
4395                        } else {
4396                            target_variable.clone()
4397                        };
4398
4399                        plan = LogicalPlan::Traverse {
4400                            input: Box::new(plan),
4401                            edge_type_ids: all_edge_type_ids,
4402                            direction: qpp_rels[0].0.direction.clone(),
4403                            source_variable: source_variable.to_string(),
4404                            target_variable: effective_target_var.clone(),
4405                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4406                            step_variable: None, // QPP doesn't expose intermediate edges
4407                            min_hops,
4408                            max_hops,
4409                            optional,
4410                            target_filter: self.node_filter_expr(
4411                                &target_variable,
4412                                &target_node.labels,
4413                                &target_node.properties,
4414                            ),
4415                            path_variable: path_variable.clone(),
4416                            edge_properties: HashSet::new(),
4417                            is_variable_length: true,
4418                            optional_pattern_vars: optional_pattern_vars.clone(),
4419                            scope_match_variables,
4420                            edge_filter_expr: None,
4421                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4422                            qpp_steps: Some(qpp_step_infos),
4423                        };
4424
4425                        // Handle bound target: filter rebound results against original variable
4426                        if let Some(ref btv) = rebound_target_var {
4427                            // Filter: __rebound_x._vid = x._vid
4428                            let filter_pred = Expr::BinaryOp {
4429                                left: Box::new(Expr::Property(
4430                                    Box::new(Expr::Variable(effective_target_var.clone())),
4431                                    "_vid".to_string(),
4432                                )),
4433                                op: BinaryOp::Eq,
4434                                right: Box::new(Expr::Property(
4435                                    Box::new(Expr::Variable(btv.clone())),
4436                                    "_vid".to_string(),
4437                                )),
4438                            };
4439                            plan = LogicalPlan::Filter {
4440                                input: Box::new(plan),
4441                                predicate: filter_pred,
4442                                optional_variables: if optional {
4443                                    optional_pattern_vars.clone()
4444                                } else {
4445                                    HashSet::new()
4446                                },
4447                            };
4448                        }
4449
4450                        // Add target variable to scope
4451                        if !target_is_bound {
4452                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4453                        }
4454
4455                        // Add path variable to scope
4456                        if let Some(ref pv) = path_variable
4457                            && !pv.is_empty()
4458                            && !is_var_in_scope(vars_in_scope, pv)
4459                        {
4460                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4461                        }
4462                    }
4463                    had_traverses = true;
4464
4465                    // Skip the outer target node if we consumed it
4466                    if outer_target_node.is_some() {
4467                        i += 2; // skip both Parenthesized and the following Node
4468                    } else {
4469                        i += 1;
4470                    }
4471                }
4472            }
4473        }
4474
4475        // If this is a single-node pattern with a path variable, bind the zero-length path
4476        // E.g., `p = (a)` should create a Path with one node and zero edges
4477        if let Some(ref path_var) = path_variable
4478            && !path_var.is_empty()
4479            && !had_traverses
4480            && let Some(node_var) = single_node_variable
4481        {
4482            plan = LogicalPlan::BindZeroLengthPath {
4483                input: Box::new(plan),
4484                node_variable: node_var,
4485                path_variable: path_var.clone(),
4486            };
4487            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4488        }
4489
4490        // Bind fixed-length path from collected node/edge variables
4491        if let Some(ref path_var) = path_variable
4492            && !path_var.is_empty()
4493            && had_traverses
4494            && !path_node_vars.is_empty()
4495            && !is_var_in_scope(vars_in_scope, path_var)
4496        {
4497            plan = LogicalPlan::BindPath {
4498                input: Box::new(plan),
4499                node_variables: path_node_vars,
4500                edge_variables: path_edge_vars,
4501                path_variable: path_var.clone(),
4502            };
4503            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4504        }
4505
4506        Ok(plan)
4507    }
4508
4509    /// Plan a traverse with an explicit source variable name.
4510    ///
4511    /// Returns `(plan, target_variable, effective_target_variable)` where:
4512    /// - `target_variable` is the semantic variable name for downstream scope
4513    /// - `effective_target_variable` is the actual column-name prefix used by
4514    ///   the traverse (may be `__rebound_x` for bound-target patterns)
4515    fn plan_traverse_with_source(
4516        &self,
4517        plan: LogicalPlan,
4518        vars_in_scope: &mut Vec<VariableInfo>,
4519        params: TraverseParams<'_>,
4520        source_variable: &str,
4521        vars_before_pattern: usize,
4522        path_bound_edge_vars: &HashSet<String>,
4523    ) -> Result<(LogicalPlan, String, String)> {
4524        // Check for parameter used as relationship predicate
4525        if let Some(Expr::Parameter(_)) = &params.rel.properties {
4526            return Err(anyhow!(
4527                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
4528            ));
4529        }
4530
4531        let mut edge_type_ids = Vec::new();
4532        let mut dst_labels = Vec::new();
4533        let mut unknown_types = Vec::new();
4534
4535        if params.rel.types.is_empty() {
4536            // All types - include both schema and schemaless edge types
4537            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
4538            edge_type_ids = self.schema.all_edge_type_ids();
4539            for meta in self.schema.edge_types.values() {
4540                dst_labels.extend(meta.dst_labels.iter().cloned());
4541            }
4542        } else {
4543            for type_name in &params.rel.types {
4544                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4545                    // Known type - use standard Traverse with type_id
4546                    edge_type_ids.push(edge_meta.id);
4547                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
4548                } else {
4549                    // Unknown type - will use TraverseMainByType
4550                    unknown_types.push(type_name.clone());
4551                }
4552            }
4553        }
4554
4555        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
4556        edge_type_ids.sort_unstable();
4557        edge_type_ids.dedup();
4558        unknown_types.sort_unstable();
4559        unknown_types.dedup();
4560
4561        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
4562        if target_variable.is_empty() {
4563            target_variable = self.next_anon_var();
4564        }
4565        let target_is_bound =
4566            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
4567
4568        // Check for VariableTypeConflict: relationship variable used as node
4569        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
4570        if let Some(rel_var) = &params.rel.variable
4571            && !rel_var.is_empty()
4572            && rel_var == &target_variable
4573        {
4574            return Err(anyhow!(
4575                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
4576                rel_var
4577            ));
4578        }
4579
4580        // Check for VariableTypeConflict/RelationshipUniquenessViolation
4581        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
4582        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
4583        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
4584        let mut bound_edge_var: Option<String> = None;
4585        let mut bound_edge_list_var: Option<String> = None;
4586        if let Some(rel_var) = &params.rel.variable
4587            && !rel_var.is_empty()
4588            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
4589        {
4590            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
4591                .iter()
4592                .any(|v| v.name == *rel_var);
4593
4594            if info.var_type == VariableType::Edge {
4595                // Check if this edge variable comes from a previous clause (before this MATCH)
4596                if is_from_previous_clause {
4597                    // Edge variable bound from previous clause - this is allowed
4598                    // We'll filter the traversal to match this specific edge
4599                    bound_edge_var = Some(rel_var.clone());
4600                } else {
4601                    // Same relationship variable used twice in the same MATCH clause
4602                    return Err(anyhow!(
4603                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
4604                        rel_var
4605                    ));
4606                }
4607            } else if params.rel.range.is_some()
4608                && is_from_previous_clause
4609                && matches!(
4610                    info.var_type,
4611                    VariableType::Scalar | VariableType::ScalarLiteral
4612                )
4613            {
4614                // Allow VLP rebound against a previously bound relationship list
4615                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
4616                bound_edge_list_var = Some(rel_var.clone());
4617            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
4618                return Err(anyhow!(
4619                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
4620                    rel_var,
4621                    info.var_type
4622                ));
4623            }
4624        }
4625
4626        // Check for VariableTypeConflict: target node variable already bound as non-Node
4627        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
4628        if target_is_bound
4629            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4630            && !info.var_type.is_compatible_with(VariableType::Node)
4631        {
4632            return Err(anyhow!(
4633                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4634                target_variable,
4635                info.var_type
4636            ));
4637        }
4638
4639        // If all requested types are unknown (schemaless), use TraverseMainByType
4640        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
4641        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
4642        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
4643            // All types are unknown - use schemaless traversal
4644
4645            let is_variable_length = params.rel.range.is_some();
4646
4647            const DEFAULT_MAX_HOPS: usize = 100;
4648            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4649                let min = range.min.unwrap_or(1) as usize;
4650                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4651                (min, max)
4652            } else {
4653                (1, 1)
4654            };
4655
4656            // For both single-hop and variable-length paths:
4657            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4658            //   Single-hop: step_var holds a single edge object
4659            //   VLP: step_var holds a list of edge objects
4660            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4661            let step_var = params.rel.variable.clone();
4662            let path_var = params.path_variable.clone();
4663
4664            // Compute scope_match_variables for relationship uniqueness scoping.
4665            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4666                .iter()
4667                .map(|v| v.name.clone())
4668                .collect();
4669            if let Some(ref sv) = step_var {
4670                // Only add the step variable to scope if it's NOT rebound from a previous clause.
4671                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
4672                // filtering because the second MATCH intentionally reuses the same edge.
4673                if bound_edge_var.is_none() {
4674                    scope_match_variables.insert(sv.clone());
4675                }
4676            }
4677            scope_match_variables.insert(target_variable.clone());
4678            // Include bound edge variables from this path for cross-segment Trail mode
4679            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
4680            // edges already claimed by a bound relationship [r] in the same path.
4681            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
4682            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
4683            scope_match_variables.extend(
4684                path_bound_edge_vars
4685                    .iter()
4686                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
4687                    .cloned(),
4688            );
4689
4690            let mut plan = LogicalPlan::TraverseMainByType {
4691                type_names: unknown_types,
4692                input: Box::new(plan),
4693                direction: params.rel.direction.clone(),
4694                source_variable: source_variable.to_string(),
4695                target_variable: target_variable.clone(),
4696                step_variable: step_var.clone(),
4697                min_hops,
4698                max_hops,
4699                optional: params.optional,
4700                target_filter: self.node_filter_expr(
4701                    &target_variable,
4702                    &params.target_node.labels,
4703                    &params.target_node.properties,
4704                ),
4705                path_variable: path_var.clone(),
4706                is_variable_length,
4707                optional_pattern_vars: params.optional_pattern_vars.clone(),
4708                scope_match_variables,
4709                edge_filter_expr: if is_variable_length {
4710                    let filter_var = step_var
4711                        .clone()
4712                        .unwrap_or_else(|| "__anon_edge".to_string());
4713                    self.properties_to_expr(&filter_var, &params.rel.properties)
4714                } else {
4715                    None
4716                },
4717                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4718            };
4719
4720            // Only apply bound target filter for Imported variables (from outer scope/subquery).
4721            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4722            // uses Parameter which requires the value to be in params (subquery context).
4723            if target_is_bound
4724                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4725                && info.var_type == VariableType::Imported
4726            {
4727                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4728            }
4729
4730            // Apply relationship property predicates for fixed-length schemaless
4731            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
4732            // For VLP, predicates are stored inline in edge_filter_expr (above).
4733            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4734            if !is_variable_length
4735                && let Some(edge_var_name) = step_var.as_ref()
4736                && let Some(edge_prop_filter) =
4737                    self.properties_to_expr(edge_var_name, &params.rel.properties)
4738            {
4739                let filter_optional_vars = if params.optional {
4740                    params.optional_pattern_vars.clone()
4741                } else {
4742                    HashSet::new()
4743                };
4744                plan = LogicalPlan::Filter {
4745                    input: Box::new(plan),
4746                    predicate: edge_prop_filter,
4747                    optional_variables: filter_optional_vars,
4748                };
4749            }
4750
4751            // Add the bound variables to scope
4752            if let Some(sv) = &step_var {
4753                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
4754                if is_variable_length
4755                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
4756                {
4757                    info.is_vlp = true;
4758                }
4759            }
4760            if let Some(pv) = &path_var
4761                && !is_var_in_scope(vars_in_scope, pv)
4762            {
4763                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4764            }
4765            if !is_var_in_scope(vars_in_scope, &target_variable) {
4766                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4767            }
4768
4769            return Ok((plan, target_variable.clone(), target_variable));
4770        }
4771
4772        // If we have a mix of known and unknown types, error for now
4773        // (could be extended to Union of Traverse + TraverseMainByType)
4774        if !unknown_types.is_empty() {
4775            return Err(anyhow!(
4776                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
4777                unknown_types
4778            ));
4779        }
4780
4781        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
4782            // Use first label for target_label_id
4783            // For schemaless support, allow unknown target labels
4784            self.schema.get_label_case_insensitive(label_name)
4785        } else if !target_is_bound {
4786            // Infer from edge type(s)
4787            let unique_dsts: Vec<_> = dst_labels
4788                .into_iter()
4789                .collect::<HashSet<_>>()
4790                .into_iter()
4791                .collect();
4792            if unique_dsts.len() == 1 {
4793                let label_name = &unique_dsts[0];
4794                self.schema.get_label_case_insensitive(label_name)
4795            } else {
4796                // Multiple or no destination labels inferred - allow any target
4797                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
4798                // where the edge type can connect to multiple labels
4799                None
4800            }
4801        } else {
4802            None
4803        };
4804
4805        // Check if this is a variable-length pattern (has range specifier like *1..3)
4806        let is_variable_length = params.rel.range.is_some();
4807
4808        // For VLP patterns, default min to 1 and max to a reasonable limit.
4809        // For single-hop patterns (no range), both are 1.
4810        const DEFAULT_MAX_HOPS: usize = 100;
4811        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4812            let min = range.min.unwrap_or(1) as usize;
4813            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4814            (min, max)
4815        } else {
4816            (1, 1)
4817        };
4818
4819        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4820        //   Single-hop: step_var holds a single edge object
4821        //   VLP: step_var holds a list of edge objects
4822        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4823        let step_var = params.rel.variable.clone();
4824        let path_var = params.path_variable.clone();
4825
4826        // If we have a bound edge variable from a previous clause, use a temp variable
4827        // for the Traverse step, then filter to match the bound edge
4828        let rebound_var = bound_edge_var
4829            .as_ref()
4830            .or(bound_edge_list_var.as_ref())
4831            .cloned();
4832        let effective_step_var = if let Some(ref bv) = rebound_var {
4833            Some(format!("__rebound_{}", bv))
4834        } else {
4835            step_var.clone()
4836        };
4837
4838        // If we have a bound target variable from a previous clause (e.g. WITH),
4839        // use a temp variable for the Traverse step, then filter to match the bound
4840        // target — mirroring the bound edge pattern above.
4841        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
4842            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
4843                .map(|info| info.var_type == VariableType::Imported)
4844                .unwrap_or(false);
4845            if !is_imported {
4846                Some(target_variable.clone())
4847            } else {
4848                None
4849            }
4850        } else {
4851            None
4852        };
4853
4854        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4855            format!("__rebound_{}", bv)
4856        } else {
4857            target_variable.clone()
4858        };
4859
4860        // Collect all variables (node + edge) from the current MATCH clause scope
4861        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
4862        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
4863        // if their associated variable is in this set. This prevents relationship
4864        // uniqueness from being enforced across disconnected MATCH clauses.
4865        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4866            .iter()
4867            .map(|v| v.name.clone())
4868            .collect();
4869        // Include the current traverse's edge variable (not yet added to vars_in_scope)
4870        if let Some(ref sv) = effective_step_var {
4871            scope_match_variables.insert(sv.clone());
4872        }
4873        // Include the target variable (not yet added to vars_in_scope)
4874        scope_match_variables.insert(effective_target_var.clone());
4875        // Include bound edge variables from this path for cross-segment Trail mode
4876        // enforcement (same as the schemaless path above).
4877        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
4878
4879        let mut plan = LogicalPlan::Traverse {
4880            input: Box::new(plan),
4881            edge_type_ids,
4882            direction: params.rel.direction.clone(),
4883            source_variable: source_variable.to_string(),
4884            target_variable: effective_target_var.clone(),
4885            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4886            step_variable: effective_step_var.clone(),
4887            min_hops,
4888            max_hops,
4889            optional: params.optional,
4890            target_filter: self.node_filter_expr(
4891                &target_variable,
4892                &params.target_node.labels,
4893                &params.target_node.properties,
4894            ),
4895            path_variable: path_var.clone(),
4896            edge_properties: HashSet::new(),
4897            is_variable_length,
4898            optional_pattern_vars: params.optional_pattern_vars.clone(),
4899            scope_match_variables,
4900            edge_filter_expr: if is_variable_length {
4901                // Use the step variable name, or a fallback for anonymous edges.
4902                // The variable name is used by properties_to_expr to build
4903                // `var.prop = value` expressions. For BFS property checking,
4904                // only the property name and value matter (the variable name
4905                // is stripped during extraction).
4906                let filter_var = effective_step_var
4907                    .clone()
4908                    .unwrap_or_else(|| "__anon_edge".to_string());
4909                self.properties_to_expr(&filter_var, &params.rel.properties)
4910            } else {
4911                None
4912            },
4913            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4914            qpp_steps: None,
4915        };
4916
4917        // Pre-compute optional variables set for filter nodes in this traverse.
4918        // Used by relationship property filters and bound-edge filters below.
4919        let filter_optional_vars = if params.optional {
4920            params.optional_pattern_vars.clone()
4921        } else {
4922            HashSet::new()
4923        };
4924
4925        // Apply relationship property predicates (e.g. [r {k: v}]).
4926        // For VLP, predicates are stored inline in edge_filter_expr (above).
4927        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4928        if !is_variable_length
4929            && let Some(edge_var_name) = effective_step_var.as_ref()
4930            && let Some(edge_prop_filter) =
4931                self.properties_to_expr(edge_var_name, &params.rel.properties)
4932        {
4933            plan = LogicalPlan::Filter {
4934                input: Box::new(plan),
4935                predicate: edge_prop_filter,
4936                optional_variables: filter_optional_vars.clone(),
4937            };
4938        }
4939
4940        // Only apply bound target filter for Imported variables (from outer scope/subquery).
4941        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4942        // uses Parameter which requires the value to be in params (subquery context).
4943        if target_is_bound
4944            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4945            && info.var_type == VariableType::Imported
4946        {
4947            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4948        }
4949
4950        // If we have a bound edge variable, add a filter to match it
4951        if let Some(ref bv) = bound_edge_var {
4952            let temp_var = format!("__rebound_{}", bv);
4953            let bound_check = Expr::BinaryOp {
4954                left: Box::new(Expr::Property(
4955                    Box::new(Expr::Variable(temp_var)),
4956                    "_eid".to_string(),
4957                )),
4958                op: BinaryOp::Eq,
4959                right: Box::new(Expr::Property(
4960                    Box::new(Expr::Variable(bv.clone())),
4961                    "_eid".to_string(),
4962                )),
4963            };
4964            plan = LogicalPlan::Filter {
4965                input: Box::new(plan),
4966                predicate: bound_check,
4967                optional_variables: filter_optional_vars.clone(),
4968            };
4969        }
4970
4971        // If we have a bound relationship list variable for a VLP pattern,
4972        // add a filter to match the traversed relationship list exactly.
4973        if let Some(ref bv) = bound_edge_list_var {
4974            let temp_var = format!("__rebound_{}", bv);
4975            let temp_eids = Expr::ListComprehension {
4976                variable: "__rebound_edge".to_string(),
4977                list: Box::new(Expr::Variable(temp_var)),
4978                where_clause: None,
4979                map_expr: Box::new(Expr::FunctionCall {
4980                    name: "toInteger".to_string(),
4981                    args: vec![Expr::Property(
4982                        Box::new(Expr::Variable("__rebound_edge".to_string())),
4983                        "_eid".to_string(),
4984                    )],
4985                    distinct: false,
4986                    window_spec: None,
4987                }),
4988            };
4989            let bound_eids = Expr::ListComprehension {
4990                variable: "__bound_edge".to_string(),
4991                list: Box::new(Expr::Variable(bv.clone())),
4992                where_clause: None,
4993                map_expr: Box::new(Expr::FunctionCall {
4994                    name: "toInteger".to_string(),
4995                    args: vec![Expr::Property(
4996                        Box::new(Expr::Variable("__bound_edge".to_string())),
4997                        "_eid".to_string(),
4998                    )],
4999                    distinct: false,
5000                    window_spec: None,
5001                }),
5002            };
5003            let bound_list_check = Expr::BinaryOp {
5004                left: Box::new(temp_eids),
5005                op: BinaryOp::Eq,
5006                right: Box::new(bound_eids),
5007            };
5008            plan = LogicalPlan::Filter {
5009                input: Box::new(plan),
5010                predicate: bound_list_check,
5011                optional_variables: filter_optional_vars.clone(),
5012            };
5013        }
5014
5015        // If we have a bound target variable (non-imported), add a filter to constrain
5016        // the traversal output to match the previously bound target node.
5017        if let Some(ref bv) = rebound_target_var {
5018            let temp_var = format!("__rebound_{}", bv);
5019            let bound_check = Expr::BinaryOp {
5020                left: Box::new(Expr::Property(
5021                    Box::new(Expr::Variable(temp_var.clone())),
5022                    "_vid".to_string(),
5023                )),
5024                op: BinaryOp::Eq,
5025                right: Box::new(Expr::Property(
5026                    Box::new(Expr::Variable(bv.clone())),
5027                    "_vid".to_string(),
5028                )),
5029            };
5030            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5031            // so that OptionalFilterExec excludes it from the grouping key and
5032            // properly nullifies it in recovery rows when all matches are filtered out.
5033            // Without this, each traverse result creates its own group (keyed by
5034            // __rebound_c._vid), and null-row recovery emits a spurious null row
5035            // for every non-matching target instead of one per source group.
5036            let mut rebound_filter_vars = filter_optional_vars;
5037            if params.optional {
5038                rebound_filter_vars.insert(temp_var);
5039            }
5040            plan = LogicalPlan::Filter {
5041                input: Box::new(plan),
5042                predicate: bound_check,
5043                optional_variables: rebound_filter_vars,
5044            };
5045        }
5046
5047        // Add the bound variables to scope
5048        // Skip adding the edge variable if it's already bound from a previous clause
5049        if let Some(sv) = &step_var
5050            && bound_edge_var.is_none()
5051            && bound_edge_list_var.is_none()
5052        {
5053            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5054            if is_variable_length
5055                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5056            {
5057                info.is_vlp = true;
5058            }
5059        }
5060        if let Some(pv) = &path_var
5061            && !is_var_in_scope(vars_in_scope, pv)
5062        {
5063            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5064        }
5065        if !is_var_in_scope(vars_in_scope, &target_variable) {
5066            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5067        }
5068
5069        Ok((plan, target_variable, effective_target_var))
5070    }
5071
5072    /// Combine a new scan plan with an existing plan.
5073    ///
5074    /// If the existing plan is `Empty`, returns the new plan directly.
5075    /// Otherwise, wraps them in a `CrossJoin`.
5076    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5077        if matches!(existing, LogicalPlan::Empty) {
5078            new
5079        } else {
5080            LogicalPlan::CrossJoin {
5081                left: Box::new(existing),
5082                right: Box::new(new),
5083            }
5084        }
5085    }
5086
5087    /// Split node map predicates into scan-pushable and residual filters.
5088    ///
5089    /// A predicate is scan-pushable when its value expression references only
5090    /// the node variable itself (or no variables). Predicates referencing other
5091    /// in-scope variables (correlated predicates) are returned as residual so
5092    /// they can be applied after joining with the existing plan.
5093    fn split_node_property_filters_for_scan(
5094        &self,
5095        variable: &str,
5096        properties: &Option<Expr>,
5097    ) -> (Option<Expr>, Option<Expr>) {
5098        let entries = match properties {
5099            Some(Expr::Map(entries)) => entries,
5100            _ => return (None, None),
5101        };
5102
5103        if entries.is_empty() {
5104            return (None, None);
5105        }
5106
5107        let mut pushdown_entries = Vec::new();
5108        let mut residual_entries = Vec::new();
5109
5110        for (prop, val_expr) in entries {
5111            let vars = collect_expr_variables(val_expr);
5112            if vars.iter().all(|v| v == variable) {
5113                pushdown_entries.push((prop.clone(), val_expr.clone()));
5114            } else {
5115                residual_entries.push((prop.clone(), val_expr.clone()));
5116            }
5117        }
5118
5119        let pushdown_map = if pushdown_entries.is_empty() {
5120            None
5121        } else {
5122            Some(Expr::Map(pushdown_entries))
5123        };
5124        let residual_map = if residual_entries.is_empty() {
5125            None
5126        } else {
5127            Some(Expr::Map(residual_entries))
5128        };
5129
5130        (
5131            self.properties_to_expr(variable, &pushdown_map),
5132            self.properties_to_expr(variable, &residual_map),
5133        )
5134    }
5135
5136    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5137    fn plan_unbound_node(
5138        &self,
5139        node: &NodePattern,
5140        variable: &str,
5141        plan: LogicalPlan,
5142        optional: bool,
5143    ) -> Result<LogicalPlan> {
5144        // Properties handling
5145        let properties = match &node.properties {
5146            Some(Expr::Map(entries)) => entries.as_slice(),
5147            Some(Expr::Parameter(_)) => {
5148                return Err(anyhow!(
5149                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5150                ));
5151            }
5152            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5153            None => &[],
5154        };
5155
5156        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5157
5158        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5159            if let Some(predicate) = residual {
5160                LogicalPlan::Filter {
5161                    input: Box::new(input),
5162                    predicate,
5163                    optional_variables: HashSet::new(),
5164                }
5165            } else {
5166                input
5167            }
5168        };
5169
5170        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5171            self.split_node_property_filters_for_scan(variable, &node.properties)
5172        } else {
5173            (self.properties_to_expr(variable, &node.properties), None)
5174        };
5175
5176        // Check for ext_id in properties when no label is specified
5177        if node.labels.is_empty() {
5178            // Try to find ext_id property for main table lookup
5179            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5180                // Extract the ext_id value as a string
5181                let ext_id = match ext_id_value {
5182                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5183                    _ => {
5184                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5185                    }
5186                };
5187
5188                // Build filter for remaining properties (excluding ext_id)
5189                let remaining_props: Vec<_> = properties
5190                    .iter()
5191                    .filter(|(k, _)| k != "ext_id")
5192                    .cloned()
5193                    .collect();
5194
5195                let remaining_expr = if remaining_props.is_empty() {
5196                    None
5197                } else {
5198                    Some(Expr::Map(remaining_props))
5199                };
5200
5201                let (prop_filter, residual_filter) = if has_existing_scope {
5202                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5203                } else {
5204                    (self.properties_to_expr(variable, &remaining_expr), None)
5205                };
5206
5207                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5208                    variable: variable.to_string(),
5209                    ext_id,
5210                    filter: prop_filter,
5211                    optional,
5212                };
5213
5214                let joined = Self::join_with_plan(plan, ext_id_lookup);
5215                return Ok(apply_residual_filter(joined, residual_filter));
5216            }
5217
5218            // No ext_id: create ScanAll for unlabeled node pattern
5219            let scan_all = LogicalPlan::ScanAll {
5220                variable: variable.to_string(),
5221                filter: node_scan_filter,
5222                optional,
5223            };
5224
5225            let joined = Self::join_with_plan(plan, scan_all);
5226            return Ok(apply_residual_filter(joined, node_residual_filter));
5227        }
5228
5229        // Use first label for label_id (primary label for dataset selection)
5230        let label_name = &node.labels[0];
5231
5232        // Check if label exists in schema
5233        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5234            // Known label: use standard Scan
5235            let scan = LogicalPlan::Scan {
5236                label_id: label_meta.id,
5237                labels: node.labels.clone(),
5238                variable: variable.to_string(),
5239                filter: node_scan_filter,
5240                optional,
5241            };
5242
5243            let joined = Self::join_with_plan(plan, scan);
5244            Ok(apply_residual_filter(joined, node_residual_filter))
5245        } else {
5246            // Unknown label: use ScanMainByLabels for schemaless support
5247            let scan_main = LogicalPlan::ScanMainByLabels {
5248                labels: node.labels.clone(),
5249                variable: variable.to_string(),
5250                filter: node_scan_filter,
5251                optional,
5252            };
5253
5254            let joined = Self::join_with_plan(plan, scan_main);
5255            Ok(apply_residual_filter(joined, node_residual_filter))
5256        }
5257    }
5258
5259    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5260    ///
5261    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5262    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5263    fn plan_where_clause(
5264        &self,
5265        predicate: &Expr,
5266        plan: LogicalPlan,
5267        vars_in_scope: &[VariableInfo],
5268        optional_vars: HashSet<String>,
5269    ) -> Result<LogicalPlan> {
5270        // Validate no aggregation functions in WHERE clause
5271        validate_no_aggregation_in_where(predicate)?;
5272
5273        // Validate all variables used are in scope
5274        validate_expression_variables(predicate, vars_in_scope)?;
5275
5276        // Validate expression types (function args, boolean operators)
5277        validate_expression(predicate, vars_in_scope)?;
5278
5279        // Check that WHERE predicate isn't a bare node/edge/path variable
5280        if let Expr::Variable(var_name) = predicate
5281            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5282            && matches!(
5283                info.var_type,
5284                VariableType::Node | VariableType::Edge | VariableType::Path
5285            )
5286        {
5287            return Err(anyhow!(
5288                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5289                info.var_type
5290            ));
5291        }
5292
5293        let mut plan = plan;
5294
5295        // Transform VALID_AT macro to function call
5296        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5297
5298        let mut current_predicate =
5299            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5300
5301        // 1. Try to extract vector_similarity predicate for optimization
5302        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
5303            let vs = &extraction.predicate;
5304            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
5305                plan = Self::replace_scan_with_knn(
5306                    plan,
5307                    &vs.variable,
5308                    &vs.property,
5309                    vs.query.clone(),
5310                    vs.threshold,
5311                );
5312                if let Some(residual) = extraction.residual {
5313                    current_predicate = residual;
5314                } else {
5315                    current_predicate = Expr::TRUE;
5316                }
5317            }
5318        }
5319
5320        // 3. Push eligible predicates to Scan OR Traverse filters
5321        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
5322        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
5323        // rows. Let them stay in the Filter operator which handles NULL preservation.
5324        for var in vars_in_scope {
5325            // Skip pushdown for optional variables - they need NULL preservation in Filter
5326            if optional_vars.contains(&var.name) {
5327                continue;
5328            }
5329
5330            // Check if var is produced by a Scan
5331            if Self::find_scan_label_id(&plan, &var.name).is_some() {
5332                let (pushable, residual) =
5333                    Self::extract_variable_predicates(&current_predicate, &var.name);
5334
5335                for pred in pushable {
5336                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
5337                }
5338
5339                if let Some(r) = residual {
5340                    current_predicate = r;
5341                } else {
5342                    current_predicate = Expr::TRUE;
5343                }
5344            } else if Self::is_traverse_target(&plan, &var.name) {
5345                // Push to Traverse
5346                let (pushable, residual) =
5347                    Self::extract_variable_predicates(&current_predicate, &var.name);
5348
5349                for pred in pushable {
5350                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
5351                }
5352
5353                if let Some(r) = residual {
5354                    current_predicate = r;
5355                } else {
5356                    current_predicate = Expr::TRUE;
5357                }
5358            }
5359        }
5360
5361        // 4. Push predicates to Apply.input_filter
5362        // This filters input rows BEFORE executing correlated subqueries.
5363        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
5364
5365        // 5. Add Filter node for any remaining predicates
5366        if !current_predicate.is_true_literal() {
5367            plan = LogicalPlan::Filter {
5368                input: Box::new(plan),
5369                predicate: current_predicate,
5370                optional_variables: optional_vars,
5371            };
5372        }
5373
5374        Ok(plan)
5375    }
5376
5377    fn rewrite_predicates_using_indexes(
5378        &self,
5379        predicate: &Expr,
5380        plan: &LogicalPlan,
5381        vars_in_scope: &[VariableInfo],
5382    ) -> Result<Expr> {
5383        let mut rewritten = predicate.clone();
5384
5385        for var in vars_in_scope {
5386            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
5387                // Find label name
5388                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
5389
5390                if let Some(label) = label_name
5391                    && let Some(props) = self.schema.properties.get(&label)
5392                {
5393                    for (gen_col, meta) in props {
5394                        if meta.generation_expression.is_some() {
5395                            // Use cached parsed expression
5396                            if let Some(schema_expr) =
5397                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
5398                            {
5399                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
5400                                rewritten = Self::replace_expression(
5401                                    rewritten,
5402                                    schema_expr,
5403                                    &var.name,
5404                                    gen_col,
5405                                );
5406                            }
5407                        }
5408                    }
5409                }
5410            }
5411        }
5412        Ok(rewritten)
5413    }
5414
5415    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
5416        // First, normalize schema_expr to use query_var
5417        let schema_var = schema_expr.extract_variable();
5418
5419        if let Some(s_var) = schema_var {
5420            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
5421
5422            if expr == target_expr {
5423                return Expr::Property(
5424                    Box::new(Expr::Variable(query_var.to_string())),
5425                    gen_col.to_string(),
5426                );
5427            }
5428        }
5429
5430        // Recurse
5431        match expr {
5432            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
5433                left: Box::new(Self::replace_expression(
5434                    *left,
5435                    schema_expr,
5436                    query_var,
5437                    gen_col,
5438                )),
5439                op,
5440                right: Box::new(Self::replace_expression(
5441                    *right,
5442                    schema_expr,
5443                    query_var,
5444                    gen_col,
5445                )),
5446            },
5447            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
5448                op,
5449                expr: Box::new(Self::replace_expression(
5450                    *expr,
5451                    schema_expr,
5452                    query_var,
5453                    gen_col,
5454                )),
5455            },
5456            Expr::FunctionCall {
5457                name,
5458                args,
5459                distinct,
5460                window_spec,
5461            } => Expr::FunctionCall {
5462                name,
5463                args: args
5464                    .into_iter()
5465                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
5466                    .collect(),
5467                distinct,
5468                window_spec,
5469            },
5470            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
5471                *expr,
5472                schema_expr,
5473                query_var,
5474                gen_col,
5475            ))),
5476            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
5477                *expr,
5478                schema_expr,
5479                query_var,
5480                gen_col,
5481            ))),
5482            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
5483                *expr,
5484                schema_expr,
5485                query_var,
5486                gen_col,
5487            ))),
5488            Expr::ArrayIndex {
5489                array: e,
5490                index: idx,
5491            } => Expr::ArrayIndex {
5492                array: Box::new(Self::replace_expression(
5493                    *e,
5494                    schema_expr,
5495                    query_var,
5496                    gen_col,
5497                )),
5498                index: Box::new(Self::replace_expression(
5499                    *idx,
5500                    schema_expr,
5501                    query_var,
5502                    gen_col,
5503                )),
5504            },
5505            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
5506                array: Box::new(Self::replace_expression(
5507                    *array,
5508                    schema_expr,
5509                    query_var,
5510                    gen_col,
5511                )),
5512                start: start.map(|s| {
5513                    Box::new(Self::replace_expression(
5514                        *s,
5515                        schema_expr,
5516                        query_var,
5517                        gen_col,
5518                    ))
5519                }),
5520                end: end.map(|e| {
5521                    Box::new(Self::replace_expression(
5522                        *e,
5523                        schema_expr,
5524                        query_var,
5525                        gen_col,
5526                    ))
5527                }),
5528            },
5529            Expr::List(exprs) => Expr::List(
5530                exprs
5531                    .into_iter()
5532                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
5533                    .collect(),
5534            ),
5535            Expr::Map(entries) => Expr::Map(
5536                entries
5537                    .into_iter()
5538                    .map(|(k, v)| {
5539                        (
5540                            k,
5541                            Self::replace_expression(v, schema_expr, query_var, gen_col),
5542                        )
5543                    })
5544                    .collect(),
5545            ),
5546            Expr::Property(e, prop) => Expr::Property(
5547                Box::new(Self::replace_expression(
5548                    *e,
5549                    schema_expr,
5550                    query_var,
5551                    gen_col,
5552                )),
5553                prop,
5554            ),
5555            Expr::Case {
5556                expr: case_expr,
5557                when_then,
5558                else_expr,
5559            } => Expr::Case {
5560                expr: case_expr.map(|e| {
5561                    Box::new(Self::replace_expression(
5562                        *e,
5563                        schema_expr,
5564                        query_var,
5565                        gen_col,
5566                    ))
5567                }),
5568                when_then: when_then
5569                    .into_iter()
5570                    .map(|(w, t)| {
5571                        (
5572                            Self::replace_expression(w, schema_expr, query_var, gen_col),
5573                            Self::replace_expression(t, schema_expr, query_var, gen_col),
5574                        )
5575                    })
5576                    .collect(),
5577                else_expr: else_expr.map(|e| {
5578                    Box::new(Self::replace_expression(
5579                        *e,
5580                        schema_expr,
5581                        query_var,
5582                        gen_col,
5583                    ))
5584                }),
5585            },
5586            Expr::Reduce {
5587                accumulator,
5588                init,
5589                variable: reduce_var,
5590                list,
5591                expr: reduce_expr,
5592            } => Expr::Reduce {
5593                accumulator,
5594                init: Box::new(Self::replace_expression(
5595                    *init,
5596                    schema_expr,
5597                    query_var,
5598                    gen_col,
5599                )),
5600                variable: reduce_var,
5601                list: Box::new(Self::replace_expression(
5602                    *list,
5603                    schema_expr,
5604                    query_var,
5605                    gen_col,
5606                )),
5607                expr: Box::new(Self::replace_expression(
5608                    *reduce_expr,
5609                    schema_expr,
5610                    query_var,
5611                    gen_col,
5612                )),
5613            },
5614
5615            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
5616            _ => expr,
5617        }
5618    }
5619
5620    /// Check if the variable is the target of a Traverse node
5621    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
5622        match plan {
5623            LogicalPlan::Traverse {
5624                target_variable,
5625                input,
5626                ..
5627            } => target_variable == variable || Self::is_traverse_target(input, variable),
5628            LogicalPlan::Filter { input, .. }
5629            | LogicalPlan::Project { input, .. }
5630            | LogicalPlan::Sort { input, .. }
5631            | LogicalPlan::Limit { input, .. }
5632            | LogicalPlan::Aggregate { input, .. }
5633            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
5634            LogicalPlan::CrossJoin { left, right } => {
5635                Self::is_traverse_target(left, variable)
5636                    || Self::is_traverse_target(right, variable)
5637            }
5638            _ => false,
5639        }
5640    }
5641
5642    /// Push a predicate into a Traverse's target_filter for the specified variable
5643    fn push_predicate_to_traverse(
5644        plan: LogicalPlan,
5645        variable: &str,
5646        predicate: Expr,
5647    ) -> LogicalPlan {
5648        match plan {
5649            LogicalPlan::Traverse {
5650                input,
5651                edge_type_ids,
5652                direction,
5653                source_variable,
5654                target_variable,
5655                target_label_id,
5656                step_variable,
5657                min_hops,
5658                max_hops,
5659                optional,
5660                target_filter,
5661                path_variable,
5662                edge_properties,
5663                is_variable_length,
5664                optional_pattern_vars,
5665                scope_match_variables,
5666                edge_filter_expr,
5667                path_mode,
5668                qpp_steps,
5669            } => {
5670                if target_variable == variable {
5671                    // Found the traverse producing this variable
5672                    let new_filter = match target_filter {
5673                        Some(existing) => Some(Expr::BinaryOp {
5674                            left: Box::new(existing),
5675                            op: BinaryOp::And,
5676                            right: Box::new(predicate),
5677                        }),
5678                        None => Some(predicate),
5679                    };
5680                    LogicalPlan::Traverse {
5681                        input,
5682                        edge_type_ids,
5683                        direction,
5684                        source_variable,
5685                        target_variable,
5686                        target_label_id,
5687                        step_variable,
5688                        min_hops,
5689                        max_hops,
5690                        optional,
5691                        target_filter: new_filter,
5692                        path_variable,
5693                        edge_properties,
5694                        is_variable_length,
5695                        optional_pattern_vars,
5696                        scope_match_variables,
5697                        edge_filter_expr,
5698                        path_mode,
5699                        qpp_steps,
5700                    }
5701                } else {
5702                    // Recurse into input
5703                    LogicalPlan::Traverse {
5704                        input: Box::new(Self::push_predicate_to_traverse(
5705                            *input, variable, predicate,
5706                        )),
5707                        edge_type_ids,
5708                        direction,
5709                        source_variable,
5710                        target_variable,
5711                        target_label_id,
5712                        step_variable,
5713                        min_hops,
5714                        max_hops,
5715                        optional,
5716                        target_filter,
5717                        path_variable,
5718                        edge_properties,
5719                        is_variable_length,
5720                        optional_pattern_vars,
5721                        scope_match_variables,
5722                        edge_filter_expr,
5723                        path_mode,
5724                        qpp_steps,
5725                    }
5726                }
5727            }
5728            LogicalPlan::Filter {
5729                input,
5730                predicate: p,
5731                optional_variables: opt_vars,
5732            } => LogicalPlan::Filter {
5733                input: Box::new(Self::push_predicate_to_traverse(
5734                    *input, variable, predicate,
5735                )),
5736                predicate: p,
5737                optional_variables: opt_vars,
5738            },
5739            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
5740                input: Box::new(Self::push_predicate_to_traverse(
5741                    *input, variable, predicate,
5742                )),
5743                projections,
5744            },
5745            LogicalPlan::CrossJoin { left, right } => {
5746                // Check which side has the variable
5747                if Self::is_traverse_target(&left, variable) {
5748                    LogicalPlan::CrossJoin {
5749                        left: Box::new(Self::push_predicate_to_traverse(
5750                            *left, variable, predicate,
5751                        )),
5752                        right,
5753                    }
5754                } else {
5755                    LogicalPlan::CrossJoin {
5756                        left,
5757                        right: Box::new(Self::push_predicate_to_traverse(
5758                            *right, variable, predicate,
5759                        )),
5760                    }
5761                }
5762            }
5763            other => other,
5764        }
5765    }
5766
5767    /// Plan a WITH clause, handling aggregations and projections.
5768    fn plan_with_clause(
5769        &self,
5770        with_clause: &WithClause,
5771        plan: LogicalPlan,
5772        vars_in_scope: &[VariableInfo],
5773    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
5774        let mut plan = plan;
5775        let mut group_by: Vec<Expr> = Vec::new();
5776        let mut aggregates: Vec<Expr> = Vec::new();
5777        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
5778        let mut has_agg = false;
5779        let mut projections = Vec::new();
5780        let mut new_vars: Vec<VariableInfo> = Vec::new();
5781        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
5782        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
5783        let mut projected_aliases: HashSet<String> = HashSet::new();
5784        let mut has_unaliased_non_variable_expr = false;
5785
5786        for item in &with_clause.items {
5787            match item {
5788                ReturnItem::All => {
5789                    // WITH * - add all variables in scope
5790                    for v in vars_in_scope {
5791                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5792                        projected_aliases.insert(v.name.clone());
5793                        projected_simple_reprs.insert(v.name.clone());
5794                    }
5795                    new_vars.extend(vars_in_scope.iter().cloned());
5796                }
5797                ReturnItem::Expr { expr, alias, .. } => {
5798                    if matches!(expr, Expr::Wildcard) {
5799                        for v in vars_in_scope {
5800                            projections
5801                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5802                            projected_aliases.insert(v.name.clone());
5803                            projected_simple_reprs.insert(v.name.clone());
5804                        }
5805                        new_vars.extend(vars_in_scope.iter().cloned());
5806                    } else {
5807                        // Validate expression variables and syntax
5808                        validate_expression_variables(expr, vars_in_scope)?;
5809                        validate_expression(expr, vars_in_scope)?;
5810                        // Pattern predicates are not allowed in WITH
5811                        if contains_pattern_predicate(expr) {
5812                            return Err(anyhow!(
5813                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
5814                            ));
5815                        }
5816
5817                        projections.push((expr.clone(), alias.clone()));
5818                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
5819                            // Bare aggregate — push directly
5820                            has_agg = true;
5821                            aggregates.push(expr.clone());
5822                            projected_aggregate_reprs.insert(expr.to_string_repr());
5823                        } else if !is_window_function(expr)
5824                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
5825                        {
5826                            // Compound aggregate or expression containing aggregates
5827                            has_agg = true;
5828                            compound_agg_exprs.push(expr.clone());
5829                            for inner in extract_inner_aggregates(expr) {
5830                                let repr = inner.to_string_repr();
5831                                if !projected_aggregate_reprs.contains(&repr) {
5832                                    aggregates.push(inner);
5833                                    projected_aggregate_reprs.insert(repr);
5834                                }
5835                            }
5836                        } else if !group_by.contains(expr) {
5837                            group_by.push(expr.clone());
5838                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
5839                                projected_simple_reprs.insert(expr.to_string_repr());
5840                            }
5841                        }
5842
5843                        // Preserve non-scalar type information when WITH aliases
5844                        // entity/path-capable expressions.
5845                        if let Some(a) = alias {
5846                            if projected_aliases.contains(a) {
5847                                return Err(anyhow!(
5848                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5849                                    a
5850                                ));
5851                            }
5852                            let inferred = infer_with_output_type(expr, vars_in_scope);
5853                            new_vars.push(VariableInfo::new(a.clone(), inferred));
5854                            projected_aliases.insert(a.clone());
5855                        } else if let Expr::Variable(v) = expr {
5856                            if projected_aliases.contains(v) {
5857                                return Err(anyhow!(
5858                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5859                                    v
5860                                ));
5861                            }
5862                            // Preserve the original type if the variable is just passed through
5863                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
5864                                new_vars.push(existing.clone());
5865                            } else {
5866                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
5867                            }
5868                            projected_aliases.insert(v.clone());
5869                        } else {
5870                            has_unaliased_non_variable_expr = true;
5871                        }
5872                    }
5873                }
5874            }
5875        }
5876
5877        // Collect extra variables that need to survive the projection stage
5878        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
5879        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
5880        let mut passthrough_extras: Vec<String> = Vec::new();
5881        let mut seen_passthrough: HashSet<String> = HashSet::new();
5882
5883        if let Some(predicate) = &with_clause.where_clause {
5884            for name in collect_expr_variables(predicate) {
5885                if !projected_names.contains(name.as_str())
5886                    && find_var_in_scope(vars_in_scope, &name).is_some()
5887                    && seen_passthrough.insert(name.clone())
5888                {
5889                    passthrough_extras.push(name);
5890                }
5891            }
5892        }
5893
5894        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
5895        // Carry those variables through the projection so Sort can resolve them.
5896        if !has_agg && let Some(order_by) = &with_clause.order_by {
5897            for item in order_by {
5898                for name in collect_expr_variables(&item.expr) {
5899                    if !projected_names.contains(name.as_str())
5900                        && find_var_in_scope(vars_in_scope, &name).is_some()
5901                        && seen_passthrough.insert(name.clone())
5902                    {
5903                        passthrough_extras.push(name);
5904                    }
5905                }
5906            }
5907        }
5908
5909        let needs_cleanup = !passthrough_extras.is_empty();
5910        for extra in &passthrough_extras {
5911            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
5912        }
5913
5914        // Validate compound aggregate expressions: non-aggregate refs must be
5915        // individually present in the group_by as simple variables or properties.
5916        if has_agg {
5917            let group_by_reprs: HashSet<String> =
5918                group_by.iter().map(|e| e.to_string_repr()).collect();
5919            for expr in &compound_agg_exprs {
5920                let mut refs = Vec::new();
5921                collect_non_aggregate_refs(expr, false, &mut refs);
5922                for r in &refs {
5923                    let is_covered = match r {
5924                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
5925                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
5926                    };
5927                    if !is_covered {
5928                        return Err(anyhow!(
5929                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
5930                        ));
5931                    }
5932                }
5933            }
5934        }
5935
5936        if has_agg {
5937            plan = LogicalPlan::Aggregate {
5938                input: Box::new(plan),
5939                group_by,
5940                aggregates,
5941            };
5942
5943            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
5944            // can reference the WITH aliases instead of raw column names.
5945            let rename_projections: Vec<(Expr, Option<String>)> = projections
5946                .iter()
5947                .map(|(expr, alias)| {
5948                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
5949                        // Bare aggregate — reference by column name
5950                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
5951                    } else if is_compound_aggregate(expr)
5952                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
5953                    {
5954                        // Compound aggregate — replace inner aggregates with
5955                        // column references, keep outer expression
5956                        (replace_aggregates_with_columns(expr), alias.clone())
5957                    } else {
5958                        (Expr::Variable(expr.to_string_repr()), alias.clone())
5959                    }
5960                })
5961                .collect();
5962            plan = LogicalPlan::Project {
5963                input: Box::new(plan),
5964                projections: rename_projections,
5965            };
5966        } else if !projections.is_empty() {
5967            plan = LogicalPlan::Project {
5968                input: Box::new(plan),
5969                projections: projections.clone(),
5970            };
5971        }
5972
5973        // Apply the WHERE filter (post-projection, with extras still visible).
5974        if let Some(predicate) = &with_clause.where_clause {
5975            plan = LogicalPlan::Filter {
5976                input: Box::new(plan),
5977                predicate: predicate.clone(),
5978                optional_variables: HashSet::new(),
5979            };
5980        }
5981
5982        // Validate and apply ORDER BY for WITH clause.
5983        // Keep pre-WITH vars in scope for parser compatibility, then apply
5984        // stricter checks for aggregate-containing ORDER BY items.
5985        if let Some(order_by) = &with_clause.order_by {
5986            // Build a mapping from aliases and projected expression reprs to
5987            // output columns of the preceding Project/Aggregate pipeline.
5988            let with_order_aliases: HashMap<String, Expr> = projections
5989                .iter()
5990                .flat_map(|(expr, alias)| {
5991                    let output_col = if let Some(a) = alias {
5992                        a.clone()
5993                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
5994                        aggregate_column_name(expr)
5995                    } else {
5996                        expr.to_string_repr()
5997                    };
5998
5999                    let mut entries = Vec::new();
6000                    // ORDER BY alias
6001                    if let Some(a) = alias {
6002                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
6003                    }
6004                    // ORDER BY projected expression (e.g. me.age)
6005                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
6006                    entries
6007                })
6008                .collect();
6009
6010            let order_by_scope: Vec<VariableInfo> = {
6011                let mut scope = new_vars.clone();
6012                for v in vars_in_scope {
6013                    if !is_var_in_scope(&scope, &v.name) {
6014                        scope.push(v.clone());
6015                    }
6016                }
6017                scope
6018            };
6019            for item in order_by {
6020                validate_expression_variables(&item.expr, &order_by_scope)?;
6021                validate_expression(&item.expr, &order_by_scope)?;
6022                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
6023                if has_aggregate_in_item && !has_agg {
6024                    return Err(anyhow!(
6025                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
6026                    ));
6027                }
6028                if has_agg && has_aggregate_in_item {
6029                    validate_with_order_by_aggregate_item(
6030                        &item.expr,
6031                        &projected_aggregate_reprs,
6032                        &projected_simple_reprs,
6033                        &projected_aliases,
6034                    )?;
6035                }
6036            }
6037            let rewritten_order_by: Vec<SortItem> = order_by
6038                .iter()
6039                .map(|item| {
6040                    let mut expr =
6041                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
6042                    if has_agg {
6043                        // Rewrite any aggregate calls to the aggregate output
6044                        // columns produced by Aggregate.
6045                        expr = replace_aggregates_with_columns(&expr);
6046                        // Then re-map projected property expressions to aliases
6047                        // from the WITH projection.
6048                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
6049                    }
6050                    SortItem {
6051                        expr,
6052                        ascending: item.ascending,
6053                    }
6054                })
6055                .collect();
6056            plan = LogicalPlan::Sort {
6057                input: Box::new(plan),
6058                order_by: rewritten_order_by,
6059            };
6060        }
6061
6062        // Non-variable expressions in WITH must be aliased.
6063        // This check is intentionally placed after ORDER BY validation so
6064        // higher-priority semantic errors (e.g., ambiguous aggregation in
6065        // ORDER BY) can surface first.
6066        if has_unaliased_non_variable_expr {
6067            return Err(anyhow!(
6068                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
6069            ));
6070        }
6071
6072        // Validate and apply SKIP/LIMIT for WITH clause
6073        let skip = with_clause
6074            .skip
6075            .as_ref()
6076            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
6077            .transpose()?
6078            .flatten();
6079        let fetch = with_clause
6080            .limit
6081            .as_ref()
6082            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
6083            .transpose()?
6084            .flatten();
6085
6086        if skip.is_some() || fetch.is_some() {
6087            plan = LogicalPlan::Limit {
6088                input: Box::new(plan),
6089                skip,
6090                fetch,
6091            };
6092        }
6093
6094        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
6095        if needs_cleanup {
6096            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
6097                .iter()
6098                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
6099                .collect();
6100            plan = LogicalPlan::Project {
6101                input: Box::new(plan),
6102                projections: cleanup_projections,
6103            };
6104        }
6105
6106        if with_clause.distinct {
6107            plan = LogicalPlan::Distinct {
6108                input: Box::new(plan),
6109            };
6110        }
6111
6112        Ok((plan, new_vars))
6113    }
6114
6115    fn plan_with_recursive(
6116        &self,
6117        with_recursive: &WithRecursiveClause,
6118        _prev_plan: LogicalPlan,
6119        vars_in_scope: &[VariableInfo],
6120    ) -> Result<LogicalPlan> {
6121        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
6122        match &*with_recursive.query {
6123            Query::Union { left, right, .. } => {
6124                // Plan the anchor (initial) query with current scope
6125                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
6126
6127                // Plan the recursive query with the CTE name added to scope
6128                // so it can reference itself
6129                let mut recursive_scope = vars_in_scope.to_vec();
6130                recursive_scope.push(VariableInfo::new(
6131                    with_recursive.name.clone(),
6132                    VariableType::Scalar,
6133                ));
6134                let recursive_plan =
6135                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
6136
6137                Ok(LogicalPlan::RecursiveCTE {
6138                    cte_name: with_recursive.name.clone(),
6139                    initial: Box::new(initial_plan),
6140                    recursive: Box::new(recursive_plan),
6141                })
6142            }
6143            _ => Err(anyhow::anyhow!(
6144                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
6145            )),
6146        }
6147    }
6148
6149    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
6150        let entries = match properties {
6151            Some(Expr::Map(entries)) => entries,
6152            _ => return None,
6153        };
6154
6155        if entries.is_empty() {
6156            return None;
6157        }
6158        let mut final_expr = None;
6159        for (prop, val_expr) in entries {
6160            let eq_expr = Expr::BinaryOp {
6161                left: Box::new(Expr::Property(
6162                    Box::new(Expr::Variable(variable.to_string())),
6163                    prop.clone(),
6164                )),
6165                op: BinaryOp::Eq,
6166                right: Box::new(val_expr.clone()),
6167            };
6168
6169            if let Some(e) = final_expr {
6170                final_expr = Some(Expr::BinaryOp {
6171                    left: Box::new(e),
6172                    op: BinaryOp::And,
6173                    right: Box::new(eq_expr),
6174                });
6175            } else {
6176                final_expr = Some(eq_expr);
6177            }
6178        }
6179        final_expr
6180    }
6181
6182    /// Build a filter expression from node properties and labels.
6183    ///
6184    /// This is used for TraverseMainByType where we need to filter target nodes
6185    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
6186    pub fn node_filter_expr(
6187        &self,
6188        variable: &str,
6189        labels: &[String],
6190        properties: &Option<Expr>,
6191    ) -> Option<Expr> {
6192        let mut final_expr = None;
6193
6194        // Add label checks using hasLabel(variable, 'label')
6195        for label in labels {
6196            let label_check = Expr::FunctionCall {
6197                name: "hasLabel".to_string(),
6198                args: vec![
6199                    Expr::Variable(variable.to_string()),
6200                    Expr::Literal(CypherLiteral::String(label.clone())),
6201                ],
6202                distinct: false,
6203                window_spec: None,
6204            };
6205
6206            final_expr = match final_expr {
6207                Some(e) => Some(Expr::BinaryOp {
6208                    left: Box::new(e),
6209                    op: BinaryOp::And,
6210                    right: Box::new(label_check),
6211                }),
6212                None => Some(label_check),
6213            };
6214        }
6215
6216        // Add property checks
6217        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
6218            final_expr = match final_expr {
6219                Some(e) => Some(Expr::BinaryOp {
6220                    left: Box::new(e),
6221                    op: BinaryOp::And,
6222                    right: Box::new(prop_expr),
6223                }),
6224                None => Some(prop_expr),
6225            };
6226        }
6227
6228        final_expr
6229    }
6230
6231    /// Create a filter plan that ensures traversed target matches a bound variable.
6232    ///
6233    /// Used in EXISTS subquery patterns where the target is already bound.
6234    /// Compares the target's VID against the bound variable's VID.
6235    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
6236        // Compare the traverse-discovered target's VID against the bound variable's VID.
6237        // Left side: Property access on the variable from current scope.
6238        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
6239        // We use Variable("{var}._vid") to access the VID column from the traverse output,
6240        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
6241        let bound_check = Expr::BinaryOp {
6242            left: Box::new(Expr::Property(
6243                Box::new(Expr::Variable(target_variable.to_string())),
6244                "_vid".to_string(),
6245            )),
6246            op: BinaryOp::Eq,
6247            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
6248        };
6249        LogicalPlan::Filter {
6250            input: Box::new(plan),
6251            predicate: bound_check,
6252            optional_variables: HashSet::new(),
6253        }
6254    }
6255
6256    /// Replace a Scan node matching the variable with a VectorKnn node
6257    fn replace_scan_with_knn(
6258        plan: LogicalPlan,
6259        variable: &str,
6260        property: &str,
6261        query: Expr,
6262        threshold: Option<f32>,
6263    ) -> LogicalPlan {
6264        match plan {
6265            LogicalPlan::Scan {
6266                label_id,
6267                labels,
6268                variable: scan_var,
6269                filter,
6270                optional,
6271            } => {
6272                if scan_var == variable {
6273                    // Inject any existing scan filter into VectorKnn?
6274                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
6275                    // Typically filter is applied post-Knn or during Knn if supported.
6276                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
6277                    // If `filter` is present on Scan, it must be preserved.
6278                    // We can wrap VectorKnn in Filter if Scan had filter.
6279
6280                    let knn = LogicalPlan::VectorKnn {
6281                        label_id,
6282                        variable: variable.to_string(),
6283                        property: property.to_string(),
6284                        query,
6285                        k: 100, // Default K, should push down LIMIT
6286                        threshold,
6287                    };
6288
6289                    if let Some(f) = filter {
6290                        LogicalPlan::Filter {
6291                            input: Box::new(knn),
6292                            predicate: f,
6293                            optional_variables: HashSet::new(),
6294                        }
6295                    } else {
6296                        knn
6297                    }
6298                } else {
6299                    LogicalPlan::Scan {
6300                        label_id,
6301                        labels,
6302                        variable: scan_var,
6303                        filter,
6304                        optional,
6305                    }
6306                }
6307            }
6308            LogicalPlan::Filter {
6309                input,
6310                predicate,
6311                optional_variables,
6312            } => LogicalPlan::Filter {
6313                input: Box::new(Self::replace_scan_with_knn(
6314                    *input, variable, property, query, threshold,
6315                )),
6316                predicate,
6317                optional_variables,
6318            },
6319            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6320                input: Box::new(Self::replace_scan_with_knn(
6321                    *input, variable, property, query, threshold,
6322                )),
6323                projections,
6324            },
6325            LogicalPlan::Limit { input, skip, fetch } => {
6326                // If we encounter Limit, we should ideally push K down to VectorKnn
6327                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
6328                // Limit comes later.
6329                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
6330                LogicalPlan::Limit {
6331                    input: Box::new(Self::replace_scan_with_knn(
6332                        *input, variable, property, query, threshold,
6333                    )),
6334                    skip,
6335                    fetch,
6336                }
6337            }
6338            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6339                left: Box::new(Self::replace_scan_with_knn(
6340                    *left,
6341                    variable,
6342                    property,
6343                    query.clone(),
6344                    threshold,
6345                )),
6346                right: Box::new(Self::replace_scan_with_knn(
6347                    *right, variable, property, query, threshold,
6348                )),
6349            },
6350            other => other,
6351        }
6352    }
6353
6354    /// Find the label_id for a Scan node matching the given variable
6355    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
6356        match plan {
6357            LogicalPlan::Scan {
6358                label_id,
6359                variable: var,
6360                ..
6361            } if var == variable => Some(*label_id),
6362            LogicalPlan::Filter { input, .. }
6363            | LogicalPlan::Project { input, .. }
6364            | LogicalPlan::Sort { input, .. }
6365            | LogicalPlan::Limit { input, .. }
6366            | LogicalPlan::Aggregate { input, .. }
6367            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
6368            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
6369                .or_else(|| Self::find_scan_label_id(right, variable)),
6370            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
6371            _ => None,
6372        }
6373    }
6374
6375    /// Push a predicate into a Scan's filter for the specified variable
6376    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
6377        match plan {
6378            LogicalPlan::Scan {
6379                label_id,
6380                labels,
6381                variable: var,
6382                filter,
6383                optional,
6384            } if var == variable => {
6385                // Merge the predicate with existing filter
6386                let new_filter = match filter {
6387                    Some(existing) => Some(Expr::BinaryOp {
6388                        left: Box::new(existing),
6389                        op: BinaryOp::And,
6390                        right: Box::new(predicate),
6391                    }),
6392                    None => Some(predicate),
6393                };
6394                LogicalPlan::Scan {
6395                    label_id,
6396                    labels,
6397                    variable: var,
6398                    filter: new_filter,
6399                    optional,
6400                }
6401            }
6402            LogicalPlan::Filter {
6403                input,
6404                predicate: p,
6405                optional_variables: opt_vars,
6406            } => LogicalPlan::Filter {
6407                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6408                predicate: p,
6409                optional_variables: opt_vars,
6410            },
6411            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6412                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6413                projections,
6414            },
6415            LogicalPlan::CrossJoin { left, right } => {
6416                // Check which side has the variable
6417                if Self::find_scan_label_id(&left, variable).is_some() {
6418                    LogicalPlan::CrossJoin {
6419                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
6420                        right,
6421                    }
6422                } else {
6423                    LogicalPlan::CrossJoin {
6424                        left,
6425                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
6426                    }
6427                }
6428            }
6429            LogicalPlan::Traverse {
6430                input,
6431                edge_type_ids,
6432                direction,
6433                source_variable,
6434                target_variable,
6435                target_label_id,
6436                step_variable,
6437                min_hops,
6438                max_hops,
6439                optional,
6440                target_filter,
6441                path_variable,
6442                edge_properties,
6443                is_variable_length,
6444                optional_pattern_vars,
6445                scope_match_variables,
6446                edge_filter_expr,
6447                path_mode,
6448                qpp_steps,
6449            } => LogicalPlan::Traverse {
6450                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6451                edge_type_ids,
6452                direction,
6453                source_variable,
6454                target_variable,
6455                target_label_id,
6456                step_variable,
6457                min_hops,
6458                max_hops,
6459                optional,
6460                target_filter,
6461                path_variable,
6462                edge_properties,
6463                is_variable_length,
6464                optional_pattern_vars,
6465                scope_match_variables,
6466                edge_filter_expr,
6467                path_mode,
6468                qpp_steps,
6469            },
6470            other => other,
6471        }
6472    }
6473
6474    /// Extract predicates that reference only the specified variable
6475    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
6476        let analyzer = PredicateAnalyzer::new();
6477        let analysis = analyzer.analyze(predicate, variable);
6478
6479        // Return pushable predicates and combined residual
6480        let residual = if analysis.residual.is_empty() {
6481            None
6482        } else {
6483            let mut iter = analysis.residual.into_iter();
6484            let first = iter.next().unwrap();
6485            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
6486                left: Box::new(acc),
6487                op: BinaryOp::And,
6488                right: Box::new(e),
6489            }))
6490        };
6491
6492        (analysis.pushable, residual)
6493    }
6494
6495    // =====================================================================
6496    // Apply Predicate Pushdown - Helper Functions
6497    // =====================================================================
6498
6499    /// Split AND-connected predicates into a list.
6500    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
6501        match expr {
6502            Expr::BinaryOp {
6503                left,
6504                op: BinaryOp::And,
6505                right,
6506            } => {
6507                let mut result = Self::split_and_conjuncts(left);
6508                result.extend(Self::split_and_conjuncts(right));
6509                result
6510            }
6511            _ => vec![expr.clone()],
6512        }
6513    }
6514
6515    /// Combine predicates with AND.
6516    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
6517        if predicates.is_empty() {
6518            return None;
6519        }
6520        let mut result = predicates[0].clone();
6521        for pred in predicates.iter().skip(1) {
6522            result = Expr::BinaryOp {
6523                left: Box::new(result),
6524                op: BinaryOp::And,
6525                right: Box::new(pred.clone()),
6526            };
6527        }
6528        Some(result)
6529    }
6530
6531    /// Collect all variable names referenced in an expression.
6532    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
6533        let mut vars = HashSet::new();
6534        Self::collect_expr_variables_impl(expr, &mut vars);
6535        vars
6536    }
6537
6538    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
6539        match expr {
6540            Expr::Variable(name) => {
6541                vars.insert(name.clone());
6542            }
6543            Expr::Property(inner, _) => {
6544                if let Expr::Variable(name) = inner.as_ref() {
6545                    vars.insert(name.clone());
6546                } else {
6547                    Self::collect_expr_variables_impl(inner, vars);
6548                }
6549            }
6550            Expr::BinaryOp { left, right, .. } => {
6551                Self::collect_expr_variables_impl(left, vars);
6552                Self::collect_expr_variables_impl(right, vars);
6553            }
6554            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6555            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
6556            Expr::FunctionCall { args, .. } => {
6557                for arg in args {
6558                    Self::collect_expr_variables_impl(arg, vars);
6559                }
6560            }
6561            Expr::List(items) => {
6562                for item in items {
6563                    Self::collect_expr_variables_impl(item, vars);
6564                }
6565            }
6566            Expr::Case {
6567                expr,
6568                when_then,
6569                else_expr,
6570            } => {
6571                if let Some(e) = expr {
6572                    Self::collect_expr_variables_impl(e, vars);
6573                }
6574                for (w, t) in when_then {
6575                    Self::collect_expr_variables_impl(w, vars);
6576                    Self::collect_expr_variables_impl(t, vars);
6577                }
6578                if let Some(e) = else_expr {
6579                    Self::collect_expr_variables_impl(e, vars);
6580                }
6581            }
6582            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6583            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
6584            // they introduce local variable bindings not in outer scope.
6585            _ => {}
6586        }
6587    }
6588
6589    /// Collect all variables produced by a logical plan.
6590    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
6591        let mut vars = HashSet::new();
6592        Self::collect_plan_variables_impl(plan, &mut vars);
6593        vars
6594    }
6595
6596    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
6597        match plan {
6598            LogicalPlan::Scan { variable, .. } => {
6599                vars.insert(variable.clone());
6600            }
6601            LogicalPlan::Traverse {
6602                target_variable,
6603                step_variable,
6604                input,
6605                path_variable,
6606                ..
6607            } => {
6608                vars.insert(target_variable.clone());
6609                if let Some(sv) = step_variable {
6610                    vars.insert(sv.clone());
6611                }
6612                if let Some(pv) = path_variable {
6613                    vars.insert(pv.clone());
6614                }
6615                Self::collect_plan_variables_impl(input, vars);
6616            }
6617            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
6618            LogicalPlan::Project { input, projections } => {
6619                for (expr, alias) in projections {
6620                    if let Some(a) = alias {
6621                        vars.insert(a.clone());
6622                    } else if let Expr::Variable(v) = expr {
6623                        vars.insert(v.clone());
6624                    }
6625                }
6626                Self::collect_plan_variables_impl(input, vars);
6627            }
6628            LogicalPlan::Apply {
6629                input, subquery, ..
6630            } => {
6631                Self::collect_plan_variables_impl(input, vars);
6632                Self::collect_plan_variables_impl(subquery, vars);
6633            }
6634            LogicalPlan::CrossJoin { left, right } => {
6635                Self::collect_plan_variables_impl(left, vars);
6636                Self::collect_plan_variables_impl(right, vars);
6637            }
6638            LogicalPlan::Unwind {
6639                input, variable, ..
6640            } => {
6641                vars.insert(variable.clone());
6642                Self::collect_plan_variables_impl(input, vars);
6643            }
6644            LogicalPlan::Aggregate { input, .. } => {
6645                Self::collect_plan_variables_impl(input, vars);
6646            }
6647            LogicalPlan::Distinct { input } => {
6648                Self::collect_plan_variables_impl(input, vars);
6649            }
6650            LogicalPlan::Sort { input, .. } => {
6651                Self::collect_plan_variables_impl(input, vars);
6652            }
6653            LogicalPlan::Limit { input, .. } => {
6654                Self::collect_plan_variables_impl(input, vars);
6655            }
6656            LogicalPlan::VectorKnn { variable, .. } => {
6657                vars.insert(variable.clone());
6658            }
6659            LogicalPlan::ProcedureCall { yield_items, .. } => {
6660                for (name, alias) in yield_items {
6661                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
6662                }
6663            }
6664            LogicalPlan::ShortestPath {
6665                input,
6666                path_variable,
6667                ..
6668            } => {
6669                vars.insert(path_variable.clone());
6670                Self::collect_plan_variables_impl(input, vars);
6671            }
6672            LogicalPlan::AllShortestPaths {
6673                input,
6674                path_variable,
6675                ..
6676            } => {
6677                vars.insert(path_variable.clone());
6678                Self::collect_plan_variables_impl(input, vars);
6679            }
6680            LogicalPlan::RecursiveCTE {
6681                initial, recursive, ..
6682            } => {
6683                Self::collect_plan_variables_impl(initial, vars);
6684                Self::collect_plan_variables_impl(recursive, vars);
6685            }
6686            LogicalPlan::SubqueryCall {
6687                input, subquery, ..
6688            } => {
6689                Self::collect_plan_variables_impl(input, vars);
6690                Self::collect_plan_variables_impl(subquery, vars);
6691            }
6692            _ => {}
6693        }
6694    }
6695
6696    /// Extract predicates that only reference variables from Apply's input.
6697    /// Returns (input_only_predicates, remaining_predicates).
6698    fn extract_apply_input_predicates(
6699        predicate: &Expr,
6700        input_variables: &HashSet<String>,
6701        subquery_new_variables: &HashSet<String>,
6702    ) -> (Vec<Expr>, Vec<Expr>) {
6703        let conjuncts = Self::split_and_conjuncts(predicate);
6704        let mut input_preds = Vec::new();
6705        let mut remaining = Vec::new();
6706
6707        for conj in conjuncts {
6708            let vars = Self::collect_expr_variables(&conj);
6709
6710            // Predicate only references input variables (none from subquery)
6711            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
6712            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
6713
6714            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
6715                input_preds.push(conj);
6716            } else {
6717                remaining.push(conj);
6718            }
6719        }
6720
6721        (input_preds, remaining)
6722    }
6723
6724    /// Push eligible predicates into Apply.input_filter.
6725    /// This filters input rows BEFORE executing the correlated subquery.
6726    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
6727        match plan {
6728            LogicalPlan::Apply {
6729                input,
6730                subquery,
6731                input_filter,
6732            } => {
6733                // Collect variables from input plan
6734                let input_vars = Self::collect_plan_variables(&input);
6735
6736                // Collect NEW variables introduced by subquery (not in input)
6737                let subquery_vars = Self::collect_plan_variables(&subquery);
6738                let new_subquery_vars: HashSet<String> =
6739                    subquery_vars.difference(&input_vars).cloned().collect();
6740
6741                // Extract predicates that only reference input variables
6742                let (input_preds, remaining) = Self::extract_apply_input_predicates(
6743                    current_predicate,
6744                    &input_vars,
6745                    &new_subquery_vars,
6746                );
6747
6748                // Update current_predicate to only remaining predicates
6749                *current_predicate = if remaining.is_empty() {
6750                    Expr::TRUE
6751                } else {
6752                    Self::combine_predicates(remaining).unwrap()
6753                };
6754
6755                // Combine extracted predicates with existing input_filter
6756                let new_input_filter = if input_preds.is_empty() {
6757                    input_filter
6758                } else {
6759                    let extracted = Self::combine_predicates(input_preds).unwrap();
6760                    match input_filter {
6761                        Some(existing) => Some(Expr::BinaryOp {
6762                            left: Box::new(existing),
6763                            op: BinaryOp::And,
6764                            right: Box::new(extracted),
6765                        }),
6766                        None => Some(extracted),
6767                    }
6768                };
6769
6770                // Recurse into input plan
6771                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
6772
6773                LogicalPlan::Apply {
6774                    input: Box::new(new_input),
6775                    subquery,
6776                    input_filter: new_input_filter,
6777                }
6778            }
6779            // Recurse into other plan nodes
6780            LogicalPlan::Filter {
6781                input,
6782                predicate,
6783                optional_variables,
6784            } => LogicalPlan::Filter {
6785                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6786                predicate,
6787                optional_variables,
6788            },
6789            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6790                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6791                projections,
6792            },
6793            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
6794                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6795                order_by,
6796            },
6797            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
6798                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6799                skip,
6800                fetch,
6801            },
6802            LogicalPlan::Aggregate {
6803                input,
6804                group_by,
6805                aggregates,
6806            } => LogicalPlan::Aggregate {
6807                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6808                group_by,
6809                aggregates,
6810            },
6811            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6812                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
6813                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
6814            },
6815            LogicalPlan::Traverse {
6816                input,
6817                edge_type_ids,
6818                direction,
6819                source_variable,
6820                target_variable,
6821                target_label_id,
6822                step_variable,
6823                min_hops,
6824                max_hops,
6825                optional,
6826                target_filter,
6827                path_variable,
6828                edge_properties,
6829                is_variable_length,
6830                optional_pattern_vars,
6831                scope_match_variables,
6832                edge_filter_expr,
6833                path_mode,
6834                qpp_steps,
6835            } => LogicalPlan::Traverse {
6836                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6837                edge_type_ids,
6838                direction,
6839                source_variable,
6840                target_variable,
6841                target_label_id,
6842                step_variable,
6843                min_hops,
6844                max_hops,
6845                optional,
6846                target_filter,
6847                path_variable,
6848                edge_properties,
6849                is_variable_length,
6850                optional_pattern_vars,
6851                scope_match_variables,
6852                edge_filter_expr,
6853                path_mode,
6854                qpp_steps,
6855            },
6856            other => other,
6857        }
6858    }
6859}
6860
6861/// Get the expected column name for an aggregate expression.
6862///
6863/// This is the single source of truth for aggregate column naming, used by:
6864/// - Logical planner (to create column references)
6865/// - Physical planner (to rename DataFusion's auto-generated column names)
6866/// - Fallback executor (to name result columns)
6867pub fn aggregate_column_name(expr: &Expr) -> String {
6868    expr.to_string_repr()
6869}
6870
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Built by `QueryPlanner::explain_logical_plan`. The type is serde
/// (de)serializable.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) debug rendering of the logical plan tree.
    pub plan_text: String,
    /// Index availability report for each scan in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    /// `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
6885
/// Suggestion for creating an index to improve query performance.
///
/// Collected by `QueryPlanner::collect_index_suggestions`, which at present
/// only looks for temporal predicates.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Ready-to-execute Cypher statement to create the index.
    pub create_statement: String,
}
6900
/// Index availability report for a single scan operator.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label (or edge type) the index belongs to. For vector KNN
    /// scans this is `"?"` when the label id cannot be resolved to a name.
    pub label_or_type: String,
    /// Name of the indexed property.
    pub property: String,
    /// Kind of index, as an upper-case tag (e.g., `"VECTOR"`).
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
6912
/// Rough cost and row count estimates for a complete logical plan.
///
/// NOTE: `QueryPlanner::estimate_costs` currently fills this with fixed
/// placeholder values that do not depend on the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
6921
6922impl QueryPlanner {
6923    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
6924    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
6925        let plan = self.plan(ast)?;
6926        self.explain_logical_plan(&plan)
6927    }
6928
6929    /// Produce an EXPLAIN report for an already-planned logical plan.
6930    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
6931        let index_usage = self.analyze_index_usage(plan)?;
6932        let cost_estimates = self.estimate_costs(plan)?;
6933        let suggestions = self.collect_index_suggestions(plan);
6934        let warnings = Vec::new();
6935        let plan_text = format!("{:#?}", plan);
6936
6937        Ok(ExplainOutput {
6938            plan_text,
6939            index_usage,
6940            cost_estimates,
6941            warnings,
6942            suggestions,
6943        })
6944    }
6945
6946    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
6947        let mut usage = Vec::new();
6948        self.collect_index_usage(plan, &mut usage);
6949        Ok(usage)
6950    }
6951
6952    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
6953        match plan {
6954            LogicalPlan::Scan { .. } => {
6955                // Placeholder: Scan might use index if it was optimized
6956                // Ideally LogicalPlan::Scan should store if it uses index.
6957                // But typically Planner converts Scan to specific index scan or we infer it here.
6958            }
6959            LogicalPlan::VectorKnn {
6960                label_id, property, ..
6961            } => {
6962                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
6963                usage.push(IndexUsage {
6964                    label_or_type: label_name.to_string(),
6965                    property: property.clone(),
6966                    index_type: "VECTOR".to_string(),
6967                    used: true,
6968                    reason: None,
6969                });
6970            }
6971            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
6972            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
6973            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
6974            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
6975            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
6976            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
6977            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
6978            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6979                self.collect_index_usage(left, usage);
6980                self.collect_index_usage(right, usage);
6981            }
6982            _ => {}
6983        }
6984    }
6985
6986    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
6987        Ok(CostEstimates {
6988            estimated_rows: 100.0,
6989            estimated_cost: 10.0,
6990        })
6991    }
6992
6993    /// Collect index suggestions based on query patterns.
6994    ///
6995    /// Currently detects:
6996    /// - Temporal predicates from `uni.validAt()` function calls
6997    /// - Temporal predicates from `VALID_AT` macro expansion
6998    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
6999        let mut suggestions = Vec::new();
7000        self.collect_temporal_suggestions(plan, &mut suggestions);
7001        suggestions
7002    }
7003
7004    /// Recursively collect temporal index suggestions from the plan.
7005    fn collect_temporal_suggestions(
7006        &self,
7007        plan: &LogicalPlan,
7008        suggestions: &mut Vec<IndexSuggestion>,
7009    ) {
7010        match plan {
7011            LogicalPlan::Filter {
7012                input, predicate, ..
7013            } => {
7014                // Check for temporal patterns in the predicate
7015                self.detect_temporal_pattern(predicate, suggestions);
7016                // Recurse into input
7017                self.collect_temporal_suggestions(input, suggestions);
7018            }
7019            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
7020            LogicalPlan::Project { input, .. } => {
7021                self.collect_temporal_suggestions(input, suggestions)
7022            }
7023            LogicalPlan::Limit { input, .. } => {
7024                self.collect_temporal_suggestions(input, suggestions)
7025            }
7026            LogicalPlan::Sort { input, .. } => {
7027                self.collect_temporal_suggestions(input, suggestions)
7028            }
7029            LogicalPlan::Aggregate { input, .. } => {
7030                self.collect_temporal_suggestions(input, suggestions)
7031            }
7032            LogicalPlan::Traverse { input, .. } => {
7033                self.collect_temporal_suggestions(input, suggestions)
7034            }
7035            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
7036                self.collect_temporal_suggestions(left, suggestions);
7037                self.collect_temporal_suggestions(right, suggestions);
7038            }
7039            _ => {}
7040        }
7041    }
7042
7043    /// Detect temporal predicate patterns and suggest indexes.
7044    ///
7045    /// Detects two patterns:
7046    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
7047    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
7048    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
7049        match expr {
7050            // Pattern 1: uni.temporal.validAt() function call
7051            Expr::FunctionCall { name, args, .. }
7052                if name.eq_ignore_ascii_case("uni.temporal.validAt")
7053                    || name.eq_ignore_ascii_case("validAt") =>
7054            {
7055                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
7056                if args.len() >= 2 {
7057                    let start_prop =
7058                        if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1) {
7059                            s.clone()
7060                        } else {
7061                            "valid_from".to_string()
7062                        };
7063
7064                    // Try to extract label from the node expression
7065                    if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
7066                        self.suggest_temporal_index(&var, &start_prop, suggestions);
7067                    }
7068                }
7069            }
7070
7071            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
7072            Expr::BinaryOp {
7073                left,
7074                op: BinaryOp::And,
7075                right,
7076            } => {
7077                // Check left side for `prop <= time` pattern (temporal start condition)
7078                if let Expr::BinaryOp {
7079                    left: prop_expr,
7080                    op: BinaryOp::LtEq,
7081                    ..
7082                } = left.as_ref()
7083                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
7084                    && (prop_name == "valid_from"
7085                        || prop_name.contains("start")
7086                        || prop_name.contains("from")
7087                        || prop_name.contains("begin"))
7088                    && let Some(var) = base.extract_variable()
7089                {
7090                    self.suggest_temporal_index(&var, prop_name, suggestions);
7091                }
7092
7093                // Recurse into both sides of AND
7094                self.detect_temporal_pattern(left.as_ref(), suggestions);
7095                self.detect_temporal_pattern(right.as_ref(), suggestions);
7096            }
7097
7098            // Recurse into other binary ops
7099            Expr::BinaryOp { left, right, .. } => {
7100                self.detect_temporal_pattern(left.as_ref(), suggestions);
7101                self.detect_temporal_pattern(right.as_ref(), suggestions);
7102            }
7103
7104            _ => {}
7105        }
7106    }
7107
7108    /// Suggest a scalar index for a temporal property if one doesn't already exist.
7109    fn suggest_temporal_index(
7110        &self,
7111        _variable: &str,
7112        property: &str,
7113        suggestions: &mut Vec<IndexSuggestion>,
7114    ) {
7115        // Check if a scalar index already exists for this property
7116        // We need to check all labels since we may not know the exact label from the variable
7117        let mut has_index = false;
7118
7119        for index in &self.schema.indexes {
7120            if let IndexDefinition::Scalar(config) = index
7121                && config.properties.contains(&property.to_string())
7122            {
7123                has_index = true;
7124                break;
7125            }
7126        }
7127
7128        if !has_index {
7129            // Avoid duplicate suggestions
7130            let already_suggested = suggestions.iter().any(|s| s.property == property);
7131            if !already_suggested {
7132                suggestions.push(IndexSuggestion {
7133                    label_or_type: "(detected from temporal query)".to_string(),
7134                    property: property.to_string(),
7135                    index_type: "SCALAR (BTree)".to_string(),
7136                    reason: format!(
7137                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
7138                        property
7139                    ),
7140                    create_statement: format!(
7141                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
7142                        property, property
7143                    ),
7144                });
7145            }
7146        }
7147    }
7148
7149    /// Helper functions for expression normalization
7150    /// Normalize an expression for storage: strip variable prefixes
7151    /// For simple property: u.email -> "email"
7152    /// For expressions: lower(u.email) -> "lower(email)"
7153    fn normalize_expression_for_storage(expr: &Expr) -> String {
7154        match expr {
7155            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
7156            _ => {
7157                // Serialize expression and strip variable prefix
7158                let expr_str = expr.to_string_repr();
7159                Self::strip_variable_prefix(&expr_str)
7160            }
7161        }
7162    }
7163
7164    /// Strip variable references like "u.prop" from expression strings
7165    /// Converts "lower(u.email)" to "lower(email)"
7166    fn strip_variable_prefix(expr_str: &str) -> String {
7167        use regex::Regex;
7168        // Match patterns like "word.property" and replace with just "property"
7169        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
7170        re.replace_all(expr_str, "$1").to_string()
7171    }
7172
7173    /// Plan a schema command from the new AST
7174    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
7175        match cmd {
7176            SchemaCommand::CreateVectorIndex(c) => {
7177                // Parse index type from options (default: IvfPq)
7178                let index_type = if let Some(type_val) = c.options.get("type") {
7179                    match type_val.as_str() {
7180                        Some("hnsw") => VectorIndexType::Hnsw {
7181                            m: 16,
7182                            ef_construction: 200,
7183                            ef_search: 100,
7184                        },
7185                        Some("flat") => VectorIndexType::Flat,
7186                        _ => VectorIndexType::IvfPq {
7187                            num_partitions: 256,
7188                            num_sub_vectors: 16,
7189                            bits_per_subvector: 8,
7190                        },
7191                    }
7192                } else {
7193                    VectorIndexType::IvfPq {
7194                        num_partitions: 256,
7195                        num_sub_vectors: 16,
7196                        bits_per_subvector: 8,
7197                    }
7198                };
7199
7200                // Parse embedding config from options
7201                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
7202                    Self::parse_embedding_config(emb_val)?
7203                } else {
7204                    None
7205                };
7206
7207                let config = VectorIndexConfig {
7208                    name: c.name,
7209                    label: c.label,
7210                    property: c.property,
7211                    metric: DistanceMetric::Cosine,
7212                    index_type,
7213                    embedding_config,
7214                    metadata: Default::default(),
7215                };
7216                Ok(LogicalPlan::CreateVectorIndex {
7217                    config,
7218                    if_not_exists: c.if_not_exists,
7219                })
7220            }
7221            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
7222                config: FullTextIndexConfig {
7223                    name: cfg.name,
7224                    label: cfg.label,
7225                    properties: cfg.properties,
7226                    tokenizer: TokenizerConfig::Standard,
7227                    with_positions: true,
7228                    metadata: Default::default(),
7229                },
7230                if_not_exists: cfg.if_not_exists,
7231            }),
7232            SchemaCommand::CreateScalarIndex(cfg) => {
7233                // Convert expressions to storage strings (strip variable prefix)
7234                let properties: Vec<String> = cfg
7235                    .expressions
7236                    .iter()
7237                    .map(Self::normalize_expression_for_storage)
7238                    .collect();
7239
7240                Ok(LogicalPlan::CreateScalarIndex {
7241                    config: ScalarIndexConfig {
7242                        name: cfg.name,
7243                        label: cfg.label,
7244                        properties,
7245                        index_type: ScalarIndexType::BTree,
7246                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
7247                        metadata: Default::default(),
7248                    },
7249                    if_not_exists: cfg.if_not_exists,
7250                })
7251            }
7252            SchemaCommand::CreateJsonFtsIndex(cfg) => {
7253                let with_positions = cfg
7254                    .options
7255                    .get("with_positions")
7256                    .and_then(|v| v.as_bool())
7257                    .unwrap_or(false);
7258                Ok(LogicalPlan::CreateJsonFtsIndex {
7259                    config: JsonFtsIndexConfig {
7260                        name: cfg.name,
7261                        label: cfg.label,
7262                        column: cfg.column,
7263                        paths: Vec::new(),
7264                        with_positions,
7265                        metadata: Default::default(),
7266                    },
7267                    if_not_exists: cfg.if_not_exists,
7268                })
7269            }
7270            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
7271                name: drop.name,
7272                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
7273            }),
7274            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
7275            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
7276            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
7277            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
7278            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
7279            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
7280            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
7281            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
7282            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
7283            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
7284            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
7285            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
7286            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
7287            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
7288            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
7289            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
7290                destination: path,
7291                options: HashMap::new(),
7292            }),
7293            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
7294                label: cmd.label,
7295                path: cmd.path,
7296                format: cmd.format,
7297                options: cmd.options,
7298            }),
7299            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
7300                label: cmd.label,
7301                path: cmd.path,
7302                format: cmd.format,
7303                options: cmd.options,
7304            }),
7305        }
7306    }
7307
7308    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
7309        let obj = emb_val
7310            .as_object()
7311            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
7312
7313        // Parse alias (required)
7314        let alias = obj
7315            .get("alias")
7316            .and_then(|v| v.as_str())
7317            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
7318
7319        // Parse source properties (required)
7320        let source_properties = obj
7321            .get("source")
7322            .and_then(|v| v.as_array())
7323            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
7324            .iter()
7325            .filter_map(|v| v.as_str().map(|s| s.to_string()))
7326            .collect::<Vec<_>>();
7327
7328        if source_properties.is_empty() {
7329            return Err(anyhow!(
7330                "embedding.source must contain at least one property"
7331            ));
7332        }
7333
7334        let batch_size = obj
7335            .get("batch_size")
7336            .and_then(|v| v.as_u64())
7337            .map(|v| v as usize)
7338            .unwrap_or(32);
7339
7340        Ok(Some(EmbeddingConfig {
7341            alias: alias.to_string(),
7342            source_properties,
7343            batch_size,
7344        }))
7345    }
7346}
7347
7348/// Collect all properties referenced anywhere in the LogicalPlan tree.
7349///
7350/// This is critical for window functions: properties must be materialized
7351/// at the Scan node so they're available for window operations later.
7352///
7353/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
7354pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
7355    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
7356    collect_properties_recursive(plan, &mut properties);
7357    properties
7358}
7359
7360/// Recursively walk the LogicalPlan tree and collect all property references.
7361fn collect_properties_recursive(
7362    plan: &LogicalPlan,
7363    properties: &mut HashMap<String, HashSet<String>>,
7364) {
7365    match plan {
7366        LogicalPlan::Window {
7367            input,
7368            window_exprs,
7369        } => {
7370            // Collect from window expressions
7371            for expr in window_exprs {
7372                collect_properties_from_expr_into(expr, properties);
7373            }
7374            collect_properties_recursive(input, properties);
7375        }
7376        LogicalPlan::Project { input, projections } => {
7377            for (expr, _alias) in projections {
7378                collect_properties_from_expr_into(expr, properties);
7379            }
7380            collect_properties_recursive(input, properties);
7381        }
7382        LogicalPlan::Sort { input, order_by } => {
7383            for sort_item in order_by {
7384                collect_properties_from_expr_into(&sort_item.expr, properties);
7385            }
7386            collect_properties_recursive(input, properties);
7387        }
7388        LogicalPlan::Filter {
7389            input, predicate, ..
7390        } => {
7391            collect_properties_from_expr_into(predicate, properties);
7392            collect_properties_recursive(input, properties);
7393        }
7394        LogicalPlan::Aggregate {
7395            input,
7396            group_by,
7397            aggregates,
7398        } => {
7399            for expr in group_by {
7400                collect_properties_from_expr_into(expr, properties);
7401            }
7402            for expr in aggregates {
7403                collect_properties_from_expr_into(expr, properties);
7404            }
7405            collect_properties_recursive(input, properties);
7406        }
7407        LogicalPlan::Scan {
7408            filter: Some(expr), ..
7409        } => {
7410            collect_properties_from_expr_into(expr, properties);
7411        }
7412        LogicalPlan::Scan { filter: None, .. } => {}
7413        LogicalPlan::ExtIdLookup {
7414            filter: Some(expr), ..
7415        } => {
7416            collect_properties_from_expr_into(expr, properties);
7417        }
7418        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
7419        LogicalPlan::ScanAll {
7420            filter: Some(expr), ..
7421        } => {
7422            collect_properties_from_expr_into(expr, properties);
7423        }
7424        LogicalPlan::ScanAll { filter: None, .. } => {}
7425        LogicalPlan::ScanMainByLabels {
7426            filter: Some(expr), ..
7427        } => {
7428            collect_properties_from_expr_into(expr, properties);
7429        }
7430        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
7431        LogicalPlan::TraverseMainByType {
7432            input,
7433            target_filter,
7434            ..
7435        } => {
7436            if let Some(expr) = target_filter {
7437                collect_properties_from_expr_into(expr, properties);
7438            }
7439            collect_properties_recursive(input, properties);
7440        }
7441        LogicalPlan::Traverse {
7442            input,
7443            target_filter,
7444            step_variable: _,
7445            ..
7446        } => {
7447            if let Some(expr) = target_filter {
7448                collect_properties_from_expr_into(expr, properties);
7449            }
7450            // Note: Edge properties (step_variable) will be collected from expressions
7451            // that reference them. The edge_properties field in LogicalPlan is populated
7452            // later during physical planning based on this collected map.
7453            collect_properties_recursive(input, properties);
7454        }
7455        LogicalPlan::Unwind { input, expr, .. } => {
7456            collect_properties_from_expr_into(expr, properties);
7457            collect_properties_recursive(input, properties);
7458        }
7459        LogicalPlan::Create { input, pattern } => {
7460            // Mark variables referenced in CREATE patterns with "*" so plan_scan
7461            // adds structural projections (bare entity columns). Without this,
7462            // execute_create_pattern() can't find bound variables and creates
7463            // spurious new nodes instead of using existing MATCH'd ones.
7464            mark_pattern_variables(pattern, properties);
7465            collect_properties_recursive(input, properties);
7466        }
7467        LogicalPlan::CreateBatch { input, patterns } => {
7468            for pattern in patterns {
7469                mark_pattern_variables(pattern, properties);
7470            }
7471            collect_properties_recursive(input, properties);
7472        }
7473        LogicalPlan::Merge {
7474            input,
7475            pattern,
7476            on_match,
7477            on_create,
7478        } => {
7479            mark_pattern_variables(pattern, properties);
7480            if let Some(set_clause) = on_match {
7481                mark_set_item_variables(&set_clause.items, properties);
7482            }
7483            if let Some(set_clause) = on_create {
7484                mark_set_item_variables(&set_clause.items, properties);
7485            }
7486            collect_properties_recursive(input, properties);
7487        }
7488        LogicalPlan::Set { input, items } => {
7489            mark_set_item_variables(items, properties);
7490            collect_properties_recursive(input, properties);
7491        }
7492        LogicalPlan::Remove { input, items } => {
7493            for item in items {
7494                match item {
7495                    RemoveItem::Property(expr) => {
7496                        // REMOVE n.prop — collect the property and mark the variable
7497                        // with "*" so full structural projection is applied.
7498                        collect_properties_from_expr_into(expr, properties);
7499                        if let Expr::Property(base, _) = expr
7500                            && let Expr::Variable(var) = base.as_ref()
7501                        {
7502                            properties
7503                                .entry(var.clone())
7504                                .or_default()
7505                                .insert("*".to_string());
7506                        }
7507                    }
7508                    RemoveItem::Labels { variable, .. } => {
7509                        // REMOVE n:Label — mark n with "*"
7510                        properties
7511                            .entry(variable.clone())
7512                            .or_default()
7513                            .insert("*".to_string());
7514                    }
7515                }
7516            }
7517            collect_properties_recursive(input, properties);
7518        }
7519        LogicalPlan::Delete { input, items, .. } => {
7520            for expr in items {
7521                collect_properties_from_expr_into(expr, properties);
7522            }
7523            collect_properties_recursive(input, properties);
7524        }
7525        LogicalPlan::Foreach {
7526            input, list, body, ..
7527        } => {
7528            collect_properties_from_expr_into(list, properties);
7529            for plan in body {
7530                collect_properties_recursive(plan, properties);
7531            }
7532            collect_properties_recursive(input, properties);
7533        }
7534        LogicalPlan::Limit { input, .. } => {
7535            collect_properties_recursive(input, properties);
7536        }
7537        LogicalPlan::CrossJoin { left, right } => {
7538            collect_properties_recursive(left, properties);
7539            collect_properties_recursive(right, properties);
7540        }
7541        LogicalPlan::Apply {
7542            input,
7543            subquery,
7544            input_filter,
7545        } => {
7546            if let Some(expr) = input_filter {
7547                collect_properties_from_expr_into(expr, properties);
7548            }
7549            collect_properties_recursive(input, properties);
7550            collect_properties_recursive(subquery, properties);
7551        }
7552        LogicalPlan::Union { left, right, .. } => {
7553            collect_properties_recursive(left, properties);
7554            collect_properties_recursive(right, properties);
7555        }
7556        LogicalPlan::RecursiveCTE {
7557            initial, recursive, ..
7558        } => {
7559            collect_properties_recursive(initial, properties);
7560            collect_properties_recursive(recursive, properties);
7561        }
7562        LogicalPlan::ProcedureCall { arguments, .. } => {
7563            for arg in arguments {
7564                collect_properties_from_expr_into(arg, properties);
7565            }
7566        }
7567        LogicalPlan::VectorKnn { query, .. } => {
7568            collect_properties_from_expr_into(query, properties);
7569        }
7570        LogicalPlan::InvertedIndexLookup { terms, .. } => {
7571            collect_properties_from_expr_into(terms, properties);
7572        }
7573        LogicalPlan::ShortestPath { input, .. } => {
7574            collect_properties_recursive(input, properties);
7575        }
7576        LogicalPlan::AllShortestPaths { input, .. } => {
7577            collect_properties_recursive(input, properties);
7578        }
7579        LogicalPlan::Distinct { input } => {
7580            collect_properties_recursive(input, properties);
7581        }
7582        LogicalPlan::QuantifiedPattern {
7583            input,
7584            pattern_plan,
7585            ..
7586        } => {
7587            collect_properties_recursive(input, properties);
7588            collect_properties_recursive(pattern_plan, properties);
7589        }
7590        LogicalPlan::BindZeroLengthPath { input, .. } => {
7591            collect_properties_recursive(input, properties);
7592        }
7593        LogicalPlan::BindPath { input, .. } => {
7594            collect_properties_recursive(input, properties);
7595        }
7596        LogicalPlan::SubqueryCall { input, subquery } => {
7597            collect_properties_recursive(input, properties);
7598            collect_properties_recursive(subquery, properties);
7599        }
7600        LogicalPlan::LocyProject {
7601            input, projections, ..
7602        } => {
7603            for (expr, _alias) in projections {
7604                match expr {
7605                    // Bare variable in LocyProject: only need _vid for node variables
7606                    // (plan_locy_project extracts VID directly). Adding "*" would create
7607                    // a structural Struct column that conflicts with derived scan columns.
7608                    Expr::Variable(name) if !name.contains('.') => {
7609                        properties
7610                            .entry(name.clone())
7611                            .or_default()
7612                            .insert("_vid".to_string());
7613                    }
7614                    _ => collect_properties_from_expr_into(expr, properties),
7615                }
7616            }
7617            collect_properties_recursive(input, properties);
7618        }
7619        LogicalPlan::LocyFold {
7620            input,
7621            fold_bindings,
7622            ..
7623        } => {
7624            for (_name, expr) in fold_bindings {
7625                collect_properties_from_expr_into(expr, properties);
7626            }
7627            collect_properties_recursive(input, properties);
7628        }
7629        LogicalPlan::LocyBestBy {
7630            input, criteria, ..
7631        } => {
7632            for (expr, _asc) in criteria {
7633                collect_properties_from_expr_into(expr, properties);
7634            }
7635            collect_properties_recursive(input, properties);
7636        }
7637        LogicalPlan::LocyPriority { input, .. } => {
7638            collect_properties_recursive(input, properties);
7639        }
7640        // DDL and other plans don't reference properties
7641        _ => {}
7642    }
7643}
7644
7645/// Mark target variables from SET items with "*" and collect value expressions.
7646fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
7647    for item in items {
7648        match item {
7649            SetItem::Property { expr, value } => {
7650                // SET n.prop = val — mark n via the property expr, collect from value.
7651                // Also mark the variable with "*" for full structural projection so
7652                // edge identity fields (_src/_dst) are available for write operations.
7653                collect_properties_from_expr_into(expr, properties);
7654                collect_properties_from_expr_into(value, properties);
7655                if let Expr::Property(base, _) = expr
7656                    && let Expr::Variable(var) = base.as_ref()
7657                {
7658                    properties
7659                        .entry(var.clone())
7660                        .or_default()
7661                        .insert("*".to_string());
7662                }
7663            }
7664            SetItem::Labels { variable, .. } => {
7665                // SET n:Label — need full access to n
7666                properties
7667                    .entry(variable.clone())
7668                    .or_default()
7669                    .insert("*".to_string());
7670            }
7671            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
7672                // SET n = {props} or SET n += {props}
7673                properties
7674                    .entry(variable.clone())
7675                    .or_default()
7676                    .insert("*".to_string());
7677                collect_properties_from_expr_into(value, properties);
7678            }
7679        }
7680    }
7681}
7682
7683/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
7684/// adds structural projections (bare entity Struct columns) for them.
7685/// This is needed so that execute_create_pattern() can find bound variables
7686/// in the row HashMap and reuse existing nodes instead of creating new ones.
7687fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
7688    for path in &pattern.paths {
7689        if let Some(ref v) = path.variable {
7690            properties
7691                .entry(v.clone())
7692                .or_default()
7693                .insert("*".to_string());
7694        }
7695        for element in &path.elements {
7696            match element {
7697                PatternElement::Node(n) => {
7698                    if let Some(ref v) = n.variable {
7699                        properties
7700                            .entry(v.clone())
7701                            .or_default()
7702                            .insert("*".to_string());
7703                    }
7704                    // Also collect properties from inline property expressions
7705                    if let Some(ref props) = n.properties {
7706                        collect_properties_from_expr_into(props, properties);
7707                    }
7708                }
7709                PatternElement::Relationship(r) => {
7710                    if let Some(ref v) = r.variable {
7711                        properties
7712                            .entry(v.clone())
7713                            .or_default()
7714                            .insert("*".to_string());
7715                    }
7716                    if let Some(ref props) = r.properties {
7717                        collect_properties_from_expr_into(props, properties);
7718                    }
7719                }
7720                PatternElement::Parenthesized { pattern, .. } => {
7721                    let sub = Pattern {
7722                        paths: vec![pattern.as_ref().clone()],
7723                    };
7724                    mark_pattern_variables(&sub, properties);
7725                }
7726            }
7727        }
7728    }
7729}
7730
7731/// Collect properties from an expression into a HashMap.
7732fn collect_properties_from_expr_into(
7733    expr: &Expr,
7734    properties: &mut HashMap<String, HashSet<String>>,
7735) {
7736    match expr {
7737        Expr::PatternComprehension {
7738            where_clause,
7739            map_expr,
7740            ..
7741        } => {
7742            // Collect properties from the WHERE clause and map expression.
7743            // The pattern itself creates local bindings that don't need
7744            // property collection from the outer scope.
7745            if let Some(where_expr) = where_clause {
7746                collect_properties_from_expr_into(where_expr, properties);
7747            }
7748            collect_properties_from_expr_into(map_expr, properties);
7749        }
7750        Expr::Variable(name) => {
7751            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
7752            if let Some((var, prop)) = name.split_once('.') {
7753                properties
7754                    .entry(var.to_string())
7755                    .or_default()
7756                    .insert(prop.to_string());
7757            } else {
7758                // Bare variable (e.g., RETURN n) — needs all properties materialized
7759                properties
7760                    .entry(name.clone())
7761                    .or_default()
7762                    .insert("*".to_string());
7763            }
7764        }
7765        Expr::Property(base, name) => {
7766            // Extract variable name from the base expression
7767            if let Expr::Variable(var) = base.as_ref() {
7768                properties
7769                    .entry(var.clone())
7770                    .or_default()
7771                    .insert(name.clone());
7772                // Don't recurse into Variable — that would mark it as a bare
7773                // variable reference (adding "*") when it's just a property base.
7774            } else {
7775                // Recurse for complex base expressions (nested property, function call, etc.)
7776                collect_properties_from_expr_into(base, properties);
7777            }
7778        }
7779        Expr::BinaryOp { left, right, .. } => {
7780            collect_properties_from_expr_into(left, properties);
7781            collect_properties_from_expr_into(right, properties);
7782        }
7783        Expr::FunctionCall {
7784            name,
7785            args,
7786            window_spec,
7787            ..
7788        } => {
7789            // Analyze function for property requirements (pushdown hydration)
7790            analyze_function_property_requirements(name, args, properties);
7791
7792            // Collect from arguments
7793            for arg in args {
7794                collect_properties_from_expr_into(arg, properties);
7795            }
7796
7797            // Collect from window spec (PARTITION BY, ORDER BY)
7798            if let Some(spec) = window_spec {
7799                for part_expr in &spec.partition_by {
7800                    collect_properties_from_expr_into(part_expr, properties);
7801                }
7802                for sort_item in &spec.order_by {
7803                    collect_properties_from_expr_into(&sort_item.expr, properties);
7804                }
7805            }
7806        }
7807        Expr::UnaryOp { expr, .. } => {
7808            collect_properties_from_expr_into(expr, properties);
7809        }
7810        Expr::List(items) => {
7811            for item in items {
7812                collect_properties_from_expr_into(item, properties);
7813            }
7814        }
7815        Expr::Map(entries) => {
7816            for (_key, value) in entries {
7817                collect_properties_from_expr_into(value, properties);
7818            }
7819        }
7820        Expr::ListComprehension {
7821            list,
7822            where_clause,
7823            map_expr,
7824            ..
7825        } => {
7826            collect_properties_from_expr_into(list, properties);
7827            if let Some(where_expr) = where_clause {
7828                collect_properties_from_expr_into(where_expr, properties);
7829            }
7830            collect_properties_from_expr_into(map_expr, properties);
7831        }
7832        Expr::Case {
7833            expr,
7834            when_then,
7835            else_expr,
7836        } => {
7837            if let Some(scrutinee_expr) = expr {
7838                collect_properties_from_expr_into(scrutinee_expr, properties);
7839            }
7840            for (when, then) in when_then {
7841                collect_properties_from_expr_into(when, properties);
7842                collect_properties_from_expr_into(then, properties);
7843            }
7844            if let Some(default_expr) = else_expr {
7845                collect_properties_from_expr_into(default_expr, properties);
7846            }
7847        }
7848        Expr::Quantifier {
7849            list, predicate, ..
7850        } => {
7851            collect_properties_from_expr_into(list, properties);
7852            collect_properties_from_expr_into(predicate, properties);
7853        }
7854        Expr::Reduce {
7855            init, list, expr, ..
7856        } => {
7857            collect_properties_from_expr_into(init, properties);
7858            collect_properties_from_expr_into(list, properties);
7859            collect_properties_from_expr_into(expr, properties);
7860        }
7861        Expr::Exists { query, .. } => {
7862            // Walk into EXISTS body to collect property references for outer-scope variables.
7863            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
7864            // are included in the outer scan's property list. Extra properties collected for
7865            // inner-only variables are harmless — the outer scan ignores unknown variable names.
7866            collect_properties_from_subquery(query, properties);
7867        }
7868        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
7869            collect_properties_from_subquery(query, properties);
7870        }
7871        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
7872            collect_properties_from_expr_into(expr, properties);
7873        }
7874        Expr::In { expr, list } => {
7875            collect_properties_from_expr_into(expr, properties);
7876            collect_properties_from_expr_into(list, properties);
7877        }
7878        Expr::ArrayIndex { array, index } => {
7879            if let Expr::Variable(var) = array.as_ref() {
7880                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
7881                    // Static string key: e['name'] → only need that specific property
7882                    properties
7883                        .entry(var.clone())
7884                        .or_default()
7885                        .insert(prop_name.clone());
7886                } else {
7887                    // Dynamic property access: e[prop] → need all properties
7888                    properties
7889                        .entry(var.clone())
7890                        .or_default()
7891                        .insert("*".to_string());
7892                }
7893            }
7894            collect_properties_from_expr_into(array, properties);
7895            collect_properties_from_expr_into(index, properties);
7896        }
7897        Expr::ArraySlice { array, start, end } => {
7898            collect_properties_from_expr_into(array, properties);
7899            if let Some(start_expr) = start {
7900                collect_properties_from_expr_into(start_expr, properties);
7901            }
7902            if let Some(end_expr) = end {
7903                collect_properties_from_expr_into(end_expr, properties);
7904            }
7905        }
7906        Expr::ValidAt {
7907            entity,
7908            timestamp,
7909            start_prop,
7910            end_prop,
7911        } => {
7912            // Extract property requirements from ValidAt expression
7913            if let Expr::Variable(var) = entity.as_ref() {
7914                if let Some(prop) = start_prop {
7915                    properties
7916                        .entry(var.clone())
7917                        .or_default()
7918                        .insert(prop.clone());
7919                }
7920                if let Some(prop) = end_prop {
7921                    properties
7922                        .entry(var.clone())
7923                        .or_default()
7924                        .insert(prop.clone());
7925                }
7926            }
7927            collect_properties_from_expr_into(entity, properties);
7928            collect_properties_from_expr_into(timestamp, properties);
7929        }
7930        Expr::MapProjection { base, items } => {
7931            collect_properties_from_expr_into(base, properties);
7932            for item in items {
7933                match item {
7934                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
7935                        if let Expr::Variable(var) = base.as_ref() {
7936                            properties
7937                                .entry(var.clone())
7938                                .or_default()
7939                                .insert(prop.clone());
7940                        }
7941                    }
7942                    uni_cypher::ast::MapProjectionItem::AllProperties => {
7943                        if let Expr::Variable(var) = base.as_ref() {
7944                            properties
7945                                .entry(var.clone())
7946                                .or_default()
7947                                .insert("*".to_string());
7948                        }
7949                    }
7950                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
7951                        collect_properties_from_expr_into(expr, properties);
7952                    }
7953                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
7954                }
7955            }
7956        }
7957        Expr::LabelCheck { expr, .. } => {
7958            collect_properties_from_expr_into(expr, properties);
7959        }
7960        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
7961        // Mark them with "*" so the outer scan produces structural projections that
7962        // extract_row_params can resolve.
7963        Expr::Parameter(name) => {
7964            properties
7965                .entry(name.clone())
7966                .or_default()
7967                .insert("*".to_string());
7968        }
7969        // Literals and wildcard don't reference properties
7970        Expr::Literal(_) | Expr::Wildcard => {}
7971    }
7972}
7973
7974/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
7975///
7976/// This is needed so that correlated property accesses like `a.city` inside
7977/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
7978/// scan to include `a.city` in its projected columns.
7979fn collect_properties_from_subquery(
7980    query: &Query,
7981    properties: &mut HashMap<String, HashSet<String>>,
7982) {
7983    match query {
7984        Query::Single(stmt) => {
7985            for clause in &stmt.clauses {
7986                match clause {
7987                    Clause::Match(m) => {
7988                        if let Some(ref wc) = m.where_clause {
7989                            collect_properties_from_expr_into(wc, properties);
7990                        }
7991                    }
7992                    Clause::With(w) => {
7993                        for item in &w.items {
7994                            if let ReturnItem::Expr { expr, .. } = item {
7995                                collect_properties_from_expr_into(expr, properties);
7996                            }
7997                        }
7998                        if let Some(ref wc) = w.where_clause {
7999                            collect_properties_from_expr_into(wc, properties);
8000                        }
8001                    }
8002                    Clause::Return(r) => {
8003                        for item in &r.items {
8004                            if let ReturnItem::Expr { expr, .. } = item {
8005                                collect_properties_from_expr_into(expr, properties);
8006                            }
8007                        }
8008                    }
8009                    _ => {}
8010                }
8011            }
8012        }
8013        Query::Union { left, right, .. } => {
8014            collect_properties_from_subquery(left, properties);
8015            collect_properties_from_subquery(right, properties);
8016        }
8017        _ => {}
8018    }
8019}
8020
8021/// Analyze function calls to extract property requirements for pushdown hydration
8022///
8023/// This function examines function calls and their arguments to determine which properties
8024/// need to be loaded for entity arguments. For example:
8025/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
8026/// - keys(n) -> n needs all properties (*)
8027///
8028/// The extracted requirements are added to the properties map for later use during
8029/// scan planning.
8030fn analyze_function_property_requirements(
8031    name: &str,
8032    args: &[Expr],
8033    properties: &mut HashMap<String, HashSet<String>>,
8034) {
8035    use crate::query::function_props::get_function_spec;
8036
8037    /// Helper to mark a variable as needing all properties.
8038    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
8039        properties
8040            .entry(var.to_string())
8041            .or_default()
8042            .insert("*".to_string());
8043    }
8044
8045    let Some(spec) = get_function_spec(name) else {
8046        // Unknown function: conservatively require all properties for variable args
8047        for arg in args {
8048            if let Expr::Variable(var) = arg {
8049                mark_wildcard(var, properties);
8050            }
8051        }
8052        return;
8053    };
8054
8055    // Extract property names from string literal arguments
8056    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
8057        let entity_arg = args.get(entity_arg_idx);
8058        let prop_arg = args.get(prop_arg_idx);
8059
8060        match (entity_arg, prop_arg) {
8061            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
8062                properties
8063                    .entry(var.clone())
8064                    .or_default()
8065                    .insert(prop.clone());
8066            }
8067            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
8068                // Parameter property name: need all properties
8069                mark_wildcard(var, properties);
8070            }
8071            _ => {}
8072        }
8073    }
8074
8075    // Handle full entity requirement (keys(), properties())
8076    if spec.needs_full_entity {
8077        for &idx in spec.entity_args {
8078            if let Some(Expr::Variable(var)) = args.get(idx) {
8079                mark_wildcard(var, properties);
8080            }
8081        }
8082    }
8083}
8084
#[cfg(test)]
mod pushdown_tests {
    //! Unit tests for property-requirement extraction used by pushdown hydration.

    use super::*;

    /// Property requirements keyed by variable name.
    type Requirements = HashMap<String, HashSet<String>>;

    /// Builds a variable reference expression.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Builds a string literal expression.
    fn string_lit(value: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(value.to_string()))
    }

    /// Builds a query parameter expression.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Builds the expected property set from a list of names.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|&name| name.to_owned()).collect()
    }

    /// Runs the function-call analyzer against a fresh requirements map.
    fn analyze_call(function: &str, args: &[Expr]) -> Requirements {
        let mut requirements = Requirements::new();
        analyze_function_property_requirements(function, args, &mut requirements);
        requirements
    }

    /// Runs the expression collector against a fresh requirements map.
    fn collect_expr(expr: &Expr) -> Requirements {
        let mut requirements = Requirements::new();
        collect_properties_from_expr_into(expr, &mut requirements);
        requirements
    }

    /// Returns the properties recorded for `variable`, failing the test if there are none.
    fn props_of<'a>(requirements: &'a Requirements, variable: &str) -> &'a HashSet<String> {
        requirements
            .get(variable)
            .unwrap_or_else(|| panic!("no requirements recorded for `{variable}`"))
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let requirements = analyze_call(
            "uni.temporal.validAt",
            &[var("e"), string_lit("start"), string_lit("end"), var("ts")],
        );

        assert_eq!(props_of(&requirements, "e"), &prop_set(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let requirements = analyze_call("keys", &[var("n")]);

        assert_eq!(props_of(&requirements, "n"), &prop_set(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let requirements = analyze_call("properties", &[var("n")]);

        assert_eq!(props_of(&requirements, "n"), &prop_set(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let requirements = analyze_call("customUdf", &[var("e")]);

        assert_eq!(props_of(&requirements, "e"), &prop_set(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let requirements = analyze_call(
            "uni.temporal.validAt",
            &[var("e"), param("start"), param("end"), var("ts")],
        );

        assert!(props_of(&requirements, "e").contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt carries its start/end property names directly.
        let requirements = collect_expr(&Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        });

        let e_props = props_of(&requirements, "e");
        assert!(e_props.contains("valid_from"));
        assert!(e_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let requirements = collect_expr(&Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        });

        assert!(props_of(&requirements, "e").contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let requirements = collect_expr(&Expr::Property(
            Box::new(var("e")),
            "name".to_string(),
        ));

        assert!(props_of(&requirements, "e").contains("name"));
    }
}