Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::PredicateAnalyzer;
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Semantic category of a variable that is in scope while a query is planned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding, e.g. from `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship binding, e.g. from `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. from `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar binding (`WITH expr AS x`, `UNWIND list AS item`, ...).
    /// It may hold a map or a dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to come from a non-graph literal (int, float, bool,
    /// string, list). Property access is NOT allowed on these at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (from
    /// plan_with_scope string vars). It is compatible with any concrete type,
    /// which lets subqueries re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of type `self` may be used where `expected`
    /// is required.
    ///
    /// The check is asymmetric: `Imported` satisfies every expectation (its
    /// real type is unknown at plan time) and `ScalarLiteral` satisfies
    /// `Scalar`, but not the other way round. Everything else must match
    /// exactly.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match (self, expected) {
            (VariableType::Imported, _) => true,
            (VariableType::ScalarLiteral, VariableType::Scalar) => true,
            (actual, wanted) => actual == wanted,
        }
    }
}
57
58/// Information about a variable in scope during planning.
59#[derive(Debug, Clone)]
60pub struct VariableInfo {
61    /// Variable name as written in the query.
62    pub name: String,
63    /// Semantic type of the variable.
64    pub var_type: VariableType,
65    /// True if this is a variable-length path (VLP) step variable.
66    ///
67    /// VLP step variables are typed as Edge but semantically hold edge lists.
68    pub is_vlp: bool,
69}
70
71impl VariableInfo {
72    pub fn new(name: String, var_type: VariableType) -> Self {
73        Self {
74            name,
75            var_type,
76            is_vlp: false,
77        }
78    }
79}
80
81/// Find a variable in scope by name.
82fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
83    vars.iter().find(|v| v.name == name)
84}
85
86/// Check if a variable is in scope.
87fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
88    find_var_in_scope(vars, name).is_some()
89}
90
91/// Check if an expression contains a pattern predicate.
92fn contains_pattern_predicate(expr: &Expr) -> bool {
93    if matches!(
94        expr,
95        Expr::Exists {
96            from_pattern_predicate: true,
97            ..
98        }
99    ) {
100        return true;
101    }
102    let mut found = false;
103    expr.for_each_child(&mut |child| {
104        if !found {
105            found = contains_pattern_predicate(child);
106        }
107    });
108    found
109}
110
111/// Add a variable to scope with type conflict validation.
112/// Returns an error if the variable already exists with a different type.
113fn add_var_to_scope(
114    vars: &mut Vec<VariableInfo>,
115    name: &str,
116    var_type: VariableType,
117) -> Result<()> {
118    if name.is_empty() {
119        return Ok(());
120    }
121
122    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
123        if existing.var_type == VariableType::Imported {
124            // Imported vars upgrade to the concrete type
125            existing.var_type = var_type;
126        } else if var_type == VariableType::Imported || existing.var_type == var_type {
127            // New type is Imported (keep existing) or same type — no conflict
128        } else if matches!(
129            existing.var_type,
130            VariableType::Scalar | VariableType::ScalarLiteral
131        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
132        {
133            // Scalar can be used as Node/Edge in CREATE context — a scalar
134            // holding a node/edge reference is valid for pattern use
135            existing.var_type = var_type;
136        } else {
137            return Err(anyhow!(
138                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
139                name,
140                existing.var_type,
141                var_type
142            ));
143        }
144    } else {
145        vars.push(VariableInfo::new(name.to_string(), var_type));
146    }
147    Ok(())
148}
149
150/// Convert VariableInfo vec to String vec for backward compatibility
151fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
152    vars.iter().map(|v| v.name.clone()).collect()
153}
154
155fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
156    match expr {
157        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
158            .map(|info| info.var_type)
159            .unwrap_or(VariableType::Scalar),
160        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
161        // Known non-graph literals: property access is NOT valid on these.
162        Expr::Literal(CypherLiteral::Integer(_))
163        | Expr::Literal(CypherLiteral::Float(_))
164        | Expr::Literal(CypherLiteral::String(_))
165        | Expr::Literal(CypherLiteral::Bool(_))
166        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
167        Expr::FunctionCall { name, args, .. } => {
168            let lower = name.to_lowercase();
169            if lower == "coalesce" {
170                infer_coalesce_type(args, vars_in_scope)
171            } else if lower == "collect" && !args.is_empty() {
172                let collected = infer_with_output_type(&args[0], vars_in_scope);
173                if matches!(
174                    collected,
175                    VariableType::Node
176                        | VariableType::Edge
177                        | VariableType::Path
178                        | VariableType::Imported
179                ) {
180                    collected
181                } else {
182                    VariableType::Scalar
183                }
184            } else {
185                VariableType::Scalar
186            }
187        }
188        // WITH list literals/expressions produce scalar list values. Preserving
189        // entity typing here causes invalid node/edge reuse in later MATCH clauses
190        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
191        // Lists are ScalarLiteral since property access is not valid on them.
192        Expr::List(_) => VariableType::ScalarLiteral,
193        _ => VariableType::Scalar,
194    }
195}
196
197fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
198    let mut resolved: Option<VariableType> = None;
199    let mut saw_imported = false;
200    for arg in args {
201        let t = infer_with_output_type(arg, vars_in_scope);
202        match t {
203            VariableType::Node | VariableType::Edge | VariableType::Path => {
204                if let Some(existing) = resolved {
205                    if existing != t {
206                        return VariableType::Scalar;
207                    }
208                } else {
209                    resolved = Some(t);
210                }
211            }
212            VariableType::Imported => saw_imported = true,
213            VariableType::Scalar | VariableType::ScalarLiteral => {}
214        }
215    }
216    if let Some(t) = resolved {
217        t
218    } else if saw_imported {
219        VariableType::Imported
220    } else {
221        VariableType::Scalar
222    }
223}
224
225fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
226    match expr {
227        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
228            .map(|info| info.var_type)
229            .unwrap_or(VariableType::Scalar),
230        Expr::FunctionCall { name, args, .. }
231            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
232        {
233            infer_with_output_type(&args[0], vars_in_scope)
234        }
235        Expr::List(items) => {
236            let mut inferred: Option<VariableType> = None;
237            for item in items {
238                let t = infer_with_output_type(item, vars_in_scope);
239                if !matches!(
240                    t,
241                    VariableType::Node
242                        | VariableType::Edge
243                        | VariableType::Path
244                        | VariableType::Imported
245                ) {
246                    return VariableType::Scalar;
247                }
248                if let Some(existing) = inferred {
249                    if existing != t
250                        && t != VariableType::Imported
251                        && existing != VariableType::Imported
252                    {
253                        return VariableType::Scalar;
254                    }
255                    if existing == VariableType::Imported && t != VariableType::Imported {
256                        inferred = Some(t);
257                    }
258                } else {
259                    inferred = Some(t);
260                }
261            }
262            inferred.unwrap_or(VariableType::Scalar)
263        }
264        _ => VariableType::Scalar,
265    }
266}
267
268/// Collect all variable names referenced in an expression
269fn collect_expr_variables(expr: &Expr) -> Vec<String> {
270    let mut vars = Vec::new();
271    collect_expr_variables_inner(expr, &mut vars);
272    vars
273}
274
275fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
276    let mut add_var = |name: &String| {
277        if !vars.contains(name) {
278            vars.push(name.clone());
279        }
280    };
281
282    match expr {
283        Expr::Variable(name) => add_var(name),
284        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
285        Expr::BinaryOp { left, right, .. } => {
286            collect_expr_variables_inner(left, vars);
287            collect_expr_variables_inner(right, vars);
288        }
289        Expr::UnaryOp { expr: e, .. }
290        | Expr::IsNull(e)
291        | Expr::IsNotNull(e)
292        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
293        Expr::FunctionCall { args, .. } => {
294            for a in args {
295                collect_expr_variables_inner(a, vars);
296            }
297        }
298        Expr::List(items) => {
299            for item in items {
300                collect_expr_variables_inner(item, vars);
301            }
302        }
303        Expr::In { expr: e, list } => {
304            collect_expr_variables_inner(e, vars);
305            collect_expr_variables_inner(list, vars);
306        }
307        Expr::Case {
308            expr: case_expr,
309            when_then,
310            else_expr,
311        } => {
312            if let Some(e) = case_expr {
313                collect_expr_variables_inner(e, vars);
314            }
315            for (w, t) in when_then {
316                collect_expr_variables_inner(w, vars);
317                collect_expr_variables_inner(t, vars);
318            }
319            if let Some(e) = else_expr {
320                collect_expr_variables_inner(e, vars);
321            }
322        }
323        Expr::Map(entries) => {
324            for (_, v) in entries {
325                collect_expr_variables_inner(v, vars);
326            }
327        }
328        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
329        Expr::ArrayIndex { array, index } => {
330            collect_expr_variables_inner(array, vars);
331            collect_expr_variables_inner(index, vars);
332        }
333        Expr::ArraySlice { array, start, end } => {
334            collect_expr_variables_inner(array, vars);
335            if let Some(s) = start {
336                collect_expr_variables_inner(s, vars);
337            }
338            if let Some(e) = end {
339                collect_expr_variables_inner(e, vars);
340            }
341        }
342        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
343        // they introduce local variable bindings not in outer scope.
344        _ => {}
345    }
346}
347
348/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
349///
350/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
351/// before the final RETURN projection without losing alias semantics.
352fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
353    let repr = expr.to_string_repr();
354    if let Some(rewritten) = aliases.get(&repr) {
355        return rewritten.clone();
356    }
357
358    match expr {
359        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
360        Expr::Property(base, prop) => Expr::Property(
361            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
362            prop.clone(),
363        ),
364        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
365            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
366            op: *op,
367            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
368        },
369        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
370            op: *op,
371            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
372        },
373        Expr::FunctionCall {
374            name,
375            args,
376            distinct,
377            window_spec,
378        } => Expr::FunctionCall {
379            name: name.clone(),
380            args: args
381                .iter()
382                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
383                .collect(),
384            distinct: *distinct,
385            window_spec: window_spec.clone(),
386        },
387        Expr::List(items) => Expr::List(
388            items
389                .iter()
390                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
391                .collect(),
392        ),
393        Expr::Map(entries) => Expr::Map(
394            entries
395                .iter()
396                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
397                .collect(),
398        ),
399        Expr::Case {
400            expr: case_expr,
401            when_then,
402            else_expr,
403        } => Expr::Case {
404            expr: case_expr
405                .as_ref()
406                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
407            when_then: when_then
408                .iter()
409                .map(|(w, t)| {
410                    (
411                        rewrite_order_by_expr_with_aliases(w, aliases),
412                        rewrite_order_by_expr_with_aliases(t, aliases),
413                    )
414                })
415                .collect(),
416            else_expr: else_expr
417                .as_ref()
418                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
419        },
420        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
421        // they introduce local variable bindings that could shadow aliases.
422        _ => expr.clone(),
423    }
424}
425
426/// Validate function call argument types.
427/// Returns error if type constraints are violated.
428fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
429    let name_lower = name.to_lowercase();
430
431    // labels() requires Node
432    if name_lower == "labels"
433        && let Some(Expr::Variable(var_name)) = args.first()
434        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
435        && !info.var_type.is_compatible_with(VariableType::Node)
436    {
437        return Err(anyhow!(
438            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
439        ));
440    }
441
442    // type() requires Edge
443    if name_lower == "type"
444        && let Some(Expr::Variable(var_name)) = args.first()
445        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
446        && !info.var_type.is_compatible_with(VariableType::Edge)
447    {
448        return Err(anyhow!(
449            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
450        ));
451    }
452
453    // properties() requires Node/Edge/Map (not scalar literals)
454    if name_lower == "properties"
455        && let Some(arg) = args.first()
456    {
457        match arg {
458            Expr::Literal(CypherLiteral::Integer(_))
459            | Expr::Literal(CypherLiteral::Float(_))
460            | Expr::Literal(CypherLiteral::String(_))
461            | Expr::Literal(CypherLiteral::Bool(_))
462            | Expr::List(_) => {
463                return Err(anyhow!(
464                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
465                ));
466            }
467            Expr::Variable(var_name) => {
468                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
469                    && matches!(
470                        info.var_type,
471                        VariableType::Scalar | VariableType::ScalarLiteral
472                    )
473                {
474                    return Err(anyhow!(
475                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
476                    ));
477                }
478            }
479            _ => {}
480        }
481    }
482
483    // nodes()/relationships() require Path
484    if (name_lower == "nodes" || name_lower == "relationships")
485        && let Some(Expr::Variable(var_name)) = args.first()
486        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
487        && !info.var_type.is_compatible_with(VariableType::Path)
488    {
489        return Err(anyhow!(
490            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
491            name_lower
492        ));
493    }
494
495    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
496    if name_lower == "size"
497        && let Some(Expr::Variable(var_name)) = args.first()
498        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
499        && info.var_type == VariableType::Path
500    {
501        return Err(anyhow!(
502            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
503        ));
504    }
505
506    // length()/size() do NOT accept Node or single-Edge arguments.
507    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
508    // but are actually edge lists — size()/length() is valid on those.
509    if (name_lower == "length" || name_lower == "size")
510        && let Some(Expr::Variable(var_name)) = args.first()
511        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
512        && (info.var_type == VariableType::Node
513            || (info.var_type == VariableType::Edge && !info.is_vlp))
514    {
515        return Err(anyhow!(
516            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
517            name_lower
518        ));
519    }
520
521    Ok(())
522}
523
524/// Check if an expression is a non-boolean literal.
525fn is_non_boolean_literal(expr: &Expr) -> bool {
526    matches!(
527        expr,
528        Expr::Literal(CypherLiteral::Integer(_))
529            | Expr::Literal(CypherLiteral::Float(_))
530            | Expr::Literal(CypherLiteral::String(_))
531            | Expr::List(_)
532            | Expr::Map(_)
533    )
534}
535
536/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
537fn validate_boolean_expression(expr: &Expr) -> Result<()> {
538    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
539    if let Expr::BinaryOp { left, op, right } = expr
540        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
541    {
542        let op_name = format!("{op:?}").to_uppercase();
543        for operand in [left.as_ref(), right.as_ref()] {
544            if is_non_boolean_literal(operand) {
545                return Err(anyhow!(
546                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
547                    op_name
548                ));
549            }
550        }
551    }
552    if let Expr::UnaryOp {
553        op: uni_cypher::ast::UnaryOp::Not,
554        expr: inner,
555    } = expr
556        && is_non_boolean_literal(inner)
557    {
558        return Err(anyhow!(
559            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
560        ));
561    }
562    let mut result = Ok(());
563    expr.for_each_child(&mut |child| {
564        if result.is_ok() {
565            result = validate_boolean_expression(child);
566        }
567    });
568    result
569}
570
571/// Validate that all variables used in an expression are in scope.
572fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
573    let used_vars = collect_expr_variables(expr);
574    for var in used_vars {
575        if !is_var_in_scope(vars_in_scope, &var) {
576            return Err(anyhow!(
577                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
578                var
579            ));
580        }
581    }
582    Ok(())
583}
584
/// Returns `true` when `name` is one of the recognised aggregate functions.
///
/// The comparison is case-insensitive, so callers may pass the name exactly
/// as written in the query (`count`, `COUNT`, `percentileDisc`, ...).
fn is_aggregate_function_name(name: &str) -> bool {
    const AGGREGATE_FUNCTIONS: [&str; 10] = [
        "count",
        "sum",
        "avg",
        "min",
        "max",
        "collect",
        "stdev",
        "stdevp",
        "percentiledisc",
        "percentilecont",
    ];

    // ASCII case folding is enough because every name above is ASCII, and it
    // avoids allocating a lower-cased copy of `name` on each call.
    AGGREGATE_FUNCTIONS
        .iter()
        .any(|aggregate| name.eq_ignore_ascii_case(aggregate))
}
601
602/// Returns true if the expression is a window function (FunctionCall with window_spec).
603fn is_window_function(expr: &Expr) -> bool {
604    matches!(
605        expr,
606        Expr::FunctionCall {
607            window_spec: Some(_),
608            ..
609        }
610    )
611}
612
613/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
614/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
615/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
616/// size() call, BinaryOp, etc.).
617fn is_compound_aggregate(expr: &Expr) -> bool {
618    if !expr.is_aggregate() {
619        return false;
620    }
621    match expr {
622        Expr::FunctionCall {
623            name, window_spec, ..
624        } => {
625            // A bare aggregate FunctionCall is NOT compound
626            if window_spec.is_some() {
627                return true; // window wrapping an aggregate — treat as compound
628            }
629            !is_aggregate_function_name(name)
630        }
631        // Subquery aggregates are "bare" (not compound)
632        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
633        // Everything else (ListComprehension, BinaryOp, etc.) is compound
634        _ => true,
635    }
636}
637
638/// Recursively collect all bare aggregate FunctionCall sub-expressions from
639/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
640/// the outermost aggregate boundaries).
641///
642/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
643/// is searched because the body (`map_expr`, `predicate`, `expr`) references
644/// the loop variable, not outer-scope aggregates.
645fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
646    let mut out = Vec::new();
647    extract_inner_aggregates_rec(expr, &mut out);
648    out
649}
650
651fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
652    match expr {
653        Expr::FunctionCall {
654            name, window_spec, ..
655        } if window_spec.is_none() && is_aggregate_function_name(name) => {
656            // Found a bare aggregate — collect it and stop recursing
657            out.push(expr.clone());
658        }
659        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
660            out.push(expr.clone());
661        }
662        // For list comprehension, only search the `list` source for aggregates
663        Expr::ListComprehension { list, .. } => {
664            extract_inner_aggregates_rec(list, out);
665        }
666        // For quantifier, only search the `list` source
667        Expr::Quantifier { list, .. } => {
668            extract_inner_aggregates_rec(list, out);
669        }
670        // For reduce, search `init` and `list` (not the body `expr`)
671        Expr::Reduce { init, list, .. } => {
672            extract_inner_aggregates_rec(init, out);
673            extract_inner_aggregates_rec(list, out);
674        }
675        // Standard recursive cases
676        Expr::FunctionCall { args, .. } => {
677            for arg in args {
678                extract_inner_aggregates_rec(arg, out);
679            }
680        }
681        Expr::BinaryOp { left, right, .. } => {
682            extract_inner_aggregates_rec(left, out);
683            extract_inner_aggregates_rec(right, out);
684        }
685        Expr::UnaryOp { expr: e, .. }
686        | Expr::IsNull(e)
687        | Expr::IsNotNull(e)
688        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
689        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
690        Expr::List(items) => {
691            for item in items {
692                extract_inner_aggregates_rec(item, out);
693            }
694        }
695        Expr::Case {
696            expr: case_expr,
697            when_then,
698            else_expr,
699        } => {
700            if let Some(e) = case_expr {
701                extract_inner_aggregates_rec(e, out);
702            }
703            for (w, t) in when_then {
704                extract_inner_aggregates_rec(w, out);
705                extract_inner_aggregates_rec(t, out);
706            }
707            if let Some(e) = else_expr {
708                extract_inner_aggregates_rec(e, out);
709            }
710        }
711        Expr::In {
712            expr: in_expr,
713            list,
714        } => {
715            extract_inner_aggregates_rec(in_expr, out);
716            extract_inner_aggregates_rec(list, out);
717        }
718        Expr::ArrayIndex { array, index } => {
719            extract_inner_aggregates_rec(array, out);
720            extract_inner_aggregates_rec(index, out);
721        }
722        Expr::ArraySlice { array, start, end } => {
723            extract_inner_aggregates_rec(array, out);
724            if let Some(s) = start {
725                extract_inner_aggregates_rec(s, out);
726            }
727            if let Some(e) = end {
728                extract_inner_aggregates_rec(e, out);
729            }
730        }
731        Expr::Map(entries) => {
732            for (_, v) in entries {
733                extract_inner_aggregates_rec(v, out);
734            }
735        }
736        _ => {}
737    }
738}
739
740/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
741/// `Expr::Variable(aggregate_column_name(agg))`.
742///
743/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
744/// rewritten (the body references the loop variable, not outer-scope columns).
745fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
746    match expr {
747        Expr::FunctionCall {
748            name, window_spec, ..
749        } if window_spec.is_none() && is_aggregate_function_name(name) => {
750            // Replace bare aggregate with column reference
751            Expr::Variable(aggregate_column_name(expr))
752        }
753        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
754            Expr::Variable(aggregate_column_name(expr))
755        }
756        Expr::ListComprehension {
757            variable,
758            list,
759            where_clause,
760            map_expr,
761        } => Expr::ListComprehension {
762            variable: variable.clone(),
763            list: Box::new(replace_aggregates_with_columns(list)),
764            where_clause: where_clause.clone(), // don't touch — references loop var
765            map_expr: map_expr.clone(),         // don't touch — references loop var
766        },
767        Expr::Quantifier {
768            quantifier,
769            variable,
770            list,
771            predicate,
772        } => Expr::Quantifier {
773            quantifier: *quantifier,
774            variable: variable.clone(),
775            list: Box::new(replace_aggregates_with_columns(list)),
776            predicate: predicate.clone(), // don't touch — references loop var
777        },
778        Expr::Reduce {
779            accumulator,
780            init,
781            variable,
782            list,
783            expr: body,
784        } => Expr::Reduce {
785            accumulator: accumulator.clone(),
786            init: Box::new(replace_aggregates_with_columns(init)),
787            variable: variable.clone(),
788            list: Box::new(replace_aggregates_with_columns(list)),
789            expr: body.clone(), // don't touch — references loop var
790        },
791        Expr::FunctionCall {
792            name,
793            args,
794            distinct,
795            window_spec,
796        } => Expr::FunctionCall {
797            name: name.clone(),
798            args: args.iter().map(replace_aggregates_with_columns).collect(),
799            distinct: *distinct,
800            window_spec: window_spec.clone(),
801        },
802        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
803            left: Box::new(replace_aggregates_with_columns(left)),
804            op: *op,
805            right: Box::new(replace_aggregates_with_columns(right)),
806        },
807        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
808            op: *op,
809            expr: Box::new(replace_aggregates_with_columns(e)),
810        },
811        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
812        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
813        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
814        Expr::Property(base, prop) => Expr::Property(
815            Box::new(replace_aggregates_with_columns(base)),
816            prop.clone(),
817        ),
818        Expr::List(items) => {
819            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
820        }
821        Expr::Case {
822            expr: case_expr,
823            when_then,
824            else_expr,
825        } => Expr::Case {
826            expr: case_expr
827                .as_ref()
828                .map(|e| Box::new(replace_aggregates_with_columns(e))),
829            when_then: when_then
830                .iter()
831                .map(|(w, t)| {
832                    (
833                        replace_aggregates_with_columns(w),
834                        replace_aggregates_with_columns(t),
835                    )
836                })
837                .collect(),
838            else_expr: else_expr
839                .as_ref()
840                .map(|e| Box::new(replace_aggregates_with_columns(e))),
841        },
842        Expr::In {
843            expr: in_expr,
844            list,
845        } => Expr::In {
846            expr: Box::new(replace_aggregates_with_columns(in_expr)),
847            list: Box::new(replace_aggregates_with_columns(list)),
848        },
849        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
850            array: Box::new(replace_aggregates_with_columns(array)),
851            index: Box::new(replace_aggregates_with_columns(index)),
852        },
853        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
854            array: Box::new(replace_aggregates_with_columns(array)),
855            start: start
856                .as_ref()
857                .map(|e| Box::new(replace_aggregates_with_columns(e))),
858            end: end
859                .as_ref()
860                .map(|e| Box::new(replace_aggregates_with_columns(e))),
861        },
862        Expr::Map(entries) => Expr::Map(
863            entries
864                .iter()
865                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
866                .collect(),
867        ),
868        // Leaf expressions — return as-is
869        other => other.clone(),
870    }
871}
872
873/// Check if an expression contains any aggregate function (recursively).
874fn contains_aggregate_recursive(expr: &Expr) -> bool {
875    match expr {
876        Expr::FunctionCall { name, args, .. } => {
877            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
878        }
879        Expr::BinaryOp { left, right, .. } => {
880            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
881        }
882        Expr::UnaryOp { expr: e, .. }
883        | Expr::IsNull(e)
884        | Expr::IsNotNull(e)
885        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
886        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
887        Expr::Case {
888            expr,
889            when_then,
890            else_expr,
891        } => {
892            expr.as_deref().is_some_and(contains_aggregate_recursive)
893                || when_then.iter().any(|(w, t)| {
894                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
895                })
896                || else_expr
897                    .as_deref()
898                    .is_some_and(contains_aggregate_recursive)
899        }
900        Expr::In { expr, list } => {
901            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
902        }
903        Expr::Property(base, _) => contains_aggregate_recursive(base),
904        Expr::ListComprehension { list, .. } => {
905            // Only check the list source — where_clause/map_expr reference the loop variable
906            contains_aggregate_recursive(list)
907        }
908        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
909        Expr::Reduce { init, list, .. } => {
910            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
911        }
912        Expr::ArrayIndex { array, index } => {
913            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
914        }
915        Expr::ArraySlice { array, start, end } => {
916            contains_aggregate_recursive(array)
917                || start.as_deref().is_some_and(contains_aggregate_recursive)
918                || end.as_deref().is_some_and(contains_aggregate_recursive)
919        }
920        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
921        _ => false,
922    }
923}
924
925/// Check if an expression contains a non-deterministic function (e.g. rand()).
926fn contains_non_deterministic(expr: &Expr) -> bool {
927    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
928        return true;
929    }
930    let mut found = false;
931    expr.for_each_child(&mut |child| {
932        if !found {
933            found = contains_non_deterministic(child);
934        }
935    });
936    found
937}
938
939fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
940    match expr {
941        Expr::FunctionCall { name, args, .. } => {
942            if is_aggregate_function_name(name) {
943                out.insert(expr.to_string_repr());
944                return;
945            }
946            for arg in args {
947                collect_aggregate_reprs(arg, out);
948            }
949        }
950        Expr::BinaryOp { left, right, .. } => {
951            collect_aggregate_reprs(left, out);
952            collect_aggregate_reprs(right, out);
953        }
954        Expr::UnaryOp { expr, .. }
955        | Expr::IsNull(expr)
956        | Expr::IsNotNull(expr)
957        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
958        Expr::List(items) => {
959            for item in items {
960                collect_aggregate_reprs(item, out);
961            }
962        }
963        Expr::Case {
964            expr,
965            when_then,
966            else_expr,
967        } => {
968            if let Some(e) = expr {
969                collect_aggregate_reprs(e, out);
970            }
971            for (w, t) in when_then {
972                collect_aggregate_reprs(w, out);
973                collect_aggregate_reprs(t, out);
974            }
975            if let Some(e) = else_expr {
976                collect_aggregate_reprs(e, out);
977            }
978        }
979        Expr::In { expr, list } => {
980            collect_aggregate_reprs(expr, out);
981            collect_aggregate_reprs(list, out);
982        }
983        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
984        Expr::ListComprehension { list, .. } => {
985            collect_aggregate_reprs(list, out);
986        }
987        Expr::Quantifier { list, .. } => {
988            collect_aggregate_reprs(list, out);
989        }
990        Expr::Reduce { init, list, .. } => {
991            collect_aggregate_reprs(init, out);
992            collect_aggregate_reprs(list, out);
993        }
994        Expr::ArrayIndex { array, index } => {
995            collect_aggregate_reprs(array, out);
996            collect_aggregate_reprs(index, out);
997        }
998        Expr::ArraySlice { array, start, end } => {
999            collect_aggregate_reprs(array, out);
1000            if let Some(s) = start {
1001                collect_aggregate_reprs(s, out);
1002            }
1003            if let Some(e) = end {
1004                collect_aggregate_reprs(e, out);
1005            }
1006        }
1007        _ => {}
1008    }
1009}
1010
/// A reference found outside of any aggregate call, as gathered by
/// `collect_non_aggregate_refs`.
#[derive(Clone, Debug)]
enum NonAggregateRef {
    /// A bare variable reference, carrying the variable name.
    Var(String),
    /// A property access.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the variable the property is read from, when the base of
        /// the access is a plain variable; `None` for any other base.
        base_var: Option<String>,
    },
}
1019
1020fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1021    match expr {
1022        Expr::FunctionCall { name, args, .. } => {
1023            if is_aggregate_function_name(name) {
1024                return;
1025            }
1026            for arg in args {
1027                collect_non_aggregate_refs(arg, inside_agg, out);
1028            }
1029        }
1030        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1031        Expr::Property(base, _) if !inside_agg => {
1032            let base_var = if let Expr::Variable(v) = base.as_ref() {
1033                Some(v.clone())
1034            } else {
1035                None
1036            };
1037            out.push(NonAggregateRef::Property {
1038                repr: expr.to_string_repr(),
1039                base_var,
1040            });
1041        }
1042        Expr::BinaryOp { left, right, .. } => {
1043            collect_non_aggregate_refs(left, inside_agg, out);
1044            collect_non_aggregate_refs(right, inside_agg, out);
1045        }
1046        Expr::UnaryOp { expr, .. }
1047        | Expr::IsNull(expr)
1048        | Expr::IsNotNull(expr)
1049        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1050        Expr::List(items) => {
1051            for item in items {
1052                collect_non_aggregate_refs(item, inside_agg, out);
1053            }
1054        }
1055        Expr::Case {
1056            expr,
1057            when_then,
1058            else_expr,
1059        } => {
1060            if let Some(e) = expr {
1061                collect_non_aggregate_refs(e, inside_agg, out);
1062            }
1063            for (w, t) in when_then {
1064                collect_non_aggregate_refs(w, inside_agg, out);
1065                collect_non_aggregate_refs(t, inside_agg, out);
1066            }
1067            if let Some(e) = else_expr {
1068                collect_non_aggregate_refs(e, inside_agg, out);
1069            }
1070        }
1071        Expr::In { expr, list } => {
1072            collect_non_aggregate_refs(expr, inside_agg, out);
1073            collect_non_aggregate_refs(list, inside_agg, out);
1074        }
1075        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1076        // source. The body references the loop variable, not outer-scope vars.
1077        Expr::ListComprehension { list, .. } => {
1078            collect_non_aggregate_refs(list, inside_agg, out);
1079        }
1080        Expr::Quantifier { list, .. } => {
1081            collect_non_aggregate_refs(list, inside_agg, out);
1082        }
1083        Expr::Reduce { init, list, .. } => {
1084            collect_non_aggregate_refs(init, inside_agg, out);
1085            collect_non_aggregate_refs(list, inside_agg, out);
1086        }
1087        _ => {}
1088    }
1089}
1090
1091fn validate_with_order_by_aggregate_item(
1092    expr: &Expr,
1093    projected_aggregate_reprs: &HashSet<String>,
1094    projected_simple_reprs: &HashSet<String>,
1095    projected_aliases: &HashSet<String>,
1096) -> Result<()> {
1097    let mut aggregate_reprs = HashSet::new();
1098    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1099    for agg in aggregate_reprs {
1100        if !projected_aggregate_reprs.contains(&agg) {
1101            return Err(anyhow!(
1102                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1103                agg
1104            ));
1105        }
1106    }
1107
1108    let mut refs = Vec::new();
1109    collect_non_aggregate_refs(expr, false, &mut refs);
1110    refs.retain(|r| match r {
1111        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1112        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1113    });
1114
1115    let mut dedup = HashSet::new();
1116    refs.retain(|r| {
1117        let key = match r {
1118            NonAggregateRef::Var(v) => format!("v:{v}"),
1119            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1120        };
1121        dedup.insert(key)
1122    });
1123
1124    if refs.len() > 1 {
1125        return Err(anyhow!(
1126            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1127        ));
1128    }
1129
1130    if let Some(r) = refs.first() {
1131        return match r {
1132            NonAggregateRef::Var(v) => Err(anyhow!(
1133                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1134                v
1135            )),
1136            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1137                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1138                base_var
1139                    .clone()
1140                    .unwrap_or_else(|| "<property-base>".to_string())
1141            )),
1142        };
1143    }
1144
1145    Ok(())
1146}
1147
1148/// Validate that no aggregation functions appear in WHERE clause.
1149fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1150    if contains_aggregate_recursive(predicate) {
1151        return Err(anyhow!(
1152            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1153        ));
1154    }
1155    Ok(())
1156}
1157
/// Numeric value produced by `eval_const_numeric_expr` when evaluating a
/// constant expression.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// A 64-bit signed integer.
    Int(i64),
    /// A 64-bit float.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as an `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            Self::Float(float_value) => float_value,
            Self::Int(int_value) => int_value as f64,
        }
    }
}
1172
1173fn eval_const_numeric_expr(
1174    expr: &Expr,
1175    params: &HashMap<String, uni_common::Value>,
1176) -> Result<ConstNumber> {
1177    match expr {
1178        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1179        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1180        Expr::Parameter(name) => match params.get(name) {
1181            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1182            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1183            Some(uni_common::Value::Null) => Err(anyhow!(
1184                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1185                name
1186            )),
1187            Some(other) => Err(anyhow!(
1188                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1189                name,
1190                other
1191            )),
1192            None => Err(anyhow!(
1193                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1194            )),
1195        },
1196        Expr::UnaryOp {
1197            op: uni_cypher::ast::UnaryOp::Neg,
1198            expr,
1199        } => match eval_const_numeric_expr(expr, params)? {
1200            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1201            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1202        },
1203        Expr::BinaryOp { left, op, right } => {
1204            let l = eval_const_numeric_expr(left, params)?;
1205            let r = eval_const_numeric_expr(right, params)?;
1206            match op {
1207                BinaryOp::Add => match (l, r) {
1208                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1209                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1210                },
1211                BinaryOp::Sub => match (l, r) {
1212                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1213                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1214                },
1215                BinaryOp::Mul => match (l, r) {
1216                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1217                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1218                },
1219                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1220                BinaryOp::Mod => match (l, r) {
1221                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1222                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1223                },
1224                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1225                _ => Err(anyhow!(
1226                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1227                )),
1228            }
1229        }
1230        Expr::FunctionCall { name, args, .. } => {
1231            let lower = name.to_lowercase();
1232            match lower.as_str() {
1233                "rand" if args.is_empty() => {
1234                    use rand::Rng;
1235                    let mut rng = rand::thread_rng();
1236                    Ok(ConstNumber::Float(rng.r#gen::<f64>()))
1237                }
1238                "tointeger" | "toint" if args.len() == 1 => {
1239                    match eval_const_numeric_expr(&args[0], params)? {
1240                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1241                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1242                    }
1243                }
1244                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1245                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1246                )),
1247                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1248                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1249                )),
1250                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1251                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1252                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1253                },
1254                _ => Err(anyhow!(
1255                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1256                )),
1257            }
1258        }
1259        _ => Err(anyhow!(
1260            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1261        )),
1262    }
1263}
1264
1265/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1266/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1267fn parse_non_negative_integer(
1268    expr: &Expr,
1269    clause_name: &str,
1270    params: &HashMap<String, uni_common::Value>,
1271) -> Result<Option<usize>> {
1272    let referenced_vars = collect_expr_variables(expr);
1273    if !referenced_vars.is_empty() {
1274        return Err(anyhow!(
1275            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1276            clause_name
1277        ));
1278    }
1279
1280    let value = eval_const_numeric_expr(expr, params)?;
1281    let as_int = match value {
1282        ConstNumber::Int(v) => v,
1283        ConstNumber::Float(v) => {
1284            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1285                return Err(anyhow!(
1286                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1287                    clause_name
1288                ));
1289            }
1290            v as i64
1291        }
1292    };
1293    if as_int < 0 {
1294        return Err(anyhow!(
1295            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1296            clause_name
1297        ));
1298    }
1299    Ok(Some(as_int as usize))
1300}
1301
1302/// Validate that aggregation functions are not nested.
1303fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1304    if let Expr::FunctionCall { name, args, .. } = expr
1305        && is_aggregate_function_name(name)
1306    {
1307        for arg in args {
1308            if contains_aggregate_recursive(arg) {
1309                return Err(anyhow!(
1310                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1311                ));
1312            }
1313            if contains_non_deterministic(arg) {
1314                return Err(anyhow!(
1315                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1316                ));
1317            }
1318        }
1319    }
1320    let mut result = Ok(());
1321    expr.for_each_child(&mut |child| {
1322        if result.is_ok() {
1323            result = validate_no_nested_aggregation(child);
1324        }
1325    });
1326    result
1327}
1328
1329/// Validate that an expression does not access properties or labels of
1330/// deleted entities. `type(r)` on a deleted relationship is allowed per
1331/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1332fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1333    // Check n.prop on a deleted variable
1334    if let Expr::Property(inner, _) = expr
1335        && let Expr::Variable(name) = inner.as_ref()
1336        && deleted_vars.contains(name)
1337    {
1338        return Err(anyhow!(
1339            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1340            name
1341        ));
1342    }
1343    // Check labels(n) or keys(n) on a deleted variable
1344    if let Expr::FunctionCall { name, args, .. } = expr
1345        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1346        && args.len() == 1
1347        && let Expr::Variable(var) = &args[0]
1348        && deleted_vars.contains(var)
1349    {
1350        return Err(anyhow!(
1351            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1352            name.to_lowercase(),
1353            var
1354        ));
1355    }
1356    let mut result = Ok(());
1357    expr.for_each_child(&mut |child| {
1358        if result.is_ok() {
1359            result = validate_no_deleted_entity_access(child, deleted_vars);
1360        }
1361    });
1362    result
1363}
1364
1365/// Validate that all variables referenced in properties are defined,
1366/// either in scope or in the local CREATE variable list.
1367fn validate_property_variables(
1368    properties: &Option<Expr>,
1369    vars_in_scope: &[VariableInfo],
1370    create_vars: &[&str],
1371) -> Result<()> {
1372    if let Some(props) = properties {
1373        for var in collect_expr_variables(props) {
1374            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1375                return Err(anyhow!(
1376                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1377                    var
1378                ));
1379            }
1380        }
1381    }
1382    Ok(())
1383}
1384
1385/// Check that a variable name is not already bound in scope or in the local CREATE list.
1386/// Used to prevent rebinding in CREATE clauses.
1387fn check_not_already_bound(
1388    name: &str,
1389    vars_in_scope: &[VariableInfo],
1390    create_vars: &[&str],
1391) -> Result<()> {
1392    if is_var_in_scope(vars_in_scope, name) {
1393        return Err(anyhow!(
1394            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1395            name
1396        ));
1397    }
1398    if create_vars.contains(&name) {
1399        return Err(anyhow!(
1400            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1401            name
1402        ));
1403    }
1404    Ok(())
1405}
1406
1407fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1408    let mut scope = vars_in_scope.to_vec();
1409
1410    for path in &pattern.paths {
1411        if let Some(path_var) = &path.variable
1412            && !path_var.is_empty()
1413            && !is_var_in_scope(&scope, path_var)
1414        {
1415            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1416        }
1417        for element in &path.elements {
1418            match element {
1419                PatternElement::Node(n) => {
1420                    if let Some(v) = &n.variable
1421                        && !v.is_empty()
1422                        && !is_var_in_scope(&scope, v)
1423                    {
1424                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1425                    }
1426                }
1427                PatternElement::Relationship(r) => {
1428                    if let Some(v) = &r.variable
1429                        && !v.is_empty()
1430                        && !is_var_in_scope(&scope, v)
1431                    {
1432                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1433                    }
1434                }
1435                PatternElement::Parenthesized { .. } => {}
1436            }
1437        }
1438    }
1439
1440    scope
1441}
1442
1443fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1444    match item {
1445        SetItem::Property { expr, value } => {
1446            validate_expression_variables(expr, vars_in_scope)?;
1447            validate_expression(expr, vars_in_scope)?;
1448            validate_expression_variables(value, vars_in_scope)?;
1449            validate_expression(value, vars_in_scope)?;
1450            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1451                return Err(anyhow!(
1452                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1453                ));
1454            }
1455        }
1456        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1457            if !is_var_in_scope(vars_in_scope, variable) {
1458                return Err(anyhow!(
1459                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1460                    variable
1461                ));
1462            }
1463            validate_expression_variables(value, vars_in_scope)?;
1464            validate_expression(value, vars_in_scope)?;
1465            if contains_pattern_predicate(value) {
1466                return Err(anyhow!(
1467                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1468                ));
1469            }
1470        }
1471        SetItem::Labels { variable, .. } => {
1472            if !is_var_in_scope(vars_in_scope, variable) {
1473                return Err(anyhow!(
1474                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1475                    variable
1476                ));
1477            }
1478        }
1479    }
1480
1481    Ok(())
1482}
1483
1484/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1485/// The OpenCypher spec requires all property values in MERGE to be non-null.
1486fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1487    if let Some(Expr::Map(entries)) = properties {
1488        for (key, value) in entries {
1489            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1490                return Err(anyhow!(
1491                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1492                    key
1493                ));
1494            }
1495        }
1496    }
1497    Ok(())
1498}
1499
1500fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1501    for path in &merge_clause.pattern.paths {
1502        for element in &path.elements {
1503            match element {
1504                PatternElement::Node(n) => {
1505                    if let Some(Expr::Parameter(_)) = &n.properties {
1506                        return Err(anyhow!(
1507                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1508                        ));
1509                    }
1510                    reject_null_merge_properties(&n.properties)?;
1511                    // VariableAlreadyBound: reject if a bound variable is used
1512                    // as a standalone MERGE node or introduces new labels/properties.
1513                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1514                    if let Some(variable) = &n.variable
1515                        && !variable.is_empty()
1516                        && is_var_in_scope(vars_in_scope, variable)
1517                    {
1518                        let is_standalone = path.elements.len() == 1;
1519                        let has_new_labels = !n.labels.is_empty();
1520                        let has_new_properties = n.properties.is_some();
1521                        if is_standalone || has_new_labels || has_new_properties {
1522                            return Err(anyhow!(
1523                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1524                                variable
1525                            ));
1526                        }
1527                    }
1528                }
1529                PatternElement::Relationship(r) => {
1530                    if let Some(variable) = &r.variable
1531                        && !variable.is_empty()
1532                        && is_var_in_scope(vars_in_scope, variable)
1533                    {
1534                        return Err(anyhow!(
1535                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1536                            variable
1537                        ));
1538                    }
1539                    if r.types.len() != 1 {
1540                        return Err(anyhow!(
1541                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1542                        ));
1543                    }
1544                    if r.range.is_some() {
1545                        return Err(anyhow!(
1546                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1547                        ));
1548                    }
1549                    if let Some(Expr::Parameter(_)) = &r.properties {
1550                        return Err(anyhow!(
1551                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1552                        ));
1553                    }
1554                    reject_null_merge_properties(&r.properties)?;
1555                }
1556                PatternElement::Parenthesized { .. } => {}
1557            }
1558        }
1559    }
1560
1561    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1562    for item in &merge_clause.on_create {
1563        validate_merge_set_item(item, &merge_scope)?;
1564    }
1565    for item in &merge_clause.on_match {
1566        validate_merge_set_item(item, &merge_scope)?;
1567    }
1568
1569    Ok(())
1570}
1571
1572/// Recursively validate an expression for type errors, undefined variables, etc.
1573fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1574    // Validate boolean operators and nested aggregation first
1575    validate_boolean_expression(expr)?;
1576    validate_no_nested_aggregation(expr)?;
1577
1578    // Helper to validate multiple expressions
1579    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1580        for e in exprs {
1581            validate_expression(e, vars)?;
1582        }
1583        Ok(())
1584    }
1585
1586    match expr {
1587        Expr::FunctionCall { name, args, .. } => {
1588            validate_function_call(name, args, vars_in_scope)?;
1589            validate_all(args, vars_in_scope)
1590        }
1591        Expr::BinaryOp { left, right, .. } => {
1592            validate_expression(left, vars_in_scope)?;
1593            validate_expression(right, vars_in_scope)
1594        }
1595        Expr::UnaryOp { expr: e, .. }
1596        | Expr::IsNull(e)
1597        | Expr::IsNotNull(e)
1598        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1599        Expr::Property(base, prop) => {
1600            if let Expr::Variable(var_name) = base.as_ref()
1601                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1602            {
1603                // Paths don't have properties
1604                if var_info.var_type == VariableType::Path {
1605                    return Err(anyhow!(
1606                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1607                        var_name,
1608                        prop
1609                    ));
1610                }
1611                // Known non-graph literals (int, float, bool, string, list) don't have properties
1612                if var_info.var_type == VariableType::ScalarLiteral {
1613                    return Err(anyhow!(
1614                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1615                    ));
1616                }
1617            }
1618            validate_expression(base, vars_in_scope)
1619        }
1620        Expr::List(items) => validate_all(items, vars_in_scope),
1621        Expr::Case {
1622            expr: case_expr,
1623            when_then,
1624            else_expr,
1625        } => {
1626            if let Some(e) = case_expr {
1627                validate_expression(e, vars_in_scope)?;
1628            }
1629            for (w, t) in when_then {
1630                validate_expression(w, vars_in_scope)?;
1631                validate_expression(t, vars_in_scope)?;
1632            }
1633            if let Some(e) = else_expr {
1634                validate_expression(e, vars_in_scope)?;
1635            }
1636            Ok(())
1637        }
1638        Expr::In { expr: e, list } => {
1639            validate_expression(e, vars_in_scope)?;
1640            validate_expression(list, vars_in_scope)
1641        }
1642        Expr::Exists {
1643            query,
1644            from_pattern_predicate: true,
1645        } => {
1646            // Pattern predicates cannot introduce new named variables.
1647            // Extract named vars from inner MATCH pattern, check each is in scope.
1648            if let Query::Single(stmt) = query.as_ref() {
1649                for clause in &stmt.clauses {
1650                    if let Clause::Match(m) = clause {
1651                        for path in &m.pattern.paths {
1652                            for elem in &path.elements {
1653                                match elem {
1654                                    PatternElement::Node(n) => {
1655                                        if let Some(var) = &n.variable
1656                                            && !is_var_in_scope(vars_in_scope, var)
1657                                        {
1658                                            return Err(anyhow!(
1659                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1660                                                var
1661                                            ));
1662                                        }
1663                                    }
1664                                    PatternElement::Relationship(r) => {
1665                                        if let Some(var) = &r.variable
1666                                            && !is_var_in_scope(vars_in_scope, var)
1667                                        {
1668                                            return Err(anyhow!(
1669                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1670                                                var
1671                                            ));
1672                                        }
1673                                    }
1674                                    _ => {}
1675                                }
1676                            }
1677                        }
1678                    }
1679                }
1680            }
1681            Ok(())
1682        }
1683        _ => Ok(()),
1684    }
1685}
1686
/// One step (hop) in a Quantified Path Pattern sub-pattern.
///
/// Carried in the `qpp_steps` field of [`LogicalPlan::Traverse`]; that field is
/// `Some` only for quantified path patterns.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node (`None` means no constraint).
    pub target_label: Option<String>,
}
1699
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
///
/// Child plans are held in boxed fields such as `input`, `left`/`right`,
/// `subquery` or `plan`.
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan the main vertices table filtering by label name(s) (`MATCH (n:Unknown)`).
    /// Used for labels not defined in schema (schemaless support).
    /// When `labels` has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Edge traversal from `source_variable` to `target_variable` with edge types
    /// given as IDs (`edge_type_ids`). Compare [`LogicalPlan::TraverseMainByType`],
    /// which identifies edge types by name instead.
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        /// Variable bound to the traversed edge(s), if the pattern names one.
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        /// Variable bound to the traversed edge(s), if the pattern names one.
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Filter step: `predicate` evaluated over the rows of `input`.
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// CREATE of a single `pattern` on top of `input`
    /// (see [`LogicalPlan::CreateBatch`] for the batched form).
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with its optional ON MATCH / ON CREATE SET clauses.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    /// SET items applied on top of `input`.
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    /// REMOVE items applied on top of `input`.
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    /// DELETE of the listed expressions.
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        // NOTE(review): presumably set for `DETACH DELETE` — confirm against the planner.
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    /// Sort step carrying the ORDER BY items.
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    /// Row-count limiting step with already-resolved numeric bounds.
    Limit {
        input: Box<LogicalPlan>,
        // NOTE(review): presumably the SKIP count — confirm against the planner.
        skip: Option<usize>,
        // NOTE(review): presumably the LIMIT count — confirm against the planner.
        fetch: Option<usize>,
    },
    /// Aggregation with grouping expressions (`group_by`) and aggregate
    /// expressions (`aggregates`).
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    /// Distinct step over `input`.
    Distinct {
        input: Box<LogicalPlan>,
    },
    /// Window step carrying the window expressions to evaluate over `input`.
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection; each entry is an expression with an optional alias.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    /// Cross join of two sub-plans.
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    /// Apply step pairing `input` with a `subquery` plan and an optional
    /// `input_filter`.
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    /// Recursive CTE named `cte_name`, made of an `initial` plan and a
    /// `recursive` plan.
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    /// Procedure call with its argument expressions.
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        // NOTE(review): presumably (yielded name, optional alias) pairs — confirm.
        yield_items: Vec<(String, Option<String>)>,
    },
    /// Subquery call: a `subquery` plan on top of `input`.
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    /// Vector KNN lookup on `property` for the label identified by `label_id`.
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    /// Inverted-index lookup on `property` for the label identified by `label_id`.
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// Shortest-path search between `source_variable` and `target_variable`,
    /// bound to `path_variable`.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Quantified pattern: `pattern_plan` describes one iteration, bounded by
    /// `min_iterations` and `max_iterations`.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    /// Create a vector index described by `config`.
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    /// Create a full-text index described by `config`.
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    /// Create a scalar index described by `config`.
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    /// Create a JSON full-text-search index described by `config`.
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    /// Drop the index called `name`.
    DropIndex {
        name: String,
        if_exists: bool,
    },
    /// List indexes, with an optional filter string.
    ShowIndexes {
        filter: Option<String>,
    },
    /// Copy between `source` and `target`; `is_export` distinguishes the direction.
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    /// Backup to `destination` with the given options.
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around the plan of the inner query.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    /// Copy the data of `label` to `path` in the given `format`.
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    /// Copy data for `label` from `path` in the given `format`.
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
}
2097
/// Extracted vector similarity predicate info for optimization
struct VectorSimilarityPredicate {
    /// Name of the variable whose property is being compared (the `n` in `n.embedding`).
    variable: String,
    /// Name of the property holding the vector (the `embedding` in `n.embedding`).
    property: String,
    /// Expression supplying the query vector.
    query: Expr,
    /// Numeric literal the similarity was compared against; `None` for the
    /// `~=` form, which carries no threshold.
    threshold: Option<f32>,
}
2105
/// Result of extracting vector_similarity from a predicate
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole input expression was the similarity predicate.
    residual: Option<Expr>,
}
2113
2114/// Try to extract a vector_similarity predicate from an expression.
2115/// Matches patterns like:
2116/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2117/// - n.embedding ~= $query
2118///
2119/// Also handles AND predicates.
2120fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2121    match expr {
2122        Expr::BinaryOp { left, op, right } => {
2123            // Handle AND: check both sides for vector_similarity
2124            if matches!(op, BinaryOp::And) {
2125                // Try left side first
2126                if let Some(vs) = extract_simple_vector_similarity(left) {
2127                    return Some(VectorSimilarityExtraction {
2128                        predicate: vs,
2129                        residual: Some(right.as_ref().clone()),
2130                    });
2131                }
2132                // Try right side
2133                if let Some(vs) = extract_simple_vector_similarity(right) {
2134                    return Some(VectorSimilarityExtraction {
2135                        predicate: vs,
2136                        residual: Some(left.as_ref().clone()),
2137                    });
2138                }
2139                // Recursively check within left/right for nested ANDs
2140                if let Some(mut extraction) = extract_vector_similarity(left) {
2141                    extraction.residual = Some(combine_with_and(
2142                        extraction.residual,
2143                        right.as_ref().clone(),
2144                    ));
2145                    return Some(extraction);
2146                }
2147                if let Some(mut extraction) = extract_vector_similarity(right) {
2148                    extraction.residual =
2149                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2150                    return Some(extraction);
2151                }
2152                return None;
2153            }
2154
2155            // Simple case: direct vector_similarity comparison
2156            if let Some(vs) = extract_simple_vector_similarity(expr) {
2157                return Some(VectorSimilarityExtraction {
2158                    predicate: vs,
2159                    residual: None,
2160                });
2161            }
2162            None
2163        }
2164        _ => None,
2165    }
2166}
2167
2168/// Helper to combine an optional expression with another using AND
2169fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2170    match opt_expr {
2171        Some(e) => Expr::BinaryOp {
2172            left: Box::new(e),
2173            op: BinaryOp::And,
2174            right: Box::new(other),
2175        },
2176        None => other,
2177    }
2178}
2179
2180/// Extract a simple vector_similarity comparison (no AND)
2181fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2182    match expr {
2183        Expr::BinaryOp { left, op, right } => {
2184            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2185            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2186                && let (Some(vs), Some(thresh)) = (
2187                    extract_vector_similarity_call(left),
2188                    extract_float_literal(right),
2189                )
2190            {
2191                return Some(VectorSimilarityPredicate {
2192                    variable: vs.0,
2193                    property: vs.1,
2194                    query: vs.2,
2195                    threshold: Some(thresh),
2196                });
2197            }
2198            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2199            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2200                && let (Some(thresh), Some(vs)) = (
2201                    extract_float_literal(left),
2202                    extract_vector_similarity_call(right),
2203                )
2204            {
2205                return Some(VectorSimilarityPredicate {
2206                    variable: vs.0,
2207                    property: vs.1,
2208                    query: vs.2,
2209                    threshold: Some(thresh),
2210                });
2211            }
2212            // Pattern: n.embedding ~= query
2213            if matches!(op, BinaryOp::ApproxEq)
2214                && let Expr::Property(var_expr, prop) = left.as_ref()
2215                && let Expr::Variable(var) = var_expr.as_ref()
2216            {
2217                return Some(VectorSimilarityPredicate {
2218                    variable: var.clone(),
2219                    property: prop.clone(),
2220                    query: right.as_ref().clone(),
2221                    threshold: None,
2222                });
2223            }
2224            None
2225        }
2226        _ => None,
2227    }
2228}
2229
2230/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2231fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2232    if let Expr::FunctionCall { name, args, .. } = expr
2233        && name.eq_ignore_ascii_case("vector_similarity")
2234        && args.len() == 2
2235    {
2236        // First arg should be Property(Identifier(var), prop)
2237        if let Expr::Property(var_expr, prop) = &args[0]
2238            && let Expr::Variable(var) = var_expr.as_ref()
2239        {
2240            // Second arg is query
2241            return Some((var.clone(), prop.clone(), args[1].clone()));
2242        }
2243    }
2244    None
2245}
2246
2247/// Extract a float value from a literal expression
2248fn extract_float_literal(expr: &Expr) -> Option<f32> {
2249    match expr {
2250        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2251        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2252        _ => None,
2253    }
2254}
2255
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Schema the planner was created for; its property metadata seeds
    /// `gen_expr_cache`.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    /// Held in a `Cell` so it can be advanced through `&self`.
    anon_counter: std::cell::Cell<usize>,
    /// Optional query parameters for resolving $param in SKIP/LIMIT.
    /// Empty unless set via `with_params`.
    params: HashMap<String, uni_common::Value>,
}
2271
/// Borrowed inputs describing one relationship hop and its target node.
///
/// NOTE(review): presumably bundled to keep the traversal-planning method's
/// argument list short — the consumer is outside this part of the file; confirm.
struct TraverseParams<'a> {
    /// Relationship pattern for the hop.
    rel: &'a RelationshipPattern,
    /// Node pattern at the far end of the hop.
    target_node: &'a NodePattern,
    /// NOTE(review): presumably `true` when the hop comes from an OPTIONAL MATCH — confirm.
    optional: bool,
    /// Name of the path variable, when one is given.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2281
2282impl QueryPlanner {
2283    /// Create a new planner for the given schema.
2284    ///
2285    /// Pre-parses all generation expressions defined in the schema so that
2286    /// repeated plan calls avoid redundant parsing.
2287    pub fn new(schema: Arc<Schema>) -> Self {
2288        // Pre-parse all generation expressions for caching
2289        let mut gen_expr_cache = HashMap::new();
2290        for (label, props) in &schema.properties {
2291            for (gen_col, meta) in props {
2292                if let Some(expr_str) = &meta.generation_expression
2293                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2294                {
2295                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2296                }
2297            }
2298        }
2299        Self {
2300            schema,
2301            gen_expr_cache,
2302            anon_counter: std::cell::Cell::new(0),
2303            params: HashMap::new(),
2304        }
2305    }
2306
2307    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2308    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2309        self.params = params;
2310        self
2311    }
2312
2313    /// Plan a Cypher query with no pre-bound variables.
2314    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2315        self.plan_with_scope(query, Vec::new())
2316    }
2317
2318    /// Plan a Cypher query with a set of externally pre-bound variable names.
2319    ///
2320    /// `vars` lists variable names already in scope before this query executes
2321    /// (e.g., from an enclosing Locy rule body).
2322    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2323        // Apply query rewrites before planning
2324        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2325        if Self::has_mixed_union_modes(&rewritten_query) {
2326            return Err(anyhow!(
2327                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2328            ));
2329        }
2330
2331        match rewritten_query {
2332            Query::Single(stmt) => self.plan_single(stmt, vars),
2333            Query::Union { left, right, all } => {
2334                let l = self.plan_with_scope(*left, vars.clone())?;
2335                let r = self.plan_with_scope(*right, vars)?;
2336
2337                // Validate that both sides have the same column names
2338                let left_cols = Self::extract_projection_columns(&l);
2339                let right_cols = Self::extract_projection_columns(&r);
2340
2341                if left_cols != right_cols {
2342                    return Err(anyhow!(
2343                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2344                    ));
2345                }
2346
2347                Ok(LogicalPlan::Union {
2348                    left: Box::new(l),
2349                    right: Box::new(r),
2350                    all,
2351                })
2352            }
2353            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2354            Query::Explain(inner) => {
2355                let inner_plan = self.plan_with_scope(*inner, vars)?;
2356                Ok(LogicalPlan::Explain {
2357                    plan: Box::new(inner_plan),
2358                })
2359            }
2360            Query::TimeTravel { .. } => {
2361                unreachable!("TimeTravel should be resolved at API layer before planning")
2362            }
2363        }
2364    }
2365
2366    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2367        match query {
2368            Query::Union { left, right, all } => {
2369                out.insert(*all);
2370                Self::collect_union_modes(left, out);
2371                Self::collect_union_modes(right, out);
2372            }
2373            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2374            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2375            Query::Single(_) | Query::Schema(_) => {}
2376        }
2377    }
2378
2379    fn has_mixed_union_modes(query: &Query) -> bool {
2380        let mut modes = HashSet::new();
2381        Self::collect_union_modes(query, &mut modes);
2382        modes.len() > 1
2383    }
2384
2385    fn next_anon_var(&self) -> String {
2386        let id = self.anon_counter.get();
2387        self.anon_counter.set(id + 1);
2388        format!("_anon_{}", id)
2389    }
2390
2391    /// Extract projection column names from a logical plan.
2392    /// Used for UNION column validation.
2393    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2394        match plan {
2395            LogicalPlan::Project { projections, .. } => projections
2396                .iter()
2397                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2398                .collect(),
2399            LogicalPlan::Limit { input, .. }
2400            | LogicalPlan::Sort { input, .. }
2401            | LogicalPlan::Distinct { input, .. }
2402            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2403            LogicalPlan::Union { left, right, .. } => {
2404                let left_cols = Self::extract_projection_columns(left);
2405                if left_cols.is_empty() {
2406                    Self::extract_projection_columns(right)
2407                } else {
2408                    left_cols
2409                }
2410            }
2411            LogicalPlan::Aggregate {
2412                group_by,
2413                aggregates,
2414                ..
2415            } => {
2416                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2417                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2418                cols
2419            }
2420            _ => Vec::new(),
2421        }
2422    }
2423
    /// Plans a `RETURN` clause on top of the incoming `plan`.
    ///
    /// The resulting plan is built bottom-up in this order, adding each node
    /// only when it applies:
    /// `Aggregate` → intermediate `Project` + `Window` → `Sort` → `Limit` →
    /// final `Project` → `Distinct`.
    ///
    /// * `return_clause` - the parsed RETURN clause (items, ORDER BY, SKIP,
    ///   LIMIT, DISTINCT).
    /// * `plan` - the plan produced by the preceding clauses; becomes the input.
    /// * `vars_in_scope` - variables visible to the RETURN items.
    ///
    /// # Errors
    ///
    /// Returns an error when:
    /// * `RETURN *` is used with no user-named variables in scope;
    /// * a returned expression fails variable/expression validation or
    ///   contains a pattern predicate;
    /// * two items share an explicit alias or bare variable name;
    /// * an expression mixes aggregation with a reference that is not grouped;
    /// * ORDER BY fails validation or uses an aggregate when RETURN has none;
    /// * SKIP / LIMIT are rejected by `parse_non_negative_integer`.
    fn plan_return_clause(
        &self,
        return_clause: &ReturnClause,
        plan: LogicalPlan,
        vars_in_scope: &[VariableInfo],
    ) -> Result<LogicalPlan> {
        let mut plan = plan;
        // Non-aggregate items become grouping keys; bare aggregates (and the
        // aggregates extracted from compound expressions) go to `aggregates`.
        let mut group_by = Vec::new();
        let mut aggregates = Vec::new();
        // Expressions that contain aggregates without being a bare aggregate;
        // checked after the loop for non-grouped references.
        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
        let mut has_agg = false;
        let mut projections = Vec::new();
        // Bookkeeping passed to the ORDER BY aggregate validation below;
        // `projected_aliases` also drives duplicate column detection.
        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
        let mut projected_aliases: HashSet<String> = HashSet::new();

        for item in &return_clause.items {
            match item {
                ReturnItem::All => {
                    // RETURN * - add all user-named variables in scope
                    // (anonymous variables like _anon_0 are excluded)
                    let user_vars: Vec<_> = vars_in_scope
                        .iter()
                        .filter(|v| !v.name.starts_with("_anon_"))
                        .collect();
                    if user_vars.is_empty() {
                        return Err(anyhow!(
                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
                        ));
                    }
                    for v in user_vars {
                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
                            group_by.push(Expr::Variable(v.name.clone()));
                        }
                        projected_aliases.insert(v.name.clone());
                        projected_simple_reprs.insert(v.name.clone());
                    }
                }
                ReturnItem::Expr {
                    expr,
                    alias,
                    source_text,
                } => {
                    if matches!(expr, Expr::Wildcard) {
                        // NOTE(review): unlike `ReturnItem::All` above, this path
                        // neither filters `_anon_` variables nor rejects an empty
                        // scope — confirm that the difference is intended.
                        for v in vars_in_scope {
                            projections
                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
                                group_by.push(Expr::Variable(v.name.clone()));
                            }
                            projected_aliases.insert(v.name.clone());
                            projected_simple_reprs.insert(v.name.clone());
                        }
                    } else {
                        // Validate expression variables are defined
                        validate_expression_variables(expr, vars_in_scope)?;
                        // Validate function argument types and boolean operators
                        validate_expression(expr, vars_in_scope)?;
                        // Pattern predicates are not allowed in RETURN
                        if contains_pattern_predicate(expr) {
                            return Err(anyhow!(
                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
                            ));
                        }

                        // Use source text as column name when no explicit alias
                        let effective_alias = alias.clone().or_else(|| source_text.clone());
                        projections.push((expr.clone(), effective_alias));
                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
                            // Bare aggregate — push directly
                            has_agg = true;
                            aggregates.push(expr.clone());
                            projected_aggregate_reprs.insert(expr.to_string_repr());
                        } else if !is_window_function(expr)
                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
                        {
                            // Compound aggregate or expression containing aggregates —
                            // extract the inner bare aggregates for the Aggregate node
                            has_agg = true;
                            compound_agg_exprs.push(expr.clone());
                            for inner in extract_inner_aggregates(expr) {
                                // Each distinct inner aggregate is computed once.
                                let repr = inner.to_string_repr();
                                if !projected_aggregate_reprs.contains(&repr) {
                                    aggregates.push(inner);
                                    projected_aggregate_reprs.insert(repr);
                                }
                            }
                        } else if !group_by.contains(expr) {
                            // Everything else is a grouping key (only meaningful
                            // when an Aggregate node ends up being created).
                            group_by.push(expr.clone());
                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
                                projected_simple_reprs.insert(expr.to_string_repr());
                            }
                        }

                        // Duplicate column detection covers explicit aliases and
                        // bare variables only; a name taken from `source_text`
                        // is not checked here.
                        if let Some(a) = alias {
                            if projected_aliases.contains(a) {
                                return Err(anyhow!(
                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
                                    a
                                ));
                            }
                            projected_aliases.insert(a.clone());
                        } else if let Expr::Variable(v) = expr {
                            if projected_aliases.contains(v) {
                                return Err(anyhow!(
                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
                                    v
                                ));
                            }
                            projected_aliases.insert(v.clone());
                        }
                    }
                }
            }
        }

        // Validate compound aggregate expressions: non-aggregate refs must be
        // individually present in the group_by as simple variables or properties.
        if has_agg {
            let group_by_reprs: HashSet<String> =
                group_by.iter().map(|e| e.to_string_repr()).collect();
            for expr in &compound_agg_exprs {
                let mut refs = Vec::new();
                collect_non_aggregate_refs(expr, false, &mut refs);
                for r in &refs {
                    let is_covered = match r {
                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
                    };
                    if !is_covered {
                        return Err(anyhow!(
                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
                        ));
                    }
                }
            }
        }

        // The Aggregate node sits directly on the incoming plan; everything
        // added below consumes its output.
        if has_agg {
            plan = LogicalPlan::Aggregate {
                input: Box::new(plan),
                group_by,
                aggregates,
            };
        }

        // Window functions may appear in the RETURN items and in ORDER BY.
        let mut window_exprs = Vec::new();
        for (expr, _) in &projections {
            Self::collect_window_functions(expr, &mut window_exprs);
        }

        if let Some(order_by) = &return_clause.order_by {
            for item in order_by {
                Self::collect_window_functions(&item.expr, &mut window_exprs);
            }
        }

        let has_window_exprs = !window_exprs.is_empty();

        if has_window_exprs {
            // Before creating the Window node, we need to ensure all properties
            // referenced by window functions are available. Create a Project node
            // that loads these properties.
            let mut props_needed_for_window: Vec<Expr> = Vec::new();
            for window_expr in &window_exprs {
                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
            }

            // Also include non-window expressions from projections that might be needed
            // Preserve qualified names (e.g., "e.salary") as aliases for properties
            let non_window_projections: Vec<_> = projections
                .iter()
                .filter_map(|(expr, alias)| {
                    // Keep expressions that don't have window_spec
                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
                        window_spec.is_none()
                    } else {
                        true
                    };

                    if keep {
                        // For property references, use the qualified name as alias
                        let new_alias = if matches!(expr, Expr::Property(..)) {
                            Some(expr.to_string_repr())
                        } else {
                            alias.clone()
                        };
                        Some((expr.clone(), new_alias))
                    } else {
                        None
                    }
                })
                .collect();

            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
                let mut intermediate_projections = non_window_projections;
                // Add any additional property references needed by window functions
                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
                for prop in &props_needed_for_window {
                    // Skip properties already projected (compared by text form).
                    if !intermediate_projections
                        .iter()
                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
                    {
                        let qualified_name = prop.to_string_repr();
                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
                    }
                }

                if !intermediate_projections.is_empty() {
                    plan = LogicalPlan::Project {
                        input: Box::new(plan),
                        projections: intermediate_projections,
                    };
                }
            }

            // Transform property expressions in window functions to use qualified variable names
            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
            let transformed_window_exprs: Vec<Expr> = window_exprs
                .into_iter()
                .map(Self::transform_window_expr_properties)
                .collect();

            plan = LogicalPlan::Window {
                input: Box::new(plan),
                window_exprs: transformed_window_exprs,
            };
        }

        if let Some(order_by) = &return_clause.order_by {
            // Map each output column name to the expression ORDER BY should
            // use when it refers to that name.
            let alias_exprs: HashMap<String, Expr> = projections
                .iter()
                .filter_map(|(expr, alias)| {
                    alias.as_ref().map(|a| {
                        // ORDER BY is planned before the final RETURN projection.
                        // In aggregate contexts, aliases must resolve to the
                        // post-aggregate output columns, not raw aggregate calls.
                        let rewritten = if has_agg && !has_window_exprs {
                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
                                Expr::Variable(aggregate_column_name(expr))
                            } else if is_compound_aggregate(expr)
                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
                            {
                                replace_aggregates_with_columns(expr)
                            } else {
                                Expr::Variable(expr.to_string_repr())
                            }
                        } else {
                            expr.clone()
                        };
                        (a.clone(), rewritten)
                    })
                })
                .collect();

            // Build an extended scope that includes RETURN aliases so ORDER BY
            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
                // Keep aliases and directly returned variables in scope.
                let mut scope = Vec::new();
                for (expr, alias) in &projections {
                    if let Some(a) = alias
                        && !is_var_in_scope(&scope, a)
                    {
                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
                    }
                    if let Expr::Variable(v) = expr
                        && !is_var_in_scope(&scope, v)
                    {
                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
                    }
                }
                scope
            } else {
                // Without DISTINCT the incoming scope stays visible and the
                // RETURN column names are added on top of it.
                let mut scope = vars_in_scope.to_vec();
                for (expr, alias) in &projections {
                    if let Some(a) = alias
                        && !is_var_in_scope(&scope, a)
                    {
                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
                    } else if let Expr::Variable(v) = expr
                        && !is_var_in_scope(&scope, v)
                    {
                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
                    }
                }
                scope
            };
            // Validate ORDER BY expressions against the extended scope
            for item in order_by {
                // DISTINCT allows ORDER BY on the same projected expression
                // even when underlying variables are not otherwise visible.
                let matches_projected_expr = return_clause.distinct
                    && projections
                        .iter()
                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
                if !matches_projected_expr {
                    validate_expression_variables(&item.expr, &order_by_scope)?;
                    validate_expression(&item.expr, &order_by_scope)?;
                }
                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
                // An aggregate in ORDER BY is only accepted when RETURN itself aggregates.
                if has_aggregate_in_item && !has_agg {
                    return Err(anyhow!(
                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
                    ));
                }
                if has_agg && has_aggregate_in_item {
                    validate_with_order_by_aggregate_item(
                        &item.expr,
                        &projected_aggregate_reprs,
                        &projected_simple_reprs,
                        &projected_aliases,
                    )?;
                }
            }
            // Resolve aliases first, then (in pure aggregate contexts) swap any
            // remaining aggregate calls for their output columns.
            let rewritten_order_by: Vec<SortItem> = order_by
                .iter()
                .map(|item| SortItem {
                    expr: {
                        let mut rewritten =
                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
                        if has_agg && !has_window_exprs {
                            rewritten = replace_aggregates_with_columns(&rewritten);
                        }
                        rewritten
                    },
                    ascending: item.ascending,
                })
                .collect();
            plan = LogicalPlan::Sort {
                input: Box::new(plan),
                order_by: rewritten_order_by,
            };
        }

        if return_clause.skip.is_some() || return_clause.limit.is_some() {
            // `map` + `transpose()?` surfaces a parse error while keeping an
            // absent SKIP/LIMIT as `None`; `flatten` collapses the nested Option.
            let skip = return_clause
                .skip
                .as_ref()
                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
                .transpose()?
                .flatten();
            let fetch = return_clause
                .limit
                .as_ref()
                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
                .transpose()?
                .flatten();

            plan = LogicalPlan::Limit {
                input: Box::new(plan),
                skip,
                fetch,
            };
        }

        if !projections.is_empty() {
            // If we created an Aggregate or Window node, we need to adjust the final projections
            // to reference aggregate/window function results as columns instead of re-evaluating them
            let final_projections = if has_agg || has_window_exprs {
                projections
                    .into_iter()
                    .map(|(expr, alias)| {
                        // Check if this expression is an aggregate function
                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
                        {
                            // Bare aggregate — replace with column reference
                            let col_name = aggregate_column_name(&expr);
                            (Expr::Variable(col_name), alias)
                        } else if !has_window_exprs
                            && (is_compound_aggregate(&expr)
                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
                        {
                            // Compound aggregate — replace inner aggregates with
                            // column references, keep outer expression for Project
                            (replace_aggregates_with_columns(&expr), alias)
                        }
                        // For grouped RETURN projections, reference the pre-computed
                        // group-by output column instead of re-evaluating the expression
                        // against the aggregate schema (which no longer has original vars).
                        else if has_agg
                            && !has_window_exprs
                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
                        {
                            (Expr::Variable(expr.to_string_repr()), alias)
                        }
                        // Check if this expression is a window function
                        else if let Expr::FunctionCall {
                            window_spec: Some(_),
                            ..
                        } = &expr
                        {
                            // Replace window function with a column reference to its result
                            // The column name in the Window output is the full expression string
                            let window_col_name = expr.to_string_repr();
                            // Keep the original alias for the final output
                            (Expr::Variable(window_col_name), alias)
                        } else {
                            (expr, alias)
                        }
                    })
                    .collect()
            } else {
                projections
            };

            plan = LogicalPlan::Project {
                input: Box::new(plan),
                projections: final_projections,
            };
        }

        // DISTINCT is applied last, on top of the final projection.
        if return_clause.distinct {
            plan = LogicalPlan::Distinct {
                input: Box::new(plan),
            };
        }

        Ok(plan)
    }
2846
2847    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
2848        let typed_vars: Vec<VariableInfo> = initial_vars
2849            .into_iter()
2850            .map(|name| VariableInfo::new(name, VariableType::Imported))
2851            .collect();
2852        self.plan_single_typed(query, typed_vars)
2853    }
2854
2855    /// Rewrite a query then plan it, preserving typed variable scope when possible.
2856    ///
2857    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
2858    /// variable info through and avoid false type-conflict errors in subqueries.
2859    /// For unions and other compound queries, falls back to `plan_with_scope`.
2860    fn rewrite_and_plan_typed(
2861        &self,
2862        query: Query,
2863        typed_vars: &[VariableInfo],
2864    ) -> Result<LogicalPlan> {
2865        let rewritten = crate::query::rewrite::rewrite_query(query)?;
2866        match rewritten {
2867            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
2868            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
2869        }
2870    }
2871
2872    fn plan_single_typed(
2873        &self,
2874        query: Statement,
2875        initial_vars: Vec<VariableInfo>,
2876    ) -> Result<LogicalPlan> {
2877        let mut plan = LogicalPlan::Empty;
2878
2879        if !initial_vars.is_empty() {
2880            // Project bound variables from outer scope as parameters.
2881            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
2882            // Use Parameter expressions to read from params, not Variable which would read from input row.
2883            let projections = initial_vars
2884                .iter()
2885                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
2886                .collect();
2887            plan = LogicalPlan::Project {
2888                input: Box::new(plan),
2889                projections,
2890            };
2891        }
2892
2893        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
2894        // Track variables introduced by CREATE clauses so we can distinguish
2895        // MATCH-introduced variables (which cannot be re-created as bare nodes)
2896        // from CREATE-introduced variables (which can be referenced as bare nodes).
2897        let mut create_introduced_vars: HashSet<String> = HashSet::new();
2898        // Track variables targeted by DELETE so we can reject property/label
2899        // access on deleted entities in subsequent RETURN clauses.
2900        let mut deleted_vars: HashSet<String> = HashSet::new();
2901
2902        let clause_count = query.clauses.len();
2903        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
2904            match clause {
2905                Clause::Match(match_clause) => {
2906                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
2907                }
2908                Clause::Unwind(unwind) => {
2909                    plan = LogicalPlan::Unwind {
2910                        input: Box::new(plan),
2911                        expr: unwind.expr.clone(),
2912                        variable: unwind.variable.clone(),
2913                    };
2914                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
2915                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
2916                }
2917                Clause::Call(call_clause) => {
2918                    match &call_clause.kind {
2919                        CallKind::Procedure {
2920                            procedure,
2921                            arguments,
2922                        } => {
2923                            // Validate that procedure arguments don't contain aggregation functions
2924                            for arg in arguments {
2925                                if contains_aggregate_recursive(arg) {
2926                                    return Err(anyhow!(
2927                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
2928                                    ));
2929                                }
2930                            }
2931
2932                            let has_yield_star = call_clause.yield_items.len() == 1
2933                                && call_clause.yield_items[0].name == "*"
2934                                && call_clause.yield_items[0].alias.is_none();
2935                            if has_yield_star && clause_idx + 1 < clause_count {
2936                                return Err(anyhow!(
2937                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
2938                                ));
2939                            }
2940
2941                            // Validate for duplicate yield names (VariableAlreadyBound)
2942                            let mut yield_names = Vec::new();
2943                            for item in &call_clause.yield_items {
2944                                if item.name == "*" {
2945                                    continue;
2946                                }
2947                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
2948                                if yield_names.contains(output_name) {
2949                                    return Err(anyhow!(
2950                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
2951                                        output_name
2952                                    ));
2953                                }
2954                                // Check against existing scope (in-query CALL must not shadow)
2955                                if clause_idx > 0
2956                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
2957                                {
2958                                    return Err(anyhow!(
2959                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
2960                                        output_name
2961                                    ));
2962                                }
2963                                yield_names.push(output_name.clone());
2964                            }
2965
2966                            let mut yields = Vec::new();
2967                            for item in &call_clause.yield_items {
2968                                if item.name == "*" {
2969                                    continue;
2970                                }
2971                                yields.push((item.name.clone(), item.alias.clone()));
2972                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
2973                                // Use Imported because procedure return types are unknown
2974                                // at plan time (could be nodes, edges, or scalars)
2975                                add_var_to_scope(
2976                                    &mut vars_in_scope,
2977                                    var_name,
2978                                    VariableType::Imported,
2979                                )?;
2980                            }
2981                            let proc_plan = LogicalPlan::ProcedureCall {
2982                                procedure_name: procedure.clone(),
2983                                arguments: arguments.clone(),
2984                                yield_items: yields.clone(),
2985                            };
2986
2987                            if matches!(plan, LogicalPlan::Empty) {
2988                                // Standalone CALL (first clause) — use directly
2989                                plan = proc_plan;
2990                            } else if yields.is_empty() {
2991                                // In-query CALL with no YIELD (void procedure):
2992                                // preserve the input rows unchanged
2993                            } else {
2994                                // In-query CALL with YIELD: cross-join input × procedure output
2995                                plan = LogicalPlan::Apply {
2996                                    input: Box::new(plan),
2997                                    subquery: Box::new(proc_plan),
2998                                    input_filter: None,
2999                                };
3000                            }
3001                        }
3002                        CallKind::Subquery(query) => {
3003                            let subquery_plan =
3004                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3005
3006                            // Extract variables from subquery RETURN clause
3007                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3008
3009                            // Add new variables to scope (as Scalar since they come from subquery projection)
3010                            for var in subquery_vars {
3011                                if !is_var_in_scope(&vars_in_scope, &var) {
3012                                    add_var_to_scope(
3013                                        &mut vars_in_scope,
3014                                        &var,
3015                                        VariableType::Scalar,
3016                                    )?;
3017                                }
3018                            }
3019
3020                            plan = LogicalPlan::SubqueryCall {
3021                                input: Box::new(plan),
3022                                subquery: Box::new(subquery_plan),
3023                            };
3024                        }
3025                    }
3026                }
3027                Clause::Merge(merge_clause) => {
3028                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3029
3030                    plan = LogicalPlan::Merge {
3031                        input: Box::new(plan),
3032                        pattern: merge_clause.pattern.clone(),
3033                        on_match: Some(SetClause {
3034                            items: merge_clause.on_match.clone(),
3035                        }),
3036                        on_create: Some(SetClause {
3037                            items: merge_clause.on_create.clone(),
3038                        }),
3039                    };
3040
3041                    for path in &merge_clause.pattern.paths {
3042                        if let Some(path_var) = &path.variable
3043                            && !path_var.is_empty()
3044                            && !is_var_in_scope(&vars_in_scope, path_var)
3045                        {
3046                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3047                        }
3048                        for element in &path.elements {
3049                            if let PatternElement::Node(n) = element {
3050                                if let Some(v) = &n.variable
3051                                    && !is_var_in_scope(&vars_in_scope, v)
3052                                {
3053                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3054                                }
3055                            } else if let PatternElement::Relationship(r) = element
3056                                && let Some(v) = &r.variable
3057                                && !is_var_in_scope(&vars_in_scope, v)
3058                            {
3059                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3060                            }
3061                        }
3062                    }
3063                }
3064                Clause::Create(create_clause) => {
3065                    // Validate CREATE patterns:
3066                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3067                    // - Bare nodes (v) are "references" if bound, "creations" if not
3068                    // - Relationships are always creations - can't rebind
3069                    // - Within CREATE, each new variable can only be defined once
3070                    // - Variables used in properties must be defined
3071                    let mut create_vars: Vec<&str> = Vec::new();
3072                    for path in &create_clause.pattern.paths {
3073                        let is_standalone_node = path.elements.len() == 1;
3074                        for element in &path.elements {
3075                            match element {
3076                                PatternElement::Node(n) => {
3077                                    validate_property_variables(
3078                                        &n.properties,
3079                                        &vars_in_scope,
3080                                        &create_vars,
3081                                    )?;
3082
3083                                    if let Some(v) = n.variable.as_deref()
3084                                        && !v.is_empty()
3085                                    {
3086                                        // A node is a "creation" if it has labels or properties
3087                                        let is_creation =
3088                                            !n.labels.is_empty() || n.properties.is_some();
3089
3090                                        if is_creation {
3091                                            check_not_already_bound(
3092                                                v,
3093                                                &vars_in_scope,
3094                                                &create_vars,
3095                                            )?;
3096                                            create_vars.push(v);
3097                                        } else if is_standalone_node
3098                                            && is_var_in_scope(&vars_in_scope, v)
3099                                            && !create_introduced_vars.contains(v)
3100                                        {
3101                                            // Standalone bare node referencing a variable from a
3102                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3103                                            // Bare nodes used as relationship endpoints
3104                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3105                                            return Err(anyhow!(
3106                                                "SyntaxError: VariableAlreadyBound - '{}'",
3107                                                v
3108                                            ));
3109                                        } else if !create_vars.contains(&v) {
3110                                            // New bare variable — register it
3111                                            create_vars.push(v);
3112                                        }
3113                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3114                                    }
3115                                }
3116                                PatternElement::Relationship(r) => {
3117                                    validate_property_variables(
3118                                        &r.properties,
3119                                        &vars_in_scope,
3120                                        &create_vars,
3121                                    )?;
3122
3123                                    if let Some(v) = r.variable.as_deref()
3124                                        && !v.is_empty()
3125                                    {
3126                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3127                                        create_vars.push(v);
3128                                    }
3129
3130                                    // Validate relationship constraints for CREATE
3131                                    if r.types.len() != 1 {
3132                                        return Err(anyhow!(
3133                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3134                                        ));
3135                                    }
3136                                    if r.direction == Direction::Both {
3137                                        return Err(anyhow!(
3138                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3139                                        ));
3140                                    }
3141                                    if r.range.is_some() {
3142                                        return Err(anyhow!(
3143                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3144                                        ));
3145                                    }
3146                                }
3147                                PatternElement::Parenthesized { .. } => {}
3148                            }
3149                        }
3150                    }
3151
3152                    // Batch consecutive CREATEs to avoid deep recursion
3153                    match &mut plan {
3154                        LogicalPlan::CreateBatch { patterns, .. } => {
3155                            // Append to existing batch
3156                            patterns.push(create_clause.pattern.clone());
3157                        }
3158                        LogicalPlan::Create { input, pattern } => {
3159                            // Convert single Create to CreateBatch with both patterns
3160                            let first_pattern = pattern.clone();
3161                            plan = LogicalPlan::CreateBatch {
3162                                input: input.clone(),
3163                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3164                            };
3165                        }
3166                        _ => {
3167                            // Start new Create (may become batch if more CREATEs follow)
3168                            plan = LogicalPlan::Create {
3169                                input: Box::new(plan),
3170                                pattern: create_clause.pattern.clone(),
3171                            };
3172                        }
3173                    }
3174                    // Add variables from created nodes and relationships to scope
3175                    for path in &create_clause.pattern.paths {
3176                        for element in &path.elements {
3177                            match element {
3178                                PatternElement::Node(n) => {
3179                                    if let Some(var) = &n.variable
3180                                        && !var.is_empty()
3181                                    {
3182                                        create_introduced_vars.insert(var.clone());
3183                                        add_var_to_scope(
3184                                            &mut vars_in_scope,
3185                                            var,
3186                                            VariableType::Node,
3187                                        )?;
3188                                    }
3189                                }
3190                                PatternElement::Relationship(r) => {
3191                                    if let Some(var) = &r.variable
3192                                        && !var.is_empty()
3193                                    {
3194                                        create_introduced_vars.insert(var.clone());
3195                                        add_var_to_scope(
3196                                            &mut vars_in_scope,
3197                                            var,
3198                                            VariableType::Edge,
3199                                        )?;
3200                                    }
3201                                }
3202                                PatternElement::Parenthesized { .. } => {
3203                                    // Skip for now - not commonly used in CREATE
3204                                }
3205                            }
3206                        }
3207                    }
3208                }
3209                Clause::Set(set_clause) => {
3210                    // Validate SET value expressions
3211                    for item in &set_clause.items {
3212                        match item {
3213                            SetItem::Property { value, .. }
3214                            | SetItem::Variable { value, .. }
3215                            | SetItem::VariablePlus { value, .. } => {
3216                                validate_expression_variables(value, &vars_in_scope)?;
3217                                validate_expression(value, &vars_in_scope)?;
3218                                if contains_pattern_predicate(value) {
3219                                    return Err(anyhow!(
3220                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3221                                    ));
3222                                }
3223                            }
3224                            SetItem::Labels { .. } => {}
3225                        }
3226                    }
3227                    plan = LogicalPlan::Set {
3228                        input: Box::new(plan),
3229                        items: set_clause.items.clone(),
3230                    };
3231                }
3232                Clause::Remove(remove_clause) => {
3233                    plan = LogicalPlan::Remove {
3234                        input: Box::new(plan),
3235                        items: remove_clause.items.clone(),
3236                    };
3237                }
3238                Clause::Delete(delete_clause) => {
3239                    // Validate DELETE targets
3240                    for item in &delete_clause.items {
3241                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3242                        if matches!(item, Expr::LabelCheck { .. }) {
3243                            return Err(anyhow!(
3244                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3245                            ));
3246                        }
3247                        let vars_used = collect_expr_variables(item);
3248                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3249                        if vars_used.is_empty() {
3250                            return Err(anyhow!(
3251                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3252                            ));
3253                        }
3254                        for var in &vars_used {
3255                            // Check if variable is defined
3256                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3257                                return Err(anyhow!(
3258                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3259                                    var
3260                                ));
3261                            }
3262                        }
3263                        // Strict type check only for simple variable references —
3264                        // complex expressions (property access, array index, etc.)
3265                        // may resolve to a node/edge at runtime even if the base
3266                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3267                        if let Expr::Variable(name) = item
3268                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3269                            && matches!(
3270                                info.var_type,
3271                                VariableType::Scalar | VariableType::ScalarLiteral
3272                            )
3273                        {
3274                            return Err(anyhow!(
3275                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3276                                name
3277                            ));
3278                        }
3279                    }
3280                    // Track deleted variables for later validation
3281                    for item in &delete_clause.items {
3282                        if let Expr::Variable(name) = item {
3283                            deleted_vars.insert(name.clone());
3284                        }
3285                    }
3286                    plan = LogicalPlan::Delete {
3287                        input: Box::new(plan),
3288                        items: delete_clause.items.clone(),
3289                        detach: delete_clause.detach,
3290                    };
3291                }
3292                Clause::With(with_clause) => {
3293                    let (new_plan, new_vars) =
3294                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3295                    plan = new_plan;
3296                    vars_in_scope = new_vars;
3297                }
3298                Clause::WithRecursive(with_recursive) => {
3299                    // Plan the recursive CTE
3300                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3301                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3302                    add_var_to_scope(
3303                        &mut vars_in_scope,
3304                        &with_recursive.name,
3305                        VariableType::Scalar,
3306                    )?;
3307                }
3308                Clause::Return(return_clause) => {
3309                    // Check for property/label access on deleted entities
3310                    if !deleted_vars.is_empty() {
3311                        for item in &return_clause.items {
3312                            if let ReturnItem::Expr { expr, .. } = item {
3313                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3314                            }
3315                        }
3316                    }
3317                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3318                } // All Clause variants are handled above - no catch-all needed
3319            }
3320        }
3321
3322        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3323        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3324        // If RETURN was used, the plan will have been wrapped in Project, so we only
3325        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3326        let plan = match &plan {
3327            LogicalPlan::Create { .. }
3328            | LogicalPlan::CreateBatch { .. }
3329            | LogicalPlan::Delete { .. }
3330            | LogicalPlan::Set { .. }
3331            | LogicalPlan::Remove { .. }
3332            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3333                input: Box::new(plan),
3334                skip: None,
3335                fetch: Some(0),
3336            },
3337            _ => plan,
3338        };
3339
3340        Ok(plan)
3341    }
3342
3343    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3344        match expr {
3345            Expr::Property(_, _) => {
3346                if !collected
3347                    .iter()
3348                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3349                {
3350                    collected.push(expr.clone());
3351                }
3352            }
3353            Expr::Variable(_) => {
3354                // Variables are already available, don't need to project them
3355            }
3356            Expr::BinaryOp { left, right, .. } => {
3357                Self::collect_properties_from_expr(left, collected);
3358                Self::collect_properties_from_expr(right, collected);
3359            }
3360            Expr::FunctionCall {
3361                args, window_spec, ..
3362            } => {
3363                for arg in args {
3364                    Self::collect_properties_from_expr(arg, collected);
3365                }
3366                if let Some(spec) = window_spec {
3367                    for partition_expr in &spec.partition_by {
3368                        Self::collect_properties_from_expr(partition_expr, collected);
3369                    }
3370                    for sort_item in &spec.order_by {
3371                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3372                    }
3373                }
3374            }
3375            Expr::List(items) => {
3376                for item in items {
3377                    Self::collect_properties_from_expr(item, collected);
3378                }
3379            }
3380            Expr::UnaryOp { expr: e, .. }
3381            | Expr::IsNull(e)
3382            | Expr::IsNotNull(e)
3383            | Expr::IsUnique(e) => {
3384                Self::collect_properties_from_expr(e, collected);
3385            }
3386            Expr::Case {
3387                expr,
3388                when_then,
3389                else_expr,
3390            } => {
3391                if let Some(e) = expr {
3392                    Self::collect_properties_from_expr(e, collected);
3393                }
3394                for (w, t) in when_then {
3395                    Self::collect_properties_from_expr(w, collected);
3396                    Self::collect_properties_from_expr(t, collected);
3397                }
3398                if let Some(e) = else_expr {
3399                    Self::collect_properties_from_expr(e, collected);
3400                }
3401            }
3402            Expr::In { expr, list } => {
3403                Self::collect_properties_from_expr(expr, collected);
3404                Self::collect_properties_from_expr(list, collected);
3405            }
3406            Expr::ArrayIndex { array, index } => {
3407                Self::collect_properties_from_expr(array, collected);
3408                Self::collect_properties_from_expr(index, collected);
3409            }
3410            Expr::ArraySlice { array, start, end } => {
3411                Self::collect_properties_from_expr(array, collected);
3412                if let Some(s) = start {
3413                    Self::collect_properties_from_expr(s, collected);
3414                }
3415                if let Some(e) = end {
3416                    Self::collect_properties_from_expr(e, collected);
3417                }
3418            }
3419            _ => {}
3420        }
3421    }
3422
3423    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3424        if let Expr::FunctionCall { window_spec, .. } = expr {
3425            // Collect any function with a window spec (OVER clause)
3426            if window_spec.is_some() {
3427                if !collected
3428                    .iter()
3429                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3430                {
3431                    collected.push(expr.clone());
3432                }
3433                return;
3434            }
3435        }
3436
3437        match expr {
3438            Expr::BinaryOp { left, right, .. } => {
3439                Self::collect_window_functions(left, collected);
3440                Self::collect_window_functions(right, collected);
3441            }
3442            Expr::FunctionCall { args, .. } => {
3443                for arg in args {
3444                    Self::collect_window_functions(arg, collected);
3445                }
3446            }
3447            Expr::List(items) => {
3448                for i in items {
3449                    Self::collect_window_functions(i, collected);
3450                }
3451            }
3452            Expr::Map(items) => {
3453                for (_, i) in items {
3454                    Self::collect_window_functions(i, collected);
3455                }
3456            }
3457            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3458                Self::collect_window_functions(e, collected);
3459            }
3460            Expr::Case {
3461                expr,
3462                when_then,
3463                else_expr,
3464            } => {
3465                if let Some(e) = expr {
3466                    Self::collect_window_functions(e, collected);
3467                }
3468                for (w, t) in when_then {
3469                    Self::collect_window_functions(w, collected);
3470                    Self::collect_window_functions(t, collected);
3471                }
3472                if let Some(e) = else_expr {
3473                    Self::collect_window_functions(e, collected);
3474                }
3475            }
3476            Expr::Reduce {
3477                init, list, expr, ..
3478            } => {
3479                Self::collect_window_functions(init, collected);
3480                Self::collect_window_functions(list, collected);
3481                Self::collect_window_functions(expr, collected);
3482            }
3483            Expr::Quantifier {
3484                list, predicate, ..
3485            } => {
3486                Self::collect_window_functions(list, collected);
3487                Self::collect_window_functions(predicate, collected);
3488            }
3489            Expr::In { expr, list } => {
3490                Self::collect_window_functions(expr, collected);
3491                Self::collect_window_functions(list, collected);
3492            }
3493            Expr::ArrayIndex { array, index } => {
3494                Self::collect_window_functions(array, collected);
3495                Self::collect_window_functions(index, collected);
3496            }
3497            Expr::ArraySlice { array, start, end } => {
3498                Self::collect_window_functions(array, collected);
3499                if let Some(s) = start {
3500                    Self::collect_window_functions(s, collected);
3501                }
3502                if let Some(e) = end {
3503                    Self::collect_window_functions(e, collected);
3504                }
3505            }
3506            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3507            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3508            _ => {}
3509        }
3510    }
3511
3512    /// Transform property expressions in manual window functions to use qualified variable names.
3513    ///
3514    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3515    /// so the executor can look up values directly from the row HashMap after the
3516    /// intermediate projection has materialized these properties with qualified names.
3517    ///
3518    /// Transforms ALL window functions (both manual and aggregate).
3519    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3520    fn transform_window_expr_properties(expr: Expr) -> Expr {
3521        let Expr::FunctionCall {
3522            name,
3523            args,
3524            window_spec: Some(spec),
3525            distinct,
3526        } = expr
3527        else {
3528            return expr;
3529        };
3530
3531        // Transform arguments for ALL window functions
3532        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3533        let transformed_args = args
3534            .into_iter()
3535            .map(Self::transform_property_to_variable)
3536            .collect();
3537
3538        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3539        let transformed_partition_by = spec
3540            .partition_by
3541            .into_iter()
3542            .map(Self::transform_property_to_variable)
3543            .collect();
3544
3545        let transformed_order_by = spec
3546            .order_by
3547            .into_iter()
3548            .map(|item| SortItem {
3549                expr: Self::transform_property_to_variable(item.expr),
3550                ascending: item.ascending,
3551            })
3552            .collect();
3553
3554        Expr::FunctionCall {
3555            name,
3556            args: transformed_args,
3557            window_spec: Some(WindowSpec {
3558                partition_by: transformed_partition_by,
3559                order_by: transformed_order_by,
3560            }),
3561            distinct,
3562        }
3563    }
3564
3565    /// Transform a property expression to a variable expression with qualified name.
3566    ///
3567    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
3568    fn transform_property_to_variable(expr: Expr) -> Expr {
3569        let Expr::Property(base, prop) = expr else {
3570            return expr;
3571        };
3572
3573        match *base {
3574            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
3575            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
3576        }
3577    }
3578
3579    /// Transform VALID_AT macro into function call
3580    ///
3581    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
3582    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
3583    fn transform_valid_at_to_function(expr: Expr) -> Expr {
3584        match expr {
3585            Expr::ValidAt {
3586                entity,
3587                timestamp,
3588                start_prop,
3589                end_prop,
3590            } => {
3591                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
3592                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
3593
3594                Expr::FunctionCall {
3595                    name: "uni.temporal.validAt".to_string(),
3596                    args: vec![
3597                        Self::transform_valid_at_to_function(*entity),
3598                        Expr::Literal(CypherLiteral::String(start)),
3599                        Expr::Literal(CypherLiteral::String(end)),
3600                        Self::transform_valid_at_to_function(*timestamp),
3601                    ],
3602                    distinct: false,
3603                    window_spec: None,
3604                }
3605            }
3606            // Recursively transform nested expressions
3607            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
3608                left: Box::new(Self::transform_valid_at_to_function(*left)),
3609                op,
3610                right: Box::new(Self::transform_valid_at_to_function(*right)),
3611            },
3612            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
3613                op,
3614                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3615            },
3616            Expr::FunctionCall {
3617                name,
3618                args,
3619                distinct,
3620                window_spec,
3621            } => Expr::FunctionCall {
3622                name,
3623                args: args
3624                    .into_iter()
3625                    .map(Self::transform_valid_at_to_function)
3626                    .collect(),
3627                distinct,
3628                window_spec,
3629            },
3630            Expr::Property(base, prop) => {
3631                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
3632            }
3633            Expr::List(items) => Expr::List(
3634                items
3635                    .into_iter()
3636                    .map(Self::transform_valid_at_to_function)
3637                    .collect(),
3638            ),
3639            Expr::In { expr, list } => Expr::In {
3640                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3641                list: Box::new(Self::transform_valid_at_to_function(*list)),
3642            },
3643            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
3644            Expr::IsNotNull(e) => {
3645                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
3646            }
3647            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
3648            // Other cases: return as-is
3649            other => other,
3650        }
3651    }
3652
3653    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
3654    fn plan_match_clause(
3655        &self,
3656        match_clause: &MatchClause,
3657        plan: LogicalPlan,
3658        vars_in_scope: &mut Vec<VariableInfo>,
3659    ) -> Result<LogicalPlan> {
3660        let mut plan = plan;
3661
3662        if match_clause.pattern.paths.is_empty() {
3663            return Err(anyhow!("Empty pattern"));
3664        }
3665
3666        // Track variables introduced by this OPTIONAL MATCH
3667        let vars_before_pattern = vars_in_scope.len();
3668
3669        for path in &match_clause.pattern.paths {
3670            if let Some(mode) = &path.shortest_path_mode {
3671                plan =
3672                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
3673            } else {
3674                plan = self.plan_path(
3675                    path,
3676                    plan,
3677                    vars_in_scope,
3678                    match_clause.optional,
3679                    vars_before_pattern,
3680                )?;
3681            }
3682        }
3683
3684        // Collect variables introduced by this OPTIONAL MATCH pattern
3685        let optional_vars: HashSet<String> = if match_clause.optional {
3686            vars_in_scope[vars_before_pattern..]
3687                .iter()
3688                .map(|v| v.name.clone())
3689                .collect()
3690        } else {
3691            HashSet::new()
3692        };
3693
3694        // Handle WHERE clause with vector_similarity and predicate pushdown
3695        if let Some(predicate) = &match_clause.where_clause {
3696            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
3697        }
3698
3699        Ok(plan)
3700    }
3701
3702    /// Plan a shortestPath pattern.
3703    fn plan_shortest_path(
3704        &self,
3705        path: &PathPattern,
3706        plan: LogicalPlan,
3707        vars_in_scope: &mut Vec<VariableInfo>,
3708        mode: &ShortestPathMode,
3709        _vars_before_pattern: usize,
3710    ) -> Result<LogicalPlan> {
3711        let mut plan = plan;
3712        let elements = &path.elements;
3713
3714        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
3715        if elements.len() < 3 || elements.len().is_multiple_of(2) {
3716            return Err(anyhow!(
3717                "shortestPath requires at least one relationship: (a)-[*]->(b)"
3718            ));
3719        }
3720
3721        let source_node = match &elements[0] {
3722            PatternElement::Node(n) => n,
3723            _ => return Err(anyhow!("ShortestPath must start with a node")),
3724        };
3725        let rel = match &elements[1] {
3726            PatternElement::Relationship(r) => r,
3727            _ => {
3728                return Err(anyhow!(
3729                    "ShortestPath middle element must be a relationship"
3730                ));
3731            }
3732        };
3733        let target_node = match &elements[2] {
3734            PatternElement::Node(n) => n,
3735            _ => return Err(anyhow!("ShortestPath must end with a node")),
3736        };
3737
3738        let source_var = source_node
3739            .variable
3740            .clone()
3741            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
3742        let target_var = target_node
3743            .variable
3744            .clone()
3745            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
3746        let path_var = path
3747            .variable
3748            .clone()
3749            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
3750
3751        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
3752        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
3753
3754        // Plan source node if not bound
3755        if !source_bound {
3756            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
3757        } else if let Some(prop_filter) =
3758            self.properties_to_expr(&source_var, &source_node.properties)
3759        {
3760            plan = LogicalPlan::Filter {
3761                input: Box::new(plan),
3762                predicate: prop_filter,
3763                optional_variables: HashSet::new(),
3764            };
3765        }
3766
3767        // Plan target node if not bound
3768        let target_label_id = if !target_bound {
3769            // Use first label for target_label_id
3770            let target_label_name = target_node
3771                .labels
3772                .first()
3773                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
3774            let target_label_meta = self
3775                .schema
3776                .get_label_case_insensitive(target_label_name)
3777                .ok_or_else(|| anyhow!("Label {} not found", target_label_name))?;
3778
3779            let target_scan = LogicalPlan::Scan {
3780                label_id: target_label_meta.id,
3781                labels: target_node.labels.clone(),
3782                variable: target_var.clone(),
3783                filter: self.properties_to_expr(&target_var, &target_node.properties),
3784                optional: false,
3785            };
3786
3787            plan = Self::join_with_plan(plan, target_scan);
3788            target_label_meta.id
3789        } else {
3790            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
3791            {
3792                plan = LogicalPlan::Filter {
3793                    input: Box::new(plan),
3794                    predicate: prop_filter,
3795                    optional_variables: HashSet::new(),
3796                };
3797            }
3798            0 // Wildcard for already-bound target
3799        };
3800
3801        // Add ShortestPath operator
3802        let edge_type_ids = if rel.types.is_empty() {
3803            // If no type specified, fetch all edge types (both schema and schemaless)
3804            self.schema.all_edge_type_ids()
3805        } else {
3806            let mut ids = Vec::new();
3807            for type_name in &rel.types {
3808                let edge_meta = self
3809                    .schema
3810                    .edge_types
3811                    .get(type_name)
3812                    .ok_or_else(|| anyhow!("Edge type {} not found", type_name))?;
3813                ids.push(edge_meta.id);
3814            }
3815            ids
3816        };
3817
3818        // Extract hop constraints from relationship pattern
3819        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
3820        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
3821
3822        let sp_plan = match mode {
3823            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
3824                input: Box::new(plan),
3825                edge_type_ids,
3826                direction: rel.direction.clone(),
3827                source_variable: source_var.clone(),
3828                target_variable: target_var.clone(),
3829                target_label_id,
3830                path_variable: path_var.clone(),
3831                min_hops,
3832                max_hops,
3833            },
3834            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
3835                input: Box::new(plan),
3836                edge_type_ids,
3837                direction: rel.direction.clone(),
3838                source_variable: source_var.clone(),
3839                target_variable: target_var.clone(),
3840                target_label_id,
3841                path_variable: path_var.clone(),
3842                min_hops,
3843                max_hops,
3844            },
3845        };
3846
3847        if !source_bound {
3848            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
3849        }
3850        if !target_bound {
3851            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
3852        }
3853        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
3854
3855        Ok(sp_plan)
3856    }
3857    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
3858    ///
3859    /// This is a public entry point for the Locy plan builder to reuse the
3860    /// existing pattern-planning logic for clause bodies.
3861    pub fn plan_pattern(
3862        &self,
3863        pattern: &Pattern,
3864        initial_vars: &[VariableInfo],
3865    ) -> Result<LogicalPlan> {
3866        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
3867        let vars_before_pattern = vars_in_scope.len();
3868        let mut plan = LogicalPlan::Empty;
3869        for path in &pattern.paths {
3870            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
3871        }
3872        Ok(plan)
3873    }
3874
3875    /// Plan a regular MATCH path (not shortestPath).
3876    fn plan_path(
3877        &self,
3878        path: &PathPattern,
3879        plan: LogicalPlan,
3880        vars_in_scope: &mut Vec<VariableInfo>,
3881        optional: bool,
3882        vars_before_pattern: usize,
3883    ) -> Result<LogicalPlan> {
3884        let mut plan = plan;
3885        let elements = &path.elements;
3886        let mut i = 0;
3887
3888        let path_variable = path.variable.clone();
3889
3890        // Check for VariableAlreadyBound: path variable already in scope
3891        if let Some(pv) = &path_variable
3892            && !pv.is_empty()
3893            && is_var_in_scope(vars_in_scope, pv)
3894        {
3895            return Err(anyhow!(
3896                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3897                pv
3898            ));
3899        }
3900
3901        // Check for VariableAlreadyBound: path variable conflicts with element variables
3902        if let Some(pv) = &path_variable
3903            && !pv.is_empty()
3904        {
3905            for element in elements {
3906                match element {
3907                    PatternElement::Node(n) => {
3908                        if let Some(v) = &n.variable
3909                            && v == pv
3910                        {
3911                            return Err(anyhow!(
3912                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3913                                pv
3914                            ));
3915                        }
3916                    }
3917                    PatternElement::Relationship(r) => {
3918                        if let Some(v) = &r.variable
3919                            && v == pv
3920                        {
3921                            return Err(anyhow!(
3922                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3923                                pv
3924                            ));
3925                        }
3926                    }
3927                    PatternElement::Parenthesized { .. } => {}
3928                }
3929            }
3930        }
3931
3932        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
3933        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
3934        let mut optional_pattern_vars: HashSet<String> = if optional {
3935            let mut vars = HashSet::new();
3936            for element in elements {
3937                match element {
3938                    PatternElement::Node(n) => {
3939                        if let Some(v) = &n.variable
3940                            && !v.is_empty()
3941                            && !is_var_in_scope(vars_in_scope, v)
3942                        {
3943                            vars.insert(v.clone());
3944                        }
3945                    }
3946                    PatternElement::Relationship(r) => {
3947                        if let Some(v) = &r.variable
3948                            && !v.is_empty()
3949                            && !is_var_in_scope(vars_in_scope, v)
3950                        {
3951                            vars.insert(v.clone());
3952                        }
3953                    }
3954                    PatternElement::Parenthesized { pattern, .. } => {
3955                        // Also check nested patterns
3956                        for nested_elem in &pattern.elements {
3957                            match nested_elem {
3958                                PatternElement::Node(n) => {
3959                                    if let Some(v) = &n.variable
3960                                        && !v.is_empty()
3961                                        && !is_var_in_scope(vars_in_scope, v)
3962                                    {
3963                                        vars.insert(v.clone());
3964                                    }
3965                                }
3966                                PatternElement::Relationship(r) => {
3967                                    if let Some(v) = &r.variable
3968                                        && !v.is_empty()
3969                                        && !is_var_in_scope(vars_in_scope, v)
3970                                    {
3971                                        vars.insert(v.clone());
3972                                    }
3973                                }
3974                                _ => {}
3975                            }
3976                        }
3977                    }
3978                }
3979            }
3980            // Include path variable if present
3981            if let Some(pv) = &path_variable
3982                && !pv.is_empty()
3983            {
3984                vars.insert(pv.clone());
3985            }
3986            vars
3987        } else {
3988            HashSet::new()
3989        };
3990
3991        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
3992        // These must participate in Trail mode (relationship uniqueness) enforcement
3993        // across ALL segments in this path, so that VLP segments like [*0..1] don't
3994        // traverse through edges already claimed by a bound relationship [r].
3995        let path_bound_edge_vars: HashSet<String> = {
3996            let mut bound = HashSet::new();
3997            for element in elements {
3998                if let PatternElement::Relationship(rel) = element
3999                    && let Some(ref var_name) = rel.variable
4000                    && !var_name.is_empty()
4001                    && vars_in_scope[..vars_before_pattern]
4002                        .iter()
4003                        .any(|v| v.name == *var_name)
4004                {
4005                    bound.insert(var_name.clone());
4006                }
4007            }
4008            bound
4009        };
4010
4011        // Track if any traverses were added (for zero-length path detection)
4012        let mut had_traverses = false;
4013        // Track the node variable for zero-length path binding
4014        let mut single_node_variable: Option<String> = None;
4015        // Collect node/edge variables for BindPath (fixed-length path binding)
4016        let mut path_node_vars: Vec<String> = Vec::new();
4017        let mut path_edge_vars: Vec<String> = Vec::new();
4018        // Track the last processed outer node variable for QPP source binding.
4019        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4020        let mut last_outer_node_var: Option<String> = None;
4021
4022        // Multi-hop path variables are now supported - path is accumulated across hops
4023        while i < elements.len() {
4024            let element = &elements[i];
4025            match element {
4026                PatternElement::Node(n) => {
4027                    let mut variable = n.variable.clone().unwrap_or_default();
4028                    if variable.is_empty() {
4029                        variable = self.next_anon_var();
4030                    }
4031                    // Track first node variable for zero-length path
4032                    if single_node_variable.is_none() {
4033                        single_node_variable = Some(variable.clone());
4034                    }
4035                    let is_bound =
4036                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4037                    if optional && !is_bound {
4038                        optional_pattern_vars.insert(variable.clone());
4039                    }
4040
4041                    if is_bound {
4042                        // Check for type conflict - can't use an Edge/Path as a Node
4043                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4044                            && !info.var_type.is_compatible_with(VariableType::Node)
4045                        {
4046                            return Err(anyhow!(
4047                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4048                                variable,
4049                                info.var_type
4050                            ));
4051                        }
4052                        if let Some(node_filter) =
4053                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4054                        {
4055                            plan = LogicalPlan::Filter {
4056                                input: Box::new(plan),
4057                                predicate: node_filter,
4058                                optional_variables: HashSet::new(),
4059                            };
4060                        }
4061                    } else {
4062                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4063                        if !variable.is_empty() {
4064                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4065                        }
4066                    }
4067
4068                    // Track source node for BindPath
4069                    if path_variable.is_some() && path_node_vars.is_empty() {
4070                        path_node_vars.push(variable.clone());
4071                    }
4072
4073                    // Look ahead for relationships
4074                    let mut current_source_var = variable;
4075                    last_outer_node_var = Some(current_source_var.clone());
4076                    i += 1;
4077                    while i < elements.len() {
4078                        if let PatternElement::Relationship(r) = &elements[i] {
4079                            if i + 1 < elements.len() {
4080                                let target_node_part = &elements[i + 1];
4081                                if let PatternElement::Node(n_target) = target_node_part {
4082                                    // For VLP traversals, pass path_variable through
4083                                    // For fixed-length, we use BindPath instead
4084                                    let is_vlp = r.range.is_some();
4085                                    let traverse_path_var =
4086                                        if is_vlp { path_variable.clone() } else { None };
4087
4088                                    // If we're about to start a VLP segment and there are
4089                                    // collected fixed-hop path vars, create an intermediate
4090                                    // BindPath for the fixed prefix first. The VLP will then
4091                                    // extend this existing path.
4092                                    if is_vlp
4093                                        && let Some(pv) = path_variable.as_ref()
4094                                        && !path_node_vars.is_empty()
4095                                    {
4096                                        plan = LogicalPlan::BindPath {
4097                                            input: Box::new(plan),
4098                                            node_variables: std::mem::take(&mut path_node_vars),
4099                                            edge_variables: std::mem::take(&mut path_edge_vars),
4100                                            path_variable: pv.clone(),
4101                                        };
4102                                        if !is_var_in_scope(vars_in_scope, pv) {
4103                                            add_var_to_scope(
4104                                                vars_in_scope,
4105                                                pv,
4106                                                VariableType::Path,
4107                                            )?;
4108                                        }
4109                                    }
4110
4111                                    // Plan the traverse from the current source node
4112                                    let target_was_bound =
4113                                        n_target.variable.as_ref().is_some_and(|v| {
4114                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4115                                        });
4116                                    let (new_plan, target_var, effective_target) = self
4117                                        .plan_traverse_with_source(
4118                                            plan,
4119                                            vars_in_scope,
4120                                            TraverseParams {
4121                                                rel: r,
4122                                                target_node: n_target,
4123                                                optional,
4124                                                path_variable: traverse_path_var,
4125                                                optional_pattern_vars: optional_pattern_vars
4126                                                    .clone(),
4127                                            },
4128                                            &current_source_var,
4129                                            vars_before_pattern,
4130                                            &path_bound_edge_vars,
4131                                        )?;
4132                                    plan = new_plan;
4133                                    if optional && !target_was_bound {
4134                                        optional_pattern_vars.insert(target_var.clone());
4135                                    }
4136
4137                                    // Track edge/target node for BindPath
4138                                    if path_variable.is_some() && !is_vlp {
4139                                        // Use the edge variable if given, otherwise use
4140                                        // the internal tracking column pattern.
4141                                        // Use effective_target (which may be __rebound_x
4142                                        // for bound-target traversals) to match the actual
4143                                        // column name produced by GraphTraverseExec.
4144                                        if let Some(ev) = &r.variable {
4145                                            path_edge_vars.push(ev.clone());
4146                                        } else {
4147                                            path_edge_vars
4148                                                .push(format!("__eid_to_{}", effective_target));
4149                                        }
4150                                        path_node_vars.push(target_var.clone());
4151                                    }
4152
4153                                    current_source_var = target_var;
4154                                    last_outer_node_var = Some(current_source_var.clone());
4155                                    had_traverses = true;
4156                                    i += 2;
4157                                } else {
4158                                    return Err(anyhow!("Relationship must be followed by a node"));
4159                                }
4160                            } else {
4161                                return Err(anyhow!("Relationship cannot be the last element"));
4162                            }
4163                        } else {
4164                            break;
4165                        }
4166                    }
4167                }
4168                PatternElement::Relationship(_) => {
4169                    return Err(anyhow!("Pattern must start with a node"));
4170                }
4171                PatternElement::Parenthesized { pattern, range } => {
4172                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4173                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4174                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4175                        return Err(anyhow!(
4176                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4177                        ));
4178                    }
4179
4180                    let source_node = match &pattern.elements[0] {
4181                        PatternElement::Node(n) => n,
4182                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4183                    };
4184
4185                    // Extract all relationship-node pairs (QPP steps)
4186                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4187                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4188                        let rel = match &pattern.elements[pair_idx] {
4189                            PatternElement::Relationship(r) => r,
4190                            _ => {
4191                                return Err(anyhow!(
4192                                    "Quantified pattern element at position {} must be a relationship",
4193                                    pair_idx
4194                                ));
4195                            }
4196                        };
4197                        let node = match &pattern.elements[pair_idx + 1] {
4198                            PatternElement::Node(n) => n,
4199                            _ => {
4200                                return Err(anyhow!(
4201                                    "Quantified pattern element at position {} must be a node",
4202                                    pair_idx + 1
4203                                ));
4204                            }
4205                        };
4206                        // Reject nested quantifiers
4207                        if rel.range.is_some() {
4208                            return Err(anyhow!(
4209                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4210                            ));
4211                        }
4212                        qpp_rels.push((rel, node));
4213                    }
4214
4215                    // Check if there's an outer target node after the Parenthesized element.
4216                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4217                    // target that should receive the traversal result.
4218                    let inner_target_node = qpp_rels.last().unwrap().1;
4219                    let outer_target_node = if i + 1 < elements.len() {
4220                        match &elements[i + 1] {
4221                            PatternElement::Node(n) => Some(n),
4222                            _ => None,
4223                        }
4224                    } else {
4225                        None
4226                    };
4227                    // Use the outer target for variable binding and filters; inner target
4228                    // labels are used for state constraints within the NFA.
4229                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4230
4231                    // For simple 3-element single-hop QPP without intermediate label constraints,
4232                    // fall back to existing VLP behavior (copy range to relationship).
4233                    let use_simple_vlp = qpp_rels.len() == 1
4234                        && inner_target_node
4235                            .labels
4236                            .first()
4237                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4238                            .is_none();
4239
4240                    // Plan source node.
4241                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4242                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4243                    // node variable, use it; otherwise fall back to the inner source.
4244                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4245                        // The preceding outer node is already bound and in scope
4246                        // Apply any property filters from the inner source node
4247                        if let Some(prop_filter) =
4248                            self.properties_to_expr(outer_src, &source_node.properties)
4249                        {
4250                            plan = LogicalPlan::Filter {
4251                                input: Box::new(plan),
4252                                predicate: prop_filter,
4253                                optional_variables: HashSet::new(),
4254                            };
4255                        }
4256                        outer_src.clone()
4257                    } else {
4258                        let sv = source_node
4259                            .variable
4260                            .clone()
4261                            .filter(|v| !v.is_empty())
4262                            .unwrap_or_else(|| self.next_anon_var());
4263
4264                        if is_var_in_scope(vars_in_scope, &sv) {
4265                            // Source is already bound, apply property filter if needed
4266                            if let Some(prop_filter) =
4267                                self.properties_to_expr(&sv, &source_node.properties)
4268                            {
4269                                plan = LogicalPlan::Filter {
4270                                    input: Box::new(plan),
4271                                    predicate: prop_filter,
4272                                    optional_variables: HashSet::new(),
4273                                };
4274                            }
4275                        } else {
4276                            // Source is unbound, scan it
4277                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4278                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4279                            if optional {
4280                                optional_pattern_vars.insert(sv.clone());
4281                            }
4282                        }
4283                        sv
4284                    };
4285
4286                    if use_simple_vlp {
4287                        // Simple single-hop QPP: apply range to relationship and use VLP path
4288                        let mut relationship = qpp_rels[0].0.clone();
4289                        relationship.range = range.clone();
4290
4291                        let target_was_bound = target_node
4292                            .variable
4293                            .as_ref()
4294                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4295                        let (new_plan, target_var, _effective_target) = self
4296                            .plan_traverse_with_source(
4297                                plan,
4298                                vars_in_scope,
4299                                TraverseParams {
4300                                    rel: &relationship,
4301                                    target_node,
4302                                    optional,
4303                                    path_variable: path_variable.clone(),
4304                                    optional_pattern_vars: optional_pattern_vars.clone(),
4305                                },
4306                                &source_variable,
4307                                vars_before_pattern,
4308                                &path_bound_edge_vars,
4309                            )?;
4310                        plan = new_plan;
4311                        if optional && !target_was_bound {
4312                            optional_pattern_vars.insert(target_var);
4313                        }
4314                    } else {
4315                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4316                        let mut qpp_step_infos = Vec::new();
4317                        let mut all_edge_type_ids = Vec::new();
4318
4319                        for (rel, node) in &qpp_rels {
4320                            let mut step_edge_type_ids = Vec::new();
4321                            if rel.types.is_empty() {
4322                                step_edge_type_ids = self.schema.all_edge_type_ids();
4323                            } else {
4324                                for type_name in &rel.types {
4325                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4326                                        step_edge_type_ids.push(edge_meta.id);
4327                                    }
4328                                }
4329                            }
4330                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4331
4332                            let target_label = node.labels.first().and_then(|l| {
4333                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4334                            });
4335
4336                            qpp_step_infos.push(QppStepInfo {
4337                                edge_type_ids: step_edge_type_ids,
4338                                direction: rel.direction.clone(),
4339                                target_label,
4340                            });
4341                        }
4342
4343                        // Deduplicate edge type IDs for adjacency warming
4344                        all_edge_type_ids.sort_unstable();
4345                        all_edge_type_ids.dedup();
4346
4347                        // Compute iteration bounds from range
4348                        let hops_per_iter = qpp_step_infos.len();
4349                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4350                        let (min_iter, max_iter) = if let Some(range) = range {
4351                            let min = range.min.unwrap_or(1) as usize;
4352                            let max = range
4353                                .max
4354                                .map(|m| m as usize)
4355                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4356                            (min, max)
4357                        } else {
4358                            (1, 1)
4359                        };
4360                        let min_hops = min_iter * hops_per_iter;
4361                        let max_hops = max_iter * hops_per_iter;
4362
4363                        // Target variable from the last node in the QPP sub-pattern
4364                        let target_variable = target_node
4365                            .variable
4366                            .clone()
4367                            .filter(|v| !v.is_empty())
4368                            .unwrap_or_else(|| self.next_anon_var());
4369
4370                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4371
4372                        // Determine target label for the final node
4373                        let target_label_meta = target_node
4374                            .labels
4375                            .first()
4376                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4377
4378                        // Collect scope match variables
4379                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4380                            [vars_before_pattern..]
4381                            .iter()
4382                            .map(|v| v.name.clone())
4383                            .collect();
4384                        scope_match_variables.insert(target_variable.clone());
4385
4386                        // Handle bound target: use rebound variable for traverse
4387                        let rebound_target_var = if target_is_bound {
4388                            Some(target_variable.clone())
4389                        } else {
4390                            None
4391                        };
4392                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4393                            format!("__rebound_{}", bv)
4394                        } else {
4395                            target_variable.clone()
4396                        };
4397
4398                        plan = LogicalPlan::Traverse {
4399                            input: Box::new(plan),
4400                            edge_type_ids: all_edge_type_ids,
4401                            direction: qpp_rels[0].0.direction.clone(),
4402                            source_variable: source_variable.to_string(),
4403                            target_variable: effective_target_var.clone(),
4404                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4405                            step_variable: None, // QPP doesn't expose intermediate edges
4406                            min_hops,
4407                            max_hops,
4408                            optional,
4409                            target_filter: self.node_filter_expr(
4410                                &target_variable,
4411                                &target_node.labels,
4412                                &target_node.properties,
4413                            ),
4414                            path_variable: path_variable.clone(),
4415                            edge_properties: HashSet::new(),
4416                            is_variable_length: true,
4417                            optional_pattern_vars: optional_pattern_vars.clone(),
4418                            scope_match_variables,
4419                            edge_filter_expr: None,
4420                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4421                            qpp_steps: Some(qpp_step_infos),
4422                        };
4423
4424                        // Handle bound target: filter rebound results against original variable
4425                        if let Some(ref btv) = rebound_target_var {
4426                            // Filter: __rebound_x._vid = x._vid
4427                            let filter_pred = Expr::BinaryOp {
4428                                left: Box::new(Expr::Property(
4429                                    Box::new(Expr::Variable(effective_target_var.clone())),
4430                                    "_vid".to_string(),
4431                                )),
4432                                op: BinaryOp::Eq,
4433                                right: Box::new(Expr::Property(
4434                                    Box::new(Expr::Variable(btv.clone())),
4435                                    "_vid".to_string(),
4436                                )),
4437                            };
4438                            plan = LogicalPlan::Filter {
4439                                input: Box::new(plan),
4440                                predicate: filter_pred,
4441                                optional_variables: if optional {
4442                                    optional_pattern_vars.clone()
4443                                } else {
4444                                    HashSet::new()
4445                                },
4446                            };
4447                        }
4448
4449                        // Add target variable to scope
4450                        if !target_is_bound {
4451                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4452                        }
4453
4454                        // Add path variable to scope
4455                        if let Some(ref pv) = path_variable
4456                            && !pv.is_empty()
4457                            && !is_var_in_scope(vars_in_scope, pv)
4458                        {
4459                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4460                        }
4461                    }
4462                    had_traverses = true;
4463
4464                    // Skip the outer target node if we consumed it
4465                    if outer_target_node.is_some() {
4466                        i += 2; // skip both Parenthesized and the following Node
4467                    } else {
4468                        i += 1;
4469                    }
4470                }
4471            }
4472        }
4473
4474        // If this is a single-node pattern with a path variable, bind the zero-length path
4475        // E.g., `p = (a)` should create a Path with one node and zero edges
4476        if let Some(ref path_var) = path_variable
4477            && !path_var.is_empty()
4478            && !had_traverses
4479            && let Some(node_var) = single_node_variable
4480        {
4481            plan = LogicalPlan::BindZeroLengthPath {
4482                input: Box::new(plan),
4483                node_variable: node_var,
4484                path_variable: path_var.clone(),
4485            };
4486            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4487        }
4488
4489        // Bind fixed-length path from collected node/edge variables
4490        if let Some(ref path_var) = path_variable
4491            && !path_var.is_empty()
4492            && had_traverses
4493            && !path_node_vars.is_empty()
4494            && !is_var_in_scope(vars_in_scope, path_var)
4495        {
4496            plan = LogicalPlan::BindPath {
4497                input: Box::new(plan),
4498                node_variables: path_node_vars,
4499                edge_variables: path_edge_vars,
4500                path_variable: path_var.clone(),
4501            };
4502            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4503        }
4504
4505        Ok(plan)
4506    }
4507
4508    /// Plan a traverse with an explicit source variable name.
4509    ///
4510    /// Returns `(plan, target_variable, effective_target_variable)` where:
4511    /// - `target_variable` is the semantic variable name for downstream scope
4512    /// - `effective_target_variable` is the actual column-name prefix used by
4513    ///   the traverse (may be `__rebound_x` for bound-target patterns)
4514    fn plan_traverse_with_source(
4515        &self,
4516        plan: LogicalPlan,
4517        vars_in_scope: &mut Vec<VariableInfo>,
4518        params: TraverseParams<'_>,
4519        source_variable: &str,
4520        vars_before_pattern: usize,
4521        path_bound_edge_vars: &HashSet<String>,
4522    ) -> Result<(LogicalPlan, String, String)> {
4523        // Check for parameter used as relationship predicate
4524        if let Some(Expr::Parameter(_)) = &params.rel.properties {
4525            return Err(anyhow!(
4526                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
4527            ));
4528        }
4529
4530        let mut edge_type_ids = Vec::new();
4531        let mut dst_labels = Vec::new();
4532        let mut unknown_types = Vec::new();
4533
4534        if params.rel.types.is_empty() {
4535            // All types - include both schema and schemaless edge types
4536            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
4537            edge_type_ids = self.schema.all_edge_type_ids();
4538            for meta in self.schema.edge_types.values() {
4539                dst_labels.extend(meta.dst_labels.iter().cloned());
4540            }
4541        } else {
4542            for type_name in &params.rel.types {
4543                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4544                    // Known type - use standard Traverse with type_id
4545                    edge_type_ids.push(edge_meta.id);
4546                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
4547                } else {
4548                    // Unknown type - will use TraverseMainByType
4549                    unknown_types.push(type_name.clone());
4550                }
4551            }
4552        }
4553
4554        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
4555        edge_type_ids.sort_unstable();
4556        edge_type_ids.dedup();
4557        unknown_types.sort_unstable();
4558        unknown_types.dedup();
4559
4560        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
4561        if target_variable.is_empty() {
4562            target_variable = self.next_anon_var();
4563        }
4564        let target_is_bound =
4565            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
4566
4567        // Check for VariableTypeConflict: relationship variable used as node
4568        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
4569        if let Some(rel_var) = &params.rel.variable
4570            && !rel_var.is_empty()
4571            && rel_var == &target_variable
4572        {
4573            return Err(anyhow!(
4574                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
4575                rel_var
4576            ));
4577        }
4578
4579        // Check for VariableTypeConflict/RelationshipUniquenessViolation
4580        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
4581        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
4582        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
4583        let mut bound_edge_var: Option<String> = None;
4584        let mut bound_edge_list_var: Option<String> = None;
4585        if let Some(rel_var) = &params.rel.variable
4586            && !rel_var.is_empty()
4587            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
4588        {
4589            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
4590                .iter()
4591                .any(|v| v.name == *rel_var);
4592
4593            if info.var_type == VariableType::Edge {
4594                // Check if this edge variable comes from a previous clause (before this MATCH)
4595                if is_from_previous_clause {
4596                    // Edge variable bound from previous clause - this is allowed
4597                    // We'll filter the traversal to match this specific edge
4598                    bound_edge_var = Some(rel_var.clone());
4599                } else {
4600                    // Same relationship variable used twice in the same MATCH clause
4601                    return Err(anyhow!(
4602                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
4603                        rel_var
4604                    ));
4605                }
4606            } else if params.rel.range.is_some()
4607                && is_from_previous_clause
4608                && matches!(
4609                    info.var_type,
4610                    VariableType::Scalar | VariableType::ScalarLiteral
4611                )
4612            {
4613                // Allow VLP rebound against a previously bound relationship list
4614                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
4615                bound_edge_list_var = Some(rel_var.clone());
4616            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
4617                return Err(anyhow!(
4618                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
4619                    rel_var,
4620                    info.var_type
4621                ));
4622            }
4623        }
4624
4625        // Check for VariableTypeConflict: target node variable already bound as non-Node
4626        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
4627        if target_is_bound
4628            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4629            && !info.var_type.is_compatible_with(VariableType::Node)
4630        {
4631            return Err(anyhow!(
4632                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4633                target_variable,
4634                info.var_type
4635            ));
4636        }
4637
4638        // If all requested types are unknown (schemaless), use TraverseMainByType
4639        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
4640        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
4641        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
4642            // All types are unknown - use schemaless traversal
4643
4644            let is_variable_length = params.rel.range.is_some();
4645
4646            const DEFAULT_MAX_HOPS: usize = 100;
4647            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4648                let min = range.min.unwrap_or(1) as usize;
4649                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4650                (min, max)
4651            } else {
4652                (1, 1)
4653            };
4654
4655            // For both single-hop and variable-length paths:
4656            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4657            //   Single-hop: step_var holds a single edge object
4658            //   VLP: step_var holds a list of edge objects
4659            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4660            let step_var = params.rel.variable.clone();
4661            let path_var = params.path_variable.clone();
4662
4663            // Compute scope_match_variables for relationship uniqueness scoping.
4664            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4665                .iter()
4666                .map(|v| v.name.clone())
4667                .collect();
4668            if let Some(ref sv) = step_var {
4669                // Only add the step variable to scope if it's NOT rebound from a previous clause.
4670                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
4671                // filtering because the second MATCH intentionally reuses the same edge.
4672                if bound_edge_var.is_none() {
4673                    scope_match_variables.insert(sv.clone());
4674                }
4675            }
4676            scope_match_variables.insert(target_variable.clone());
4677            // Include bound edge variables from this path for cross-segment Trail mode
4678            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
4679            // edges already claimed by a bound relationship [r] in the same path.
4680            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
4681            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
4682            scope_match_variables.extend(
4683                path_bound_edge_vars
4684                    .iter()
4685                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
4686                    .cloned(),
4687            );
4688
4689            let mut plan = LogicalPlan::TraverseMainByType {
4690                type_names: unknown_types,
4691                input: Box::new(plan),
4692                direction: params.rel.direction.clone(),
4693                source_variable: source_variable.to_string(),
4694                target_variable: target_variable.clone(),
4695                step_variable: step_var.clone(),
4696                min_hops,
4697                max_hops,
4698                optional: params.optional,
4699                target_filter: self.node_filter_expr(
4700                    &target_variable,
4701                    &params.target_node.labels,
4702                    &params.target_node.properties,
4703                ),
4704                path_variable: path_var.clone(),
4705                is_variable_length,
4706                optional_pattern_vars: params.optional_pattern_vars.clone(),
4707                scope_match_variables,
4708                edge_filter_expr: if is_variable_length {
4709                    let filter_var = step_var
4710                        .clone()
4711                        .unwrap_or_else(|| "__anon_edge".to_string());
4712                    self.properties_to_expr(&filter_var, &params.rel.properties)
4713                } else {
4714                    None
4715                },
4716                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4717            };
4718
4719            // Only apply bound target filter for Imported variables (from outer scope/subquery).
4720            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4721            // uses Parameter which requires the value to be in params (subquery context).
4722            if target_is_bound
4723                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4724                && info.var_type == VariableType::Imported
4725            {
4726                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4727            }
4728
4729            // Apply relationship property predicates for fixed-length schemaless
4730            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
4731            // For VLP, predicates are stored inline in edge_filter_expr (above).
4732            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4733            if !is_variable_length
4734                && let Some(edge_var_name) = step_var.as_ref()
4735                && let Some(edge_prop_filter) =
4736                    self.properties_to_expr(edge_var_name, &params.rel.properties)
4737            {
4738                let filter_optional_vars = if params.optional {
4739                    params.optional_pattern_vars.clone()
4740                } else {
4741                    HashSet::new()
4742                };
4743                plan = LogicalPlan::Filter {
4744                    input: Box::new(plan),
4745                    predicate: edge_prop_filter,
4746                    optional_variables: filter_optional_vars,
4747                };
4748            }
4749
4750            // Add the bound variables to scope
4751            if let Some(sv) = &step_var {
4752                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
4753                if is_variable_length
4754                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
4755                {
4756                    info.is_vlp = true;
4757                }
4758            }
4759            if let Some(pv) = &path_var
4760                && !is_var_in_scope(vars_in_scope, pv)
4761            {
4762                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4763            }
4764            if !is_var_in_scope(vars_in_scope, &target_variable) {
4765                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4766            }
4767
4768            return Ok((plan, target_variable.clone(), target_variable));
4769        }
4770
4771        // If we have a mix of known and unknown types, error for now
4772        // (could be extended to Union of Traverse + TraverseMainByType)
4773        if !unknown_types.is_empty() {
4774            return Err(anyhow!(
4775                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
4776                unknown_types
4777            ));
4778        }
4779
4780        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
4781            // Use first label for target_label_id
4782            // For schemaless support, allow unknown target labels
4783            self.schema.get_label_case_insensitive(label_name)
4784        } else if !target_is_bound {
4785            // Infer from edge type(s)
4786            let unique_dsts: Vec<_> = dst_labels
4787                .into_iter()
4788                .collect::<HashSet<_>>()
4789                .into_iter()
4790                .collect();
4791            if unique_dsts.len() == 1 {
4792                let label_name = &unique_dsts[0];
4793                self.schema.get_label_case_insensitive(label_name)
4794            } else {
4795                // Multiple or no destination labels inferred - allow any target
4796                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
4797                // where the edge type can connect to multiple labels
4798                None
4799            }
4800        } else {
4801            None
4802        };
4803
4804        // Check if this is a variable-length pattern (has range specifier like *1..3)
4805        let is_variable_length = params.rel.range.is_some();
4806
4807        // For VLP patterns, default min to 1 and max to a reasonable limit.
4808        // For single-hop patterns (no range), both are 1.
4809        const DEFAULT_MAX_HOPS: usize = 100;
4810        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4811            let min = range.min.unwrap_or(1) as usize;
4812            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4813            (min, max)
4814        } else {
4815            (1, 1)
4816        };
4817
4818        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4819        //   Single-hop: step_var holds a single edge object
4820        //   VLP: step_var holds a list of edge objects
4821        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4822        let step_var = params.rel.variable.clone();
4823        let path_var = params.path_variable.clone();
4824
4825        // If we have a bound edge variable from a previous clause, use a temp variable
4826        // for the Traverse step, then filter to match the bound edge
4827        let rebound_var = bound_edge_var
4828            .as_ref()
4829            .or(bound_edge_list_var.as_ref())
4830            .cloned();
4831        let effective_step_var = if let Some(ref bv) = rebound_var {
4832            Some(format!("__rebound_{}", bv))
4833        } else {
4834            step_var.clone()
4835        };
4836
4837        // If we have a bound target variable from a previous clause (e.g. WITH),
4838        // use a temp variable for the Traverse step, then filter to match the bound
4839        // target — mirroring the bound edge pattern above.
4840        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
4841            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
4842                .map(|info| info.var_type == VariableType::Imported)
4843                .unwrap_or(false);
4844            if !is_imported {
4845                Some(target_variable.clone())
4846            } else {
4847                None
4848            }
4849        } else {
4850            None
4851        };
4852
4853        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4854            format!("__rebound_{}", bv)
4855        } else {
4856            target_variable.clone()
4857        };
4858
4859        // Collect all variables (node + edge) from the current MATCH clause scope
4860        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
4861        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
4862        // if their associated variable is in this set. This prevents relationship
4863        // uniqueness from being enforced across disconnected MATCH clauses.
4864        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4865            .iter()
4866            .map(|v| v.name.clone())
4867            .collect();
4868        // Include the current traverse's edge variable (not yet added to vars_in_scope)
4869        if let Some(ref sv) = effective_step_var {
4870            scope_match_variables.insert(sv.clone());
4871        }
4872        // Include the target variable (not yet added to vars_in_scope)
4873        scope_match_variables.insert(effective_target_var.clone());
4874        // Include bound edge variables from this path for cross-segment Trail mode
4875        // enforcement (same as the schemaless path above).
4876        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
4877
4878        let mut plan = LogicalPlan::Traverse {
4879            input: Box::new(plan),
4880            edge_type_ids,
4881            direction: params.rel.direction.clone(),
4882            source_variable: source_variable.to_string(),
4883            target_variable: effective_target_var.clone(),
4884            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4885            step_variable: effective_step_var.clone(),
4886            min_hops,
4887            max_hops,
4888            optional: params.optional,
4889            target_filter: self.node_filter_expr(
4890                &target_variable,
4891                &params.target_node.labels,
4892                &params.target_node.properties,
4893            ),
4894            path_variable: path_var.clone(),
4895            edge_properties: HashSet::new(),
4896            is_variable_length,
4897            optional_pattern_vars: params.optional_pattern_vars.clone(),
4898            scope_match_variables,
4899            edge_filter_expr: if is_variable_length {
4900                // Use the step variable name, or a fallback for anonymous edges.
4901                // The variable name is used by properties_to_expr to build
4902                // `var.prop = value` expressions. For BFS property checking,
4903                // only the property name and value matter (the variable name
4904                // is stripped during extraction).
4905                let filter_var = effective_step_var
4906                    .clone()
4907                    .unwrap_or_else(|| "__anon_edge".to_string());
4908                self.properties_to_expr(&filter_var, &params.rel.properties)
4909            } else {
4910                None
4911            },
4912            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4913            qpp_steps: None,
4914        };
4915
4916        // Pre-compute optional variables set for filter nodes in this traverse.
4917        // Used by relationship property filters and bound-edge filters below.
4918        let filter_optional_vars = if params.optional {
4919            params.optional_pattern_vars.clone()
4920        } else {
4921            HashSet::new()
4922        };
4923
4924        // Apply relationship property predicates (e.g. [r {k: v}]).
4925        // For VLP, predicates are stored inline in edge_filter_expr (above).
4926        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4927        if !is_variable_length
4928            && let Some(edge_var_name) = effective_step_var.as_ref()
4929            && let Some(edge_prop_filter) =
4930                self.properties_to_expr(edge_var_name, &params.rel.properties)
4931        {
4932            plan = LogicalPlan::Filter {
4933                input: Box::new(plan),
4934                predicate: edge_prop_filter,
4935                optional_variables: filter_optional_vars.clone(),
4936            };
4937        }
4938
4939        // Only apply bound target filter for Imported variables (from outer scope/subquery).
4940        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4941        // uses Parameter which requires the value to be in params (subquery context).
4942        if target_is_bound
4943            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4944            && info.var_type == VariableType::Imported
4945        {
4946            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4947        }
4948
4949        // If we have a bound edge variable, add a filter to match it
4950        if let Some(ref bv) = bound_edge_var {
4951            let temp_var = format!("__rebound_{}", bv);
4952            let bound_check = Expr::BinaryOp {
4953                left: Box::new(Expr::Property(
4954                    Box::new(Expr::Variable(temp_var)),
4955                    "_eid".to_string(),
4956                )),
4957                op: BinaryOp::Eq,
4958                right: Box::new(Expr::Property(
4959                    Box::new(Expr::Variable(bv.clone())),
4960                    "_eid".to_string(),
4961                )),
4962            };
4963            plan = LogicalPlan::Filter {
4964                input: Box::new(plan),
4965                predicate: bound_check,
4966                optional_variables: filter_optional_vars.clone(),
4967            };
4968        }
4969
4970        // If we have a bound relationship list variable for a VLP pattern,
4971        // add a filter to match the traversed relationship list exactly.
4972        if let Some(ref bv) = bound_edge_list_var {
4973            let temp_var = format!("__rebound_{}", bv);
4974            let temp_eids = Expr::ListComprehension {
4975                variable: "__rebound_edge".to_string(),
4976                list: Box::new(Expr::Variable(temp_var)),
4977                where_clause: None,
4978                map_expr: Box::new(Expr::FunctionCall {
4979                    name: "toInteger".to_string(),
4980                    args: vec![Expr::Property(
4981                        Box::new(Expr::Variable("__rebound_edge".to_string())),
4982                        "_eid".to_string(),
4983                    )],
4984                    distinct: false,
4985                    window_spec: None,
4986                }),
4987            };
4988            let bound_eids = Expr::ListComprehension {
4989                variable: "__bound_edge".to_string(),
4990                list: Box::new(Expr::Variable(bv.clone())),
4991                where_clause: None,
4992                map_expr: Box::new(Expr::FunctionCall {
4993                    name: "toInteger".to_string(),
4994                    args: vec![Expr::Property(
4995                        Box::new(Expr::Variable("__bound_edge".to_string())),
4996                        "_eid".to_string(),
4997                    )],
4998                    distinct: false,
4999                    window_spec: None,
5000                }),
5001            };
5002            let bound_list_check = Expr::BinaryOp {
5003                left: Box::new(temp_eids),
5004                op: BinaryOp::Eq,
5005                right: Box::new(bound_eids),
5006            };
5007            plan = LogicalPlan::Filter {
5008                input: Box::new(plan),
5009                predicate: bound_list_check,
5010                optional_variables: filter_optional_vars.clone(),
5011            };
5012        }
5013
5014        // If we have a bound target variable (non-imported), add a filter to constrain
5015        // the traversal output to match the previously bound target node.
5016        if let Some(ref bv) = rebound_target_var {
5017            let temp_var = format!("__rebound_{}", bv);
5018            let bound_check = Expr::BinaryOp {
5019                left: Box::new(Expr::Property(
5020                    Box::new(Expr::Variable(temp_var.clone())),
5021                    "_vid".to_string(),
5022                )),
5023                op: BinaryOp::Eq,
5024                right: Box::new(Expr::Property(
5025                    Box::new(Expr::Variable(bv.clone())),
5026                    "_vid".to_string(),
5027                )),
5028            };
5029            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5030            // so that OptionalFilterExec excludes it from the grouping key and
5031            // properly nullifies it in recovery rows when all matches are filtered out.
5032            // Without this, each traverse result creates its own group (keyed by
5033            // __rebound_c._vid), and null-row recovery emits a spurious null row
5034            // for every non-matching target instead of one per source group.
5035            let mut rebound_filter_vars = filter_optional_vars;
5036            if params.optional {
5037                rebound_filter_vars.insert(temp_var);
5038            }
5039            plan = LogicalPlan::Filter {
5040                input: Box::new(plan),
5041                predicate: bound_check,
5042                optional_variables: rebound_filter_vars,
5043            };
5044        }
5045
5046        // Add the bound variables to scope
5047        // Skip adding the edge variable if it's already bound from a previous clause
5048        if let Some(sv) = &step_var
5049            && bound_edge_var.is_none()
5050            && bound_edge_list_var.is_none()
5051        {
5052            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5053            if is_variable_length
5054                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5055            {
5056                info.is_vlp = true;
5057            }
5058        }
5059        if let Some(pv) = &path_var
5060            && !is_var_in_scope(vars_in_scope, pv)
5061        {
5062            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5063        }
5064        if !is_var_in_scope(vars_in_scope, &target_variable) {
5065            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5066        }
5067
5068        Ok((plan, target_variable, effective_target_var))
5069    }
5070
5071    /// Combine a new scan plan with an existing plan.
5072    ///
5073    /// If the existing plan is `Empty`, returns the new plan directly.
5074    /// Otherwise, wraps them in a `CrossJoin`.
5075    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5076        if matches!(existing, LogicalPlan::Empty) {
5077            new
5078        } else {
5079            LogicalPlan::CrossJoin {
5080                left: Box::new(existing),
5081                right: Box::new(new),
5082            }
5083        }
5084    }
5085
5086    /// Split node map predicates into scan-pushable and residual filters.
5087    ///
5088    /// A predicate is scan-pushable when its value expression references only
5089    /// the node variable itself (or no variables). Predicates referencing other
5090    /// in-scope variables (correlated predicates) are returned as residual so
5091    /// they can be applied after joining with the existing plan.
5092    fn split_node_property_filters_for_scan(
5093        &self,
5094        variable: &str,
5095        properties: &Option<Expr>,
5096    ) -> (Option<Expr>, Option<Expr>) {
5097        let entries = match properties {
5098            Some(Expr::Map(entries)) => entries,
5099            _ => return (None, None),
5100        };
5101
5102        if entries.is_empty() {
5103            return (None, None);
5104        }
5105
5106        let mut pushdown_entries = Vec::new();
5107        let mut residual_entries = Vec::new();
5108
5109        for (prop, val_expr) in entries {
5110            let vars = collect_expr_variables(val_expr);
5111            if vars.iter().all(|v| v == variable) {
5112                pushdown_entries.push((prop.clone(), val_expr.clone()));
5113            } else {
5114                residual_entries.push((prop.clone(), val_expr.clone()));
5115            }
5116        }
5117
5118        let pushdown_map = if pushdown_entries.is_empty() {
5119            None
5120        } else {
5121            Some(Expr::Map(pushdown_entries))
5122        };
5123        let residual_map = if residual_entries.is_empty() {
5124            None
5125        } else {
5126            Some(Expr::Map(residual_entries))
5127        };
5128
5129        (
5130            self.properties_to_expr(variable, &pushdown_map),
5131            self.properties_to_expr(variable, &residual_map),
5132        )
5133    }
5134
5135    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5136    fn plan_unbound_node(
5137        &self,
5138        node: &NodePattern,
5139        variable: &str,
5140        plan: LogicalPlan,
5141        optional: bool,
5142    ) -> Result<LogicalPlan> {
5143        // Properties handling
5144        let properties = match &node.properties {
5145            Some(Expr::Map(entries)) => entries.as_slice(),
5146            Some(Expr::Parameter(_)) => {
5147                return Err(anyhow!(
5148                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5149                ));
5150            }
5151            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5152            None => &[],
5153        };
5154
5155        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5156
5157        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5158            if let Some(predicate) = residual {
5159                LogicalPlan::Filter {
5160                    input: Box::new(input),
5161                    predicate,
5162                    optional_variables: HashSet::new(),
5163                }
5164            } else {
5165                input
5166            }
5167        };
5168
5169        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5170            self.split_node_property_filters_for_scan(variable, &node.properties)
5171        } else {
5172            (self.properties_to_expr(variable, &node.properties), None)
5173        };
5174
5175        // Check for ext_id in properties when no label is specified
5176        if node.labels.is_empty() {
5177            // Try to find ext_id property for main table lookup
5178            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5179                // Extract the ext_id value as a string
5180                let ext_id = match ext_id_value {
5181                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5182                    _ => {
5183                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5184                    }
5185                };
5186
5187                // Build filter for remaining properties (excluding ext_id)
5188                let remaining_props: Vec<_> = properties
5189                    .iter()
5190                    .filter(|(k, _)| k != "ext_id")
5191                    .cloned()
5192                    .collect();
5193
5194                let remaining_expr = if remaining_props.is_empty() {
5195                    None
5196                } else {
5197                    Some(Expr::Map(remaining_props))
5198                };
5199
5200                let (prop_filter, residual_filter) = if has_existing_scope {
5201                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5202                } else {
5203                    (self.properties_to_expr(variable, &remaining_expr), None)
5204                };
5205
5206                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5207                    variable: variable.to_string(),
5208                    ext_id,
5209                    filter: prop_filter,
5210                    optional,
5211                };
5212
5213                let joined = Self::join_with_plan(plan, ext_id_lookup);
5214                return Ok(apply_residual_filter(joined, residual_filter));
5215            }
5216
5217            // No ext_id: create ScanAll for unlabeled node pattern
5218            let scan_all = LogicalPlan::ScanAll {
5219                variable: variable.to_string(),
5220                filter: node_scan_filter,
5221                optional,
5222            };
5223
5224            let joined = Self::join_with_plan(plan, scan_all);
5225            return Ok(apply_residual_filter(joined, node_residual_filter));
5226        }
5227
5228        // Use first label for label_id (primary label for dataset selection)
5229        let label_name = &node.labels[0];
5230
5231        // Check if label exists in schema
5232        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5233            // Known label: use standard Scan
5234            let scan = LogicalPlan::Scan {
5235                label_id: label_meta.id,
5236                labels: node.labels.clone(),
5237                variable: variable.to_string(),
5238                filter: node_scan_filter,
5239                optional,
5240            };
5241
5242            let joined = Self::join_with_plan(plan, scan);
5243            Ok(apply_residual_filter(joined, node_residual_filter))
5244        } else {
5245            // Unknown label: use ScanMainByLabels for schemaless support
5246            let scan_main = LogicalPlan::ScanMainByLabels {
5247                labels: node.labels.clone(),
5248                variable: variable.to_string(),
5249                filter: node_scan_filter,
5250                optional,
5251            };
5252
5253            let joined = Self::join_with_plan(plan, scan_main);
5254            Ok(apply_residual_filter(joined, node_residual_filter))
5255        }
5256    }
5257
5258    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5259    ///
5260    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5261    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5262    fn plan_where_clause(
5263        &self,
5264        predicate: &Expr,
5265        plan: LogicalPlan,
5266        vars_in_scope: &[VariableInfo],
5267        optional_vars: HashSet<String>,
5268    ) -> Result<LogicalPlan> {
5269        // Validate no aggregation functions in WHERE clause
5270        validate_no_aggregation_in_where(predicate)?;
5271
5272        // Validate all variables used are in scope
5273        validate_expression_variables(predicate, vars_in_scope)?;
5274
5275        // Validate expression types (function args, boolean operators)
5276        validate_expression(predicate, vars_in_scope)?;
5277
5278        // Check that WHERE predicate isn't a bare node/edge/path variable
5279        if let Expr::Variable(var_name) = predicate
5280            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5281            && matches!(
5282                info.var_type,
5283                VariableType::Node | VariableType::Edge | VariableType::Path
5284            )
5285        {
5286            return Err(anyhow!(
5287                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5288                info.var_type
5289            ));
5290        }
5291
5292        let mut plan = plan;
5293
5294        // Transform VALID_AT macro to function call
5295        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5296
5297        let mut current_predicate =
5298            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5299
5300        // 1. Try to extract vector_similarity predicate for optimization
5301        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
5302            let vs = &extraction.predicate;
5303            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
5304                plan = Self::replace_scan_with_knn(
5305                    plan,
5306                    &vs.variable,
5307                    &vs.property,
5308                    vs.query.clone(),
5309                    vs.threshold,
5310                );
5311                if let Some(residual) = extraction.residual {
5312                    current_predicate = residual;
5313                } else {
5314                    current_predicate = Expr::TRUE;
5315                }
5316            }
5317        }
5318
5319        // 3. Push eligible predicates to Scan OR Traverse filters
5320        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
5321        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
5322        // rows. Let them stay in the Filter operator which handles NULL preservation.
5323        for var in vars_in_scope {
5324            // Skip pushdown for optional variables - they need NULL preservation in Filter
5325            if optional_vars.contains(&var.name) {
5326                continue;
5327            }
5328
5329            // Check if var is produced by a Scan
5330            if Self::find_scan_label_id(&plan, &var.name).is_some() {
5331                let (pushable, residual) =
5332                    Self::extract_variable_predicates(&current_predicate, &var.name);
5333
5334                for pred in pushable {
5335                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
5336                }
5337
5338                if let Some(r) = residual {
5339                    current_predicate = r;
5340                } else {
5341                    current_predicate = Expr::TRUE;
5342                }
5343            } else if Self::is_traverse_target(&plan, &var.name) {
5344                // Push to Traverse
5345                let (pushable, residual) =
5346                    Self::extract_variable_predicates(&current_predicate, &var.name);
5347
5348                for pred in pushable {
5349                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
5350                }
5351
5352                if let Some(r) = residual {
5353                    current_predicate = r;
5354                } else {
5355                    current_predicate = Expr::TRUE;
5356                }
5357            }
5358        }
5359
5360        // 4. Push predicates to Apply.input_filter
5361        // This filters input rows BEFORE executing correlated subqueries.
5362        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
5363
5364        // 5. Add Filter node for any remaining predicates
5365        if !current_predicate.is_true_literal() {
5366            plan = LogicalPlan::Filter {
5367                input: Box::new(plan),
5368                predicate: current_predicate,
5369                optional_variables: optional_vars,
5370            };
5371        }
5372
5373        Ok(plan)
5374    }
5375
5376    fn rewrite_predicates_using_indexes(
5377        &self,
5378        predicate: &Expr,
5379        plan: &LogicalPlan,
5380        vars_in_scope: &[VariableInfo],
5381    ) -> Result<Expr> {
5382        let mut rewritten = predicate.clone();
5383
5384        for var in vars_in_scope {
5385            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
5386                // Find label name
5387                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
5388
5389                if let Some(label) = label_name
5390                    && let Some(props) = self.schema.properties.get(&label)
5391                {
5392                    for (gen_col, meta) in props {
5393                        if meta.generation_expression.is_some() {
5394                            // Use cached parsed expression
5395                            if let Some(schema_expr) =
5396                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
5397                            {
5398                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
5399                                rewritten = Self::replace_expression(
5400                                    rewritten,
5401                                    schema_expr,
5402                                    &var.name,
5403                                    gen_col,
5404                                );
5405                            }
5406                        }
5407                    }
5408                }
5409            }
5410        }
5411        Ok(rewritten)
5412    }
5413
5414    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
5415        // First, normalize schema_expr to use query_var
5416        let schema_var = schema_expr.extract_variable();
5417
5418        if let Some(s_var) = schema_var {
5419            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
5420
5421            if expr == target_expr {
5422                return Expr::Property(
5423                    Box::new(Expr::Variable(query_var.to_string())),
5424                    gen_col.to_string(),
5425                );
5426            }
5427        }
5428
5429        // Recurse
5430        match expr {
5431            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
5432                left: Box::new(Self::replace_expression(
5433                    *left,
5434                    schema_expr,
5435                    query_var,
5436                    gen_col,
5437                )),
5438                op,
5439                right: Box::new(Self::replace_expression(
5440                    *right,
5441                    schema_expr,
5442                    query_var,
5443                    gen_col,
5444                )),
5445            },
5446            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
5447                op,
5448                expr: Box::new(Self::replace_expression(
5449                    *expr,
5450                    schema_expr,
5451                    query_var,
5452                    gen_col,
5453                )),
5454            },
5455            Expr::FunctionCall {
5456                name,
5457                args,
5458                distinct,
5459                window_spec,
5460            } => Expr::FunctionCall {
5461                name,
5462                args: args
5463                    .into_iter()
5464                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
5465                    .collect(),
5466                distinct,
5467                window_spec,
5468            },
5469            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
5470                *expr,
5471                schema_expr,
5472                query_var,
5473                gen_col,
5474            ))),
5475            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
5476                *expr,
5477                schema_expr,
5478                query_var,
5479                gen_col,
5480            ))),
5481            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
5482                *expr,
5483                schema_expr,
5484                query_var,
5485                gen_col,
5486            ))),
5487            Expr::ArrayIndex {
5488                array: e,
5489                index: idx,
5490            } => Expr::ArrayIndex {
5491                array: Box::new(Self::replace_expression(
5492                    *e,
5493                    schema_expr,
5494                    query_var,
5495                    gen_col,
5496                )),
5497                index: Box::new(Self::replace_expression(
5498                    *idx,
5499                    schema_expr,
5500                    query_var,
5501                    gen_col,
5502                )),
5503            },
5504            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
5505                array: Box::new(Self::replace_expression(
5506                    *array,
5507                    schema_expr,
5508                    query_var,
5509                    gen_col,
5510                )),
5511                start: start.map(|s| {
5512                    Box::new(Self::replace_expression(
5513                        *s,
5514                        schema_expr,
5515                        query_var,
5516                        gen_col,
5517                    ))
5518                }),
5519                end: end.map(|e| {
5520                    Box::new(Self::replace_expression(
5521                        *e,
5522                        schema_expr,
5523                        query_var,
5524                        gen_col,
5525                    ))
5526                }),
5527            },
5528            Expr::List(exprs) => Expr::List(
5529                exprs
5530                    .into_iter()
5531                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
5532                    .collect(),
5533            ),
5534            Expr::Map(entries) => Expr::Map(
5535                entries
5536                    .into_iter()
5537                    .map(|(k, v)| {
5538                        (
5539                            k,
5540                            Self::replace_expression(v, schema_expr, query_var, gen_col),
5541                        )
5542                    })
5543                    .collect(),
5544            ),
5545            Expr::Property(e, prop) => Expr::Property(
5546                Box::new(Self::replace_expression(
5547                    *e,
5548                    schema_expr,
5549                    query_var,
5550                    gen_col,
5551                )),
5552                prop,
5553            ),
5554            Expr::Case {
5555                expr: case_expr,
5556                when_then,
5557                else_expr,
5558            } => Expr::Case {
5559                expr: case_expr.map(|e| {
5560                    Box::new(Self::replace_expression(
5561                        *e,
5562                        schema_expr,
5563                        query_var,
5564                        gen_col,
5565                    ))
5566                }),
5567                when_then: when_then
5568                    .into_iter()
5569                    .map(|(w, t)| {
5570                        (
5571                            Self::replace_expression(w, schema_expr, query_var, gen_col),
5572                            Self::replace_expression(t, schema_expr, query_var, gen_col),
5573                        )
5574                    })
5575                    .collect(),
5576                else_expr: else_expr.map(|e| {
5577                    Box::new(Self::replace_expression(
5578                        *e,
5579                        schema_expr,
5580                        query_var,
5581                        gen_col,
5582                    ))
5583                }),
5584            },
5585            Expr::Reduce {
5586                accumulator,
5587                init,
5588                variable: reduce_var,
5589                list,
5590                expr: reduce_expr,
5591            } => Expr::Reduce {
5592                accumulator,
5593                init: Box::new(Self::replace_expression(
5594                    *init,
5595                    schema_expr,
5596                    query_var,
5597                    gen_col,
5598                )),
5599                variable: reduce_var,
5600                list: Box::new(Self::replace_expression(
5601                    *list,
5602                    schema_expr,
5603                    query_var,
5604                    gen_col,
5605                )),
5606                expr: Box::new(Self::replace_expression(
5607                    *reduce_expr,
5608                    schema_expr,
5609                    query_var,
5610                    gen_col,
5611                )),
5612            },
5613
5614            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
5615            _ => expr,
5616        }
5617    }
5618
5619    /// Check if the variable is the target of a Traverse node
5620    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
5621        match plan {
5622            LogicalPlan::Traverse {
5623                target_variable,
5624                input,
5625                ..
5626            } => target_variable == variable || Self::is_traverse_target(input, variable),
5627            LogicalPlan::Filter { input, .. }
5628            | LogicalPlan::Project { input, .. }
5629            | LogicalPlan::Sort { input, .. }
5630            | LogicalPlan::Limit { input, .. }
5631            | LogicalPlan::Aggregate { input, .. }
5632            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
5633            LogicalPlan::CrossJoin { left, right } => {
5634                Self::is_traverse_target(left, variable)
5635                    || Self::is_traverse_target(right, variable)
5636            }
5637            _ => false,
5638        }
5639    }
5640
5641    /// Push a predicate into a Traverse's target_filter for the specified variable
5642    fn push_predicate_to_traverse(
5643        plan: LogicalPlan,
5644        variable: &str,
5645        predicate: Expr,
5646    ) -> LogicalPlan {
5647        match plan {
5648            LogicalPlan::Traverse {
5649                input,
5650                edge_type_ids,
5651                direction,
5652                source_variable,
5653                target_variable,
5654                target_label_id,
5655                step_variable,
5656                min_hops,
5657                max_hops,
5658                optional,
5659                target_filter,
5660                path_variable,
5661                edge_properties,
5662                is_variable_length,
5663                optional_pattern_vars,
5664                scope_match_variables,
5665                edge_filter_expr,
5666                path_mode,
5667                qpp_steps,
5668            } => {
5669                if target_variable == variable {
5670                    // Found the traverse producing this variable
5671                    let new_filter = match target_filter {
5672                        Some(existing) => Some(Expr::BinaryOp {
5673                            left: Box::new(existing),
5674                            op: BinaryOp::And,
5675                            right: Box::new(predicate),
5676                        }),
5677                        None => Some(predicate),
5678                    };
5679                    LogicalPlan::Traverse {
5680                        input,
5681                        edge_type_ids,
5682                        direction,
5683                        source_variable,
5684                        target_variable,
5685                        target_label_id,
5686                        step_variable,
5687                        min_hops,
5688                        max_hops,
5689                        optional,
5690                        target_filter: new_filter,
5691                        path_variable,
5692                        edge_properties,
5693                        is_variable_length,
5694                        optional_pattern_vars,
5695                        scope_match_variables,
5696                        edge_filter_expr,
5697                        path_mode,
5698                        qpp_steps,
5699                    }
5700                } else {
5701                    // Recurse into input
5702                    LogicalPlan::Traverse {
5703                        input: Box::new(Self::push_predicate_to_traverse(
5704                            *input, variable, predicate,
5705                        )),
5706                        edge_type_ids,
5707                        direction,
5708                        source_variable,
5709                        target_variable,
5710                        target_label_id,
5711                        step_variable,
5712                        min_hops,
5713                        max_hops,
5714                        optional,
5715                        target_filter,
5716                        path_variable,
5717                        edge_properties,
5718                        is_variable_length,
5719                        optional_pattern_vars,
5720                        scope_match_variables,
5721                        edge_filter_expr,
5722                        path_mode,
5723                        qpp_steps,
5724                    }
5725                }
5726            }
5727            LogicalPlan::Filter {
5728                input,
5729                predicate: p,
5730                optional_variables: opt_vars,
5731            } => LogicalPlan::Filter {
5732                input: Box::new(Self::push_predicate_to_traverse(
5733                    *input, variable, predicate,
5734                )),
5735                predicate: p,
5736                optional_variables: opt_vars,
5737            },
5738            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
5739                input: Box::new(Self::push_predicate_to_traverse(
5740                    *input, variable, predicate,
5741                )),
5742                projections,
5743            },
5744            LogicalPlan::CrossJoin { left, right } => {
5745                // Check which side has the variable
5746                if Self::is_traverse_target(&left, variable) {
5747                    LogicalPlan::CrossJoin {
5748                        left: Box::new(Self::push_predicate_to_traverse(
5749                            *left, variable, predicate,
5750                        )),
5751                        right,
5752                    }
5753                } else {
5754                    LogicalPlan::CrossJoin {
5755                        left,
5756                        right: Box::new(Self::push_predicate_to_traverse(
5757                            *right, variable, predicate,
5758                        )),
5759                    }
5760                }
5761            }
5762            other => other,
5763        }
5764    }
5765
5766    /// Plan a WITH clause, handling aggregations and projections.
        ///
        /// The operators are stacked on top of `plan` in this order:
        /// `Aggregate` + renaming `Project` (when any item aggregates) or a plain
        /// `Project`, then `Filter` (WHERE), `Sort` (ORDER BY), `Limit`
        /// (SKIP/LIMIT), a cleanup `Project` that drops passthrough columns, and
        /// finally `Distinct`.
        ///
        /// Returns the new plan together with the variables visible after the
        /// WITH: one entry per alias or bare variable, or every incoming
        /// variable for `WITH *`.
        ///
        /// # Errors
        ///
        /// Fails when an item does not validate against `vars_in_scope`, contains
        /// a pattern predicate, reuses an output column name, mixes aggregation
        /// with a non-grouped reference, uses an aggregate in ORDER BY of a
        /// non-aggregating WITH, leaves a non-variable expression unaliased, or
        /// when SKIP/LIMIT cannot be parsed by `parse_non_negative_integer`.
5767    fn plan_with_clause(
5768        &self,
5769        with_clause: &WithClause,
5770        plan: LogicalPlan,
5771        vars_in_scope: &[VariableInfo],
5772    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
5773        let mut plan = plan;
5774        let mut group_by: Vec<Expr> = Vec::new();
5775        let mut aggregates: Vec<Expr> = Vec::new();
5776        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
5777        let mut has_agg = false;
5778        let mut projections = Vec::new();
5779        let mut new_vars: Vec<VariableInfo> = Vec::new();
5780        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
5781        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
5782        let mut projected_aliases: HashSet<String> = HashSet::new();
5783        let mut has_unaliased_non_variable_expr = false;
5784
            // Classify every WITH item: record its projection, sort it into
            // aggregates / compound aggregates / grouping keys, and derive the
            // variable it exposes to later clauses.
5785        for item in &with_clause.items {
5786            match item {
5787                ReturnItem::All => {
5788                    // WITH * - add all variables in scope
5789                    for v in vars_in_scope {
5790                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5791                        projected_aliases.insert(v.name.clone());
5792                        projected_simple_reprs.insert(v.name.clone());
5793                    }
5794                    new_vars.extend(vars_in_scope.iter().cloned());
5795                }
5796                ReturnItem::Expr { expr, alias, .. } => {
                        // A wildcard expression is treated exactly like `ReturnItem::All`.
5797                    if matches!(expr, Expr::Wildcard) {
5798                        for v in vars_in_scope {
5799                            projections
5800                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5801                            projected_aliases.insert(v.name.clone());
5802                            projected_simple_reprs.insert(v.name.clone());
5803                        }
5804                        new_vars.extend(vars_in_scope.iter().cloned());
5805                    } else {
5806                        // Validate expression variables and syntax
5807                        validate_expression_variables(expr, vars_in_scope)?;
5808                        validate_expression(expr, vars_in_scope)?;
5809                        // Pattern predicates are not allowed in WITH
5810                        if contains_pattern_predicate(expr) {
5811                            return Err(anyhow!(
5812                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
5813                            ));
5814                        }
5815
                            // Every item becomes an output projection, however it is
                            // classified below.
5816                        projections.push((expr.clone(), alias.clone()));
5817                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
5818                            // Bare aggregate — push directly
5819                            has_agg = true;
5820                            aggregates.push(expr.clone());
5821                            projected_aggregate_reprs.insert(expr.to_string_repr());
5822                        } else if !is_window_function(expr)
5823                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
5824                        {
5825                            // Compound aggregate or expression containing aggregates
5826                            has_agg = true;
5827                            compound_agg_exprs.push(expr.clone());
                                // Register each inner aggregate once, keyed by its
                                // string representation.
5828                            for inner in extract_inner_aggregates(expr) {
5829                                let repr = inner.to_string_repr();
5830                                if !projected_aggregate_reprs.contains(&repr) {
5831                                    aggregates.push(inner);
5832                                    projected_aggregate_reprs.insert(repr);
5833                                }
5834                            }
5835                        } else if !group_by.contains(expr) {
                                // Everything else (including window functions) is a
                                // grouping key, added at most once.
5836                            group_by.push(expr.clone());
5837                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
5838                                projected_simple_reprs.insert(expr.to_string_repr());
5839                            }
5840                        }
5841
5842                        // Preserve non-scalar type information when WITH aliases
5843                        // entity/path-capable expressions.
5844                        if let Some(a) = alias {
5845                            if projected_aliases.contains(a) {
5846                                return Err(anyhow!(
5847                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5848                                    a
5849                                ));
5850                            }
5851                            let inferred = infer_with_output_type(expr, vars_in_scope);
5852                            new_vars.push(VariableInfo::new(a.clone(), inferred));
5853                            projected_aliases.insert(a.clone());
5854                        } else if let Expr::Variable(v) = expr {
5855                            if projected_aliases.contains(v) {
5856                                return Err(anyhow!(
5857                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5858                                    v
5859                                ));
5860                            }
5861                            // Preserve the original type if the variable is just passed through
5862                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
5863                                new_vars.push(existing.clone());
5864                            } else {
5865                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
5866                            }
5867                            projected_aliases.insert(v.clone());
5868                        } else {
                                // Only remembered here; the error is raised after the
                                // ORDER BY validation further down.
5869                            has_unaliased_non_variable_expr = true;
5870                        }
5871                    }
5872                }
5873            }
5874        }
5875
5876        // Collect extra variables that need to survive the projection stage
5877        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
5878        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
5879        let mut passthrough_extras: Vec<String> = Vec::new();
5880        let mut seen_passthrough: HashSet<String> = HashSet::new();
5881
            // WHERE may reference incoming variables that the WITH does not project.
5882        if let Some(predicate) = &with_clause.where_clause {
5883            for name in collect_expr_variables(predicate) {
5884                if !projected_names.contains(name.as_str())
5885                    && find_var_in_scope(vars_in_scope, &name).is_some()
5886                    && seen_passthrough.insert(name.clone())
5887                {
5888                    passthrough_extras.push(name);
5889                }
5890            }
5891        }
5892
5893        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
5894        // Carry those variables through the projection so Sort can resolve them.
5895        if !has_agg && let Some(order_by) = &with_clause.order_by {
5896            for item in order_by {
5897                for name in collect_expr_variables(&item.expr) {
5898                    if !projected_names.contains(name.as_str())
5899                        && find_var_in_scope(vars_in_scope, &name).is_some()
5900                        && seen_passthrough.insert(name.clone())
5901                    {
5902                        passthrough_extras.push(name);
5903                    }
5904                }
5905            }
5906        }
5907
            // Passthrough columns are appended to the projection list under their
            // own names and removed again by the cleanup Project at the end.
5908        let needs_cleanup = !passthrough_extras.is_empty();
5909        for extra in &passthrough_extras {
5910            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
5911        }
5912
5913        // Validate compound aggregate expressions: non-aggregate refs must be
5914        // individually present in the group_by as simple variables or properties.
5915        if has_agg {
5916            let group_by_reprs: HashSet<String> =
5917                group_by.iter().map(|e| e.to_string_repr()).collect();
5918            for expr in &compound_agg_exprs {
5919                let mut refs = Vec::new();
5920                collect_non_aggregate_refs(expr, false, &mut refs);
5921                for r in &refs {
5922                    let is_covered = match r {
5923                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
5924                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
5925                    };
5926                    if !is_covered {
5927                        return Err(anyhow!(
5928                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
5929                        ));
5930                    }
5931                }
5932            }
5933        }
5934
5935        if has_agg {
5936            plan = LogicalPlan::Aggregate {
5937                input: Box::new(plan),
5938                group_by,
5939                aggregates,
5940            };
5941
5942            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
5943            // can reference the WITH aliases instead of raw column names.
5944            let rename_projections: Vec<(Expr, Option<String>)> = projections
5945                .iter()
5946                .map(|(expr, alias)| {
5947                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
5948                        // Bare aggregate — reference by column name
5949                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
5950                    } else if is_compound_aggregate(expr)
5951                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
5952                    {
5953                        // Compound aggregate — replace inner aggregates with
5954                        // column references, keep outer expression
5955                        (replace_aggregates_with_columns(expr), alias.clone())
5956                    } else {
                            // Remaining items are referenced by their string
                            // representation (presumably the column name Aggregate
                            // emits for grouping keys — TODO confirm).
5957                        (Expr::Variable(expr.to_string_repr()), alias.clone())
5958                    }
5959                })
5960                .collect();
5961            plan = LogicalPlan::Project {
5962                input: Box::new(plan),
5963                projections: rename_projections,
5964            };
5965        } else if !projections.is_empty() {
5966            plan = LogicalPlan::Project {
5967                input: Box::new(plan),
5968                projections: projections.clone(),
5969            };
5970        }
5971
5972        // Apply the WHERE filter (post-projection, with extras still visible).
5973        if let Some(predicate) = &with_clause.where_clause {
5974            plan = LogicalPlan::Filter {
5975                input: Box::new(plan),
5976                predicate: predicate.clone(),
5977                optional_variables: HashSet::new(),
5978            };
5979        }
5980
5981        // Validate and apply ORDER BY for WITH clause.
5982        // Keep pre-WITH vars in scope for parser compatibility, then apply
5983        // stricter checks for aggregate-containing ORDER BY items.
5984        if let Some(order_by) = &with_clause.order_by {
5985            // Build a mapping from aliases and projected expression reprs to
5986            // output columns of the preceding Project/Aggregate pipeline.
5987            let with_order_aliases: HashMap<String, Expr> = projections
5988                .iter()
5989                .flat_map(|(expr, alias)| {
5990                    let output_col = if let Some(a) = alias {
5991                        a.clone()
5992                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
5993                        aggregate_column_name(expr)
5994                    } else {
5995                        expr.to_string_repr()
5996                    };
5997
5998                    let mut entries = Vec::new();
5999                    // ORDER BY alias
6000                    if let Some(a) = alias {
6001                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
6002                    }
6003                    // ORDER BY projected expression (e.g. me.age)
6004                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
6005                    entries
6006                })
6007                .collect();
6008
                // Validation scope: the WITH outputs first, then any incoming
                // variable whose name is not already present.
6009            let order_by_scope: Vec<VariableInfo> = {
6010                let mut scope = new_vars.clone();
6011                for v in vars_in_scope {
6012                    if !is_var_in_scope(&scope, &v.name) {
6013                        scope.push(v.clone());
6014                    }
6015                }
6016                scope
6017            };
6018            for item in order_by {
6019                validate_expression_variables(&item.expr, &order_by_scope)?;
6020                validate_expression(&item.expr, &order_by_scope)?;
6021                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
6022                if has_aggregate_in_item && !has_agg {
6023                    return Err(anyhow!(
6024                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
6025                    ));
6026                }
6027                if has_agg && has_aggregate_in_item {
6028                    validate_with_order_by_aggregate_item(
6029                        &item.expr,
6030                        &projected_aggregate_reprs,
6031                        &projected_simple_reprs,
6032                        &projected_aliases,
6033                    )?;
6034                }
6035            }
6036            let rewritten_order_by: Vec<SortItem> = order_by
6037                .iter()
6038                .map(|item| {
6039                    let mut expr =
6040                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
6041                    if has_agg {
6042                        // Rewrite any aggregate calls to the aggregate output
6043                        // columns produced by Aggregate.
6044                        expr = replace_aggregates_with_columns(&expr);
6045                        // Then re-map projected property expressions to aliases
6046                        // from the WITH projection.
6047                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
6048                    }
6049                    SortItem {
6050                        expr,
6051                        ascending: item.ascending,
6052                    }
6053                })
6054                .collect();
6055            plan = LogicalPlan::Sort {
6056                input: Box::new(plan),
6057                order_by: rewritten_order_by,
6058            };
6059        }
6060
6061        // Non-variable expressions in WITH must be aliased.
6062        // This check is intentionally placed after ORDER BY validation so
6063        // higher-priority semantic errors (e.g., ambiguous aggregation in
6064        // ORDER BY) can surface first.
6065        if has_unaliased_non_variable_expr {
6066            return Err(anyhow!(
6067                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
6068            ));
6069        }
6070
6071        // Validate and apply SKIP/LIMIT for WITH clause
            // `map` + `transpose` + `?` surface a parse error; `flatten` then
            // collapses the nested Option left over from the helper's own result.
6072        let skip = with_clause
6073            .skip
6074            .as_ref()
6075            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
6076            .transpose()?
6077            .flatten();
6078        let fetch = with_clause
6079            .limit
6080            .as_ref()
6081            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
6082            .transpose()?
6083            .flatten();
6084
6085        if skip.is_some() || fetch.is_some() {
6086            plan = LogicalPlan::Limit {
6087                input: Box::new(plan),
6088                skip,
6089                fetch,
6090            };
6091        }
6092
6093        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
6094        if needs_cleanup {
6095            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
6096                .iter()
6097                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
6098                .collect();
6099            plan = LogicalPlan::Project {
6100                input: Box::new(plan),
6101                projections: cleanup_projections,
6102            };
6103        }
6104
            // NOTE(review): Distinct wraps the plan after Sort and Limit, so rows
            // are de-duplicated after SKIP/LIMIT has been applied. Standard Cypher
            // appears to apply DISTINCT before ORDER BY / SKIP / LIMIT — confirm
            // whether a later stage compensates for this ordering.
6105        if with_clause.distinct {
6106            plan = LogicalPlan::Distinct {
6107                input: Box::new(plan),
6108            };
6109        }
6110
6111        Ok((plan, new_vars))
6112    }
6113
6114    fn plan_with_recursive(
6115        &self,
6116        with_recursive: &WithRecursiveClause,
6117        _prev_plan: LogicalPlan,
6118        vars_in_scope: &[VariableInfo],
6119    ) -> Result<LogicalPlan> {
6120        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
6121        match &*with_recursive.query {
6122            Query::Union { left, right, .. } => {
6123                // Plan the anchor (initial) query with current scope
6124                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
6125
6126                // Plan the recursive query with the CTE name added to scope
6127                // so it can reference itself
6128                let mut recursive_scope = vars_in_scope.to_vec();
6129                recursive_scope.push(VariableInfo::new(
6130                    with_recursive.name.clone(),
6131                    VariableType::Scalar,
6132                ));
6133                let recursive_plan =
6134                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
6135
6136                Ok(LogicalPlan::RecursiveCTE {
6137                    cte_name: with_recursive.name.clone(),
6138                    initial: Box::new(initial_plan),
6139                    recursive: Box::new(recursive_plan),
6140                })
6141            }
6142            _ => Err(anyhow::anyhow!(
6143                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
6144            )),
6145        }
6146    }
6147
6148    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
6149        let entries = match properties {
6150            Some(Expr::Map(entries)) => entries,
6151            _ => return None,
6152        };
6153
6154        if entries.is_empty() {
6155            return None;
6156        }
6157        let mut final_expr = None;
6158        for (prop, val_expr) in entries {
6159            let eq_expr = Expr::BinaryOp {
6160                left: Box::new(Expr::Property(
6161                    Box::new(Expr::Variable(variable.to_string())),
6162                    prop.clone(),
6163                )),
6164                op: BinaryOp::Eq,
6165                right: Box::new(val_expr.clone()),
6166            };
6167
6168            if let Some(e) = final_expr {
6169                final_expr = Some(Expr::BinaryOp {
6170                    left: Box::new(e),
6171                    op: BinaryOp::And,
6172                    right: Box::new(eq_expr),
6173                });
6174            } else {
6175                final_expr = Some(eq_expr);
6176            }
6177        }
6178        final_expr
6179    }
6180
6181    /// Build a filter expression from node properties and labels.
6182    ///
6183    /// This is used for TraverseMainByType where we need to filter target nodes
6184    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
6185    pub fn node_filter_expr(
6186        &self,
6187        variable: &str,
6188        labels: &[String],
6189        properties: &Option<Expr>,
6190    ) -> Option<Expr> {
6191        let mut final_expr = None;
6192
6193        // Add label checks using hasLabel(variable, 'label')
6194        for label in labels {
6195            let label_check = Expr::FunctionCall {
6196                name: "hasLabel".to_string(),
6197                args: vec![
6198                    Expr::Variable(variable.to_string()),
6199                    Expr::Literal(CypherLiteral::String(label.clone())),
6200                ],
6201                distinct: false,
6202                window_spec: None,
6203            };
6204
6205            final_expr = match final_expr {
6206                Some(e) => Some(Expr::BinaryOp {
6207                    left: Box::new(e),
6208                    op: BinaryOp::And,
6209                    right: Box::new(label_check),
6210                }),
6211                None => Some(label_check),
6212            };
6213        }
6214
6215        // Add property checks
6216        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
6217            final_expr = match final_expr {
6218                Some(e) => Some(Expr::BinaryOp {
6219                    left: Box::new(e),
6220                    op: BinaryOp::And,
6221                    right: Box::new(prop_expr),
6222                }),
6223                None => Some(prop_expr),
6224            };
6225        }
6226
6227        final_expr
6228    }
6229
6230    /// Create a filter plan that ensures traversed target matches a bound variable.
6231    ///
6232    /// Used in EXISTS subquery patterns where the target is already bound.
6233    /// Compares the target's VID against the bound variable's VID.
6234    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
6235        // Compare the traverse-discovered target's VID against the bound variable's VID.
6236        // Left side: Property access on the variable from current scope.
6237        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
6238        // We use Variable("{var}._vid") to access the VID column from the traverse output,
6239        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
6240        let bound_check = Expr::BinaryOp {
6241            left: Box::new(Expr::Property(
6242                Box::new(Expr::Variable(target_variable.to_string())),
6243                "_vid".to_string(),
6244            )),
6245            op: BinaryOp::Eq,
6246            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
6247        };
6248        LogicalPlan::Filter {
6249            input: Box::new(plan),
6250            predicate: bound_check,
6251            optional_variables: HashSet::new(),
6252        }
6253    }
6254
6255    /// Replace a Scan node matching the variable with a VectorKnn node
6256    fn replace_scan_with_knn(
6257        plan: LogicalPlan,
6258        variable: &str,
6259        property: &str,
6260        query: Expr,
6261        threshold: Option<f32>,
6262    ) -> LogicalPlan {
6263        match plan {
6264            LogicalPlan::Scan {
6265                label_id,
6266                labels,
6267                variable: scan_var,
6268                filter,
6269                optional,
6270            } => {
6271                if scan_var == variable {
6272                    // Inject any existing scan filter into VectorKnn?
6273                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
6274                    // Typically filter is applied post-Knn or during Knn if supported.
6275                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
6276                    // If `filter` is present on Scan, it must be preserved.
6277                    // We can wrap VectorKnn in Filter if Scan had filter.
6278
6279                    let knn = LogicalPlan::VectorKnn {
6280                        label_id,
6281                        variable: variable.to_string(),
6282                        property: property.to_string(),
6283                        query,
6284                        k: 100, // Default K, should push down LIMIT
6285                        threshold,
6286                    };
6287
6288                    if let Some(f) = filter {
6289                        LogicalPlan::Filter {
6290                            input: Box::new(knn),
6291                            predicate: f,
6292                            optional_variables: HashSet::new(),
6293                        }
6294                    } else {
6295                        knn
6296                    }
6297                } else {
6298                    LogicalPlan::Scan {
6299                        label_id,
6300                        labels,
6301                        variable: scan_var,
6302                        filter,
6303                        optional,
6304                    }
6305                }
6306            }
6307            LogicalPlan::Filter {
6308                input,
6309                predicate,
6310                optional_variables,
6311            } => LogicalPlan::Filter {
6312                input: Box::new(Self::replace_scan_with_knn(
6313                    *input, variable, property, query, threshold,
6314                )),
6315                predicate,
6316                optional_variables,
6317            },
6318            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6319                input: Box::new(Self::replace_scan_with_knn(
6320                    *input, variable, property, query, threshold,
6321                )),
6322                projections,
6323            },
6324            LogicalPlan::Limit { input, skip, fetch } => {
6325                // If we encounter Limit, we should ideally push K down to VectorKnn
6326                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
6327                // Limit comes later.
6328                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
6329                LogicalPlan::Limit {
6330                    input: Box::new(Self::replace_scan_with_knn(
6331                        *input, variable, property, query, threshold,
6332                    )),
6333                    skip,
6334                    fetch,
6335                }
6336            }
6337            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6338                left: Box::new(Self::replace_scan_with_knn(
6339                    *left,
6340                    variable,
6341                    property,
6342                    query.clone(),
6343                    threshold,
6344                )),
6345                right: Box::new(Self::replace_scan_with_knn(
6346                    *right, variable, property, query, threshold,
6347                )),
6348            },
6349            other => other,
6350        }
6351    }
6352
6353    /// Find the label_id for a Scan node matching the given variable
6354    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
6355        match plan {
6356            LogicalPlan::Scan {
6357                label_id,
6358                variable: var,
6359                ..
6360            } if var == variable => Some(*label_id),
6361            LogicalPlan::Filter { input, .. }
6362            | LogicalPlan::Project { input, .. }
6363            | LogicalPlan::Sort { input, .. }
6364            | LogicalPlan::Limit { input, .. }
6365            | LogicalPlan::Aggregate { input, .. }
6366            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
6367            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
6368                .or_else(|| Self::find_scan_label_id(right, variable)),
6369            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
6370            _ => None,
6371        }
6372    }
6373
6374    /// Push a predicate into a Scan's filter for the specified variable
6375    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
6376        match plan {
6377            LogicalPlan::Scan {
6378                label_id,
6379                labels,
6380                variable: var,
6381                filter,
6382                optional,
6383            } if var == variable => {
6384                // Merge the predicate with existing filter
6385                let new_filter = match filter {
6386                    Some(existing) => Some(Expr::BinaryOp {
6387                        left: Box::new(existing),
6388                        op: BinaryOp::And,
6389                        right: Box::new(predicate),
6390                    }),
6391                    None => Some(predicate),
6392                };
6393                LogicalPlan::Scan {
6394                    label_id,
6395                    labels,
6396                    variable: var,
6397                    filter: new_filter,
6398                    optional,
6399                }
6400            }
6401            LogicalPlan::Filter {
6402                input,
6403                predicate: p,
6404                optional_variables: opt_vars,
6405            } => LogicalPlan::Filter {
6406                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6407                predicate: p,
6408                optional_variables: opt_vars,
6409            },
6410            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6411                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6412                projections,
6413            },
6414            LogicalPlan::CrossJoin { left, right } => {
6415                // Check which side has the variable
6416                if Self::find_scan_label_id(&left, variable).is_some() {
6417                    LogicalPlan::CrossJoin {
6418                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
6419                        right,
6420                    }
6421                } else {
6422                    LogicalPlan::CrossJoin {
6423                        left,
6424                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
6425                    }
6426                }
6427            }
6428            LogicalPlan::Traverse {
6429                input,
6430                edge_type_ids,
6431                direction,
6432                source_variable,
6433                target_variable,
6434                target_label_id,
6435                step_variable,
6436                min_hops,
6437                max_hops,
6438                optional,
6439                target_filter,
6440                path_variable,
6441                edge_properties,
6442                is_variable_length,
6443                optional_pattern_vars,
6444                scope_match_variables,
6445                edge_filter_expr,
6446                path_mode,
6447                qpp_steps,
6448            } => LogicalPlan::Traverse {
6449                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6450                edge_type_ids,
6451                direction,
6452                source_variable,
6453                target_variable,
6454                target_label_id,
6455                step_variable,
6456                min_hops,
6457                max_hops,
6458                optional,
6459                target_filter,
6460                path_variable,
6461                edge_properties,
6462                is_variable_length,
6463                optional_pattern_vars,
6464                scope_match_variables,
6465                edge_filter_expr,
6466                path_mode,
6467                qpp_steps,
6468            },
6469            other => other,
6470        }
6471    }
6472
6473    /// Extract predicates that reference only the specified variable
6474    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
6475        let analyzer = PredicateAnalyzer::new();
6476        let analysis = analyzer.analyze(predicate, variable);
6477
6478        // Return pushable predicates and combined residual
6479        let residual = if analysis.residual.is_empty() {
6480            None
6481        } else {
6482            let mut iter = analysis.residual.into_iter();
6483            let first = iter.next().unwrap();
6484            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
6485                left: Box::new(acc),
6486                op: BinaryOp::And,
6487                right: Box::new(e),
6488            }))
6489        };
6490
6491        (analysis.pushable, residual)
6492    }
6493
6494    // =====================================================================
6495    // Apply Predicate Pushdown - Helper Functions
6496    // =====================================================================
6497
6498    /// Split AND-connected predicates into a list.
6499    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
6500        match expr {
6501            Expr::BinaryOp {
6502                left,
6503                op: BinaryOp::And,
6504                right,
6505            } => {
6506                let mut result = Self::split_and_conjuncts(left);
6507                result.extend(Self::split_and_conjuncts(right));
6508                result
6509            }
6510            _ => vec![expr.clone()],
6511        }
6512    }
6513
6514    /// Combine predicates with AND.
6515    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
6516        if predicates.is_empty() {
6517            return None;
6518        }
6519        let mut result = predicates[0].clone();
6520        for pred in predicates.iter().skip(1) {
6521            result = Expr::BinaryOp {
6522                left: Box::new(result),
6523                op: BinaryOp::And,
6524                right: Box::new(pred.clone()),
6525            };
6526        }
6527        Some(result)
6528    }
6529
6530    /// Collect all variable names referenced in an expression.
6531    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
6532        let mut vars = HashSet::new();
6533        Self::collect_expr_variables_impl(expr, &mut vars);
6534        vars
6535    }
6536
6537    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
6538        match expr {
6539            Expr::Variable(name) => {
6540                vars.insert(name.clone());
6541            }
6542            Expr::Property(inner, _) => {
6543                if let Expr::Variable(name) = inner.as_ref() {
6544                    vars.insert(name.clone());
6545                } else {
6546                    Self::collect_expr_variables_impl(inner, vars);
6547                }
6548            }
6549            Expr::BinaryOp { left, right, .. } => {
6550                Self::collect_expr_variables_impl(left, vars);
6551                Self::collect_expr_variables_impl(right, vars);
6552            }
6553            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6554            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
6555            Expr::FunctionCall { args, .. } => {
6556                for arg in args {
6557                    Self::collect_expr_variables_impl(arg, vars);
6558                }
6559            }
6560            Expr::List(items) => {
6561                for item in items {
6562                    Self::collect_expr_variables_impl(item, vars);
6563                }
6564            }
6565            Expr::Case {
6566                expr,
6567                when_then,
6568                else_expr,
6569            } => {
6570                if let Some(e) = expr {
6571                    Self::collect_expr_variables_impl(e, vars);
6572                }
6573                for (w, t) in when_then {
6574                    Self::collect_expr_variables_impl(w, vars);
6575                    Self::collect_expr_variables_impl(t, vars);
6576                }
6577                if let Some(e) = else_expr {
6578                    Self::collect_expr_variables_impl(e, vars);
6579                }
6580            }
6581            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6582            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
6583            // they introduce local variable bindings not in outer scope.
6584            _ => {}
6585        }
6586    }
6587
6588    /// Collect all variables produced by a logical plan.
6589    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
6590        let mut vars = HashSet::new();
6591        Self::collect_plan_variables_impl(plan, &mut vars);
6592        vars
6593    }
6594
6595    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
6596        match plan {
6597            LogicalPlan::Scan { variable, .. } => {
6598                vars.insert(variable.clone());
6599            }
6600            LogicalPlan::Traverse {
6601                target_variable,
6602                step_variable,
6603                input,
6604                path_variable,
6605                ..
6606            } => {
6607                vars.insert(target_variable.clone());
6608                if let Some(sv) = step_variable {
6609                    vars.insert(sv.clone());
6610                }
6611                if let Some(pv) = path_variable {
6612                    vars.insert(pv.clone());
6613                }
6614                Self::collect_plan_variables_impl(input, vars);
6615            }
6616            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
6617            LogicalPlan::Project { input, projections } => {
6618                for (expr, alias) in projections {
6619                    if let Some(a) = alias {
6620                        vars.insert(a.clone());
6621                    } else if let Expr::Variable(v) = expr {
6622                        vars.insert(v.clone());
6623                    }
6624                }
6625                Self::collect_plan_variables_impl(input, vars);
6626            }
6627            LogicalPlan::Apply {
6628                input, subquery, ..
6629            } => {
6630                Self::collect_plan_variables_impl(input, vars);
6631                Self::collect_plan_variables_impl(subquery, vars);
6632            }
6633            LogicalPlan::CrossJoin { left, right } => {
6634                Self::collect_plan_variables_impl(left, vars);
6635                Self::collect_plan_variables_impl(right, vars);
6636            }
6637            LogicalPlan::Unwind {
6638                input, variable, ..
6639            } => {
6640                vars.insert(variable.clone());
6641                Self::collect_plan_variables_impl(input, vars);
6642            }
6643            LogicalPlan::Aggregate { input, .. } => {
6644                Self::collect_plan_variables_impl(input, vars);
6645            }
6646            LogicalPlan::Distinct { input } => {
6647                Self::collect_plan_variables_impl(input, vars);
6648            }
6649            LogicalPlan::Sort { input, .. } => {
6650                Self::collect_plan_variables_impl(input, vars);
6651            }
6652            LogicalPlan::Limit { input, .. } => {
6653                Self::collect_plan_variables_impl(input, vars);
6654            }
6655            LogicalPlan::VectorKnn { variable, .. } => {
6656                vars.insert(variable.clone());
6657            }
6658            LogicalPlan::ProcedureCall { yield_items, .. } => {
6659                for (name, alias) in yield_items {
6660                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
6661                }
6662            }
6663            LogicalPlan::ShortestPath {
6664                input,
6665                path_variable,
6666                ..
6667            } => {
6668                vars.insert(path_variable.clone());
6669                Self::collect_plan_variables_impl(input, vars);
6670            }
6671            LogicalPlan::AllShortestPaths {
6672                input,
6673                path_variable,
6674                ..
6675            } => {
6676                vars.insert(path_variable.clone());
6677                Self::collect_plan_variables_impl(input, vars);
6678            }
6679            LogicalPlan::RecursiveCTE {
6680                initial, recursive, ..
6681            } => {
6682                Self::collect_plan_variables_impl(initial, vars);
6683                Self::collect_plan_variables_impl(recursive, vars);
6684            }
6685            LogicalPlan::SubqueryCall {
6686                input, subquery, ..
6687            } => {
6688                Self::collect_plan_variables_impl(input, vars);
6689                Self::collect_plan_variables_impl(subquery, vars);
6690            }
6691            _ => {}
6692        }
6693    }
6694
6695    /// Extract predicates that only reference variables from Apply's input.
6696    /// Returns (input_only_predicates, remaining_predicates).
6697    fn extract_apply_input_predicates(
6698        predicate: &Expr,
6699        input_variables: &HashSet<String>,
6700        subquery_new_variables: &HashSet<String>,
6701    ) -> (Vec<Expr>, Vec<Expr>) {
6702        let conjuncts = Self::split_and_conjuncts(predicate);
6703        let mut input_preds = Vec::new();
6704        let mut remaining = Vec::new();
6705
6706        for conj in conjuncts {
6707            let vars = Self::collect_expr_variables(&conj);
6708
6709            // Predicate only references input variables (none from subquery)
6710            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
6711            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
6712
6713            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
6714                input_preds.push(conj);
6715            } else {
6716                remaining.push(conj);
6717            }
6718        }
6719
6720        (input_preds, remaining)
6721    }
6722
6723    /// Push eligible predicates into Apply.input_filter.
6724    /// This filters input rows BEFORE executing the correlated subquery.
6725    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
6726        match plan {
6727            LogicalPlan::Apply {
6728                input,
6729                subquery,
6730                input_filter,
6731            } => {
6732                // Collect variables from input plan
6733                let input_vars = Self::collect_plan_variables(&input);
6734
6735                // Collect NEW variables introduced by subquery (not in input)
6736                let subquery_vars = Self::collect_plan_variables(&subquery);
6737                let new_subquery_vars: HashSet<String> =
6738                    subquery_vars.difference(&input_vars).cloned().collect();
6739
6740                // Extract predicates that only reference input variables
6741                let (input_preds, remaining) = Self::extract_apply_input_predicates(
6742                    current_predicate,
6743                    &input_vars,
6744                    &new_subquery_vars,
6745                );
6746
6747                // Update current_predicate to only remaining predicates
6748                *current_predicate = if remaining.is_empty() {
6749                    Expr::TRUE
6750                } else {
6751                    Self::combine_predicates(remaining).unwrap()
6752                };
6753
6754                // Combine extracted predicates with existing input_filter
6755                let new_input_filter = if input_preds.is_empty() {
6756                    input_filter
6757                } else {
6758                    let extracted = Self::combine_predicates(input_preds).unwrap();
6759                    match input_filter {
6760                        Some(existing) => Some(Expr::BinaryOp {
6761                            left: Box::new(existing),
6762                            op: BinaryOp::And,
6763                            right: Box::new(extracted),
6764                        }),
6765                        None => Some(extracted),
6766                    }
6767                };
6768
6769                // Recurse into input plan
6770                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
6771
6772                LogicalPlan::Apply {
6773                    input: Box::new(new_input),
6774                    subquery,
6775                    input_filter: new_input_filter,
6776                }
6777            }
6778            // Recurse into other plan nodes
6779            LogicalPlan::Filter {
6780                input,
6781                predicate,
6782                optional_variables,
6783            } => LogicalPlan::Filter {
6784                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6785                predicate,
6786                optional_variables,
6787            },
6788            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6789                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6790                projections,
6791            },
6792            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
6793                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6794                order_by,
6795            },
6796            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
6797                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6798                skip,
6799                fetch,
6800            },
6801            LogicalPlan::Aggregate {
6802                input,
6803                group_by,
6804                aggregates,
6805            } => LogicalPlan::Aggregate {
6806                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6807                group_by,
6808                aggregates,
6809            },
6810            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6811                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
6812                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
6813            },
6814            LogicalPlan::Traverse {
6815                input,
6816                edge_type_ids,
6817                direction,
6818                source_variable,
6819                target_variable,
6820                target_label_id,
6821                step_variable,
6822                min_hops,
6823                max_hops,
6824                optional,
6825                target_filter,
6826                path_variable,
6827                edge_properties,
6828                is_variable_length,
6829                optional_pattern_vars,
6830                scope_match_variables,
6831                edge_filter_expr,
6832                path_mode,
6833                qpp_steps,
6834            } => LogicalPlan::Traverse {
6835                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6836                edge_type_ids,
6837                direction,
6838                source_variable,
6839                target_variable,
6840                target_label_id,
6841                step_variable,
6842                min_hops,
6843                max_hops,
6844                optional,
6845                target_filter,
6846                path_variable,
6847                edge_properties,
6848                is_variable_length,
6849                optional_pattern_vars,
6850                scope_match_variables,
6851                edge_filter_expr,
6852                path_mode,
6853                qpp_steps,
6854            },
6855            other => other,
6856        }
6857    }
6858}
6859
6860/// Get the expected column name for an aggregate expression.
6861///
6862/// This is the single source of truth for aggregate column naming, used by:
6863/// - Logical planner (to create column references)
6864/// - Physical planner (to rename DataFusion's auto-generated column names)
6865/// - Fallback executor (to name result columns)
6866pub fn aggregate_column_name(expr: &Expr) -> String {
6867    expr.to_string_repr()
6868}
6869
6870/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
6871#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6872pub struct ExplainOutput {
6873    /// Debug-formatted logical plan tree.
6874    pub plan_text: String,
6875    /// Index availability report for each scan in the plan.
6876    pub index_usage: Vec<IndexUsage>,
6877    /// Rough row and cost estimates for the full plan.
6878    pub cost_estimates: CostEstimates,
6879    /// Planner warnings (e.g., missing index, forced full scan).
6880    pub warnings: Vec<String>,
6881    /// Suggested indexes that would improve this query.
6882    pub suggestions: Vec<IndexSuggestion>,
6883}
6884
6885/// Suggestion for creating an index to improve query performance.
6886#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6887pub struct IndexSuggestion {
6888    /// Label or edge type that would benefit from the index.
6889    pub label_or_type: String,
6890    /// Property to index.
6891    pub property: String,
6892    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
6893    pub index_type: String,
6894    /// Human-readable explanation of the performance benefit.
6895    pub reason: String,
6896    /// Ready-to-execute Cypher statement to create the index.
6897    pub create_statement: String,
6898}
6899
6900/// Index availability report for a single scan operator.
6901#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6902pub struct IndexUsage {
6903    pub label_or_type: String,
6904    pub property: String,
6905    pub index_type: String,
6906    /// Whether the index was actually used for this scan.
6907    pub used: bool,
6908    /// Human-readable explanation of why the index was or was not used.
6909    pub reason: Option<String>,
6910}
6911
6912/// Rough cost and row count estimates for a complete logical plan.
6913#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
6914pub struct CostEstimates {
6915    /// Estimated number of rows the plan will produce.
6916    pub estimated_rows: f64,
6917    /// Abstract cost units (lower is cheaper).
6918    pub estimated_cost: f64,
6919}
6920
6921impl QueryPlanner {
6922    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
6923    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
6924        let plan = self.plan(ast)?;
6925        self.explain_logical_plan(&plan)
6926    }
6927
6928    /// Produce an EXPLAIN report for an already-planned logical plan.
6929    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
6930        let index_usage = self.analyze_index_usage(plan)?;
6931        let cost_estimates = self.estimate_costs(plan)?;
6932        let suggestions = self.collect_index_suggestions(plan);
6933        let warnings = Vec::new();
6934        let plan_text = format!("{:#?}", plan);
6935
6936        Ok(ExplainOutput {
6937            plan_text,
6938            index_usage,
6939            cost_estimates,
6940            warnings,
6941            suggestions,
6942        })
6943    }
6944
6945    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
6946        let mut usage = Vec::new();
6947        self.collect_index_usage(plan, &mut usage);
6948        Ok(usage)
6949    }
6950
6951    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
6952        match plan {
6953            LogicalPlan::Scan { .. } => {
6954                // Placeholder: Scan might use index if it was optimized
6955                // Ideally LogicalPlan::Scan should store if it uses index.
6956                // But typically Planner converts Scan to specific index scan or we infer it here.
6957            }
6958            LogicalPlan::VectorKnn {
6959                label_id, property, ..
6960            } => {
6961                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
6962                usage.push(IndexUsage {
6963                    label_or_type: label_name.to_string(),
6964                    property: property.clone(),
6965                    index_type: "VECTOR".to_string(),
6966                    used: true,
6967                    reason: None,
6968                });
6969            }
6970            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
6971            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
6972            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
6973            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
6974            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
6975            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
6976            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
6977            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6978                self.collect_index_usage(left, usage);
6979                self.collect_index_usage(right, usage);
6980            }
6981            _ => {}
6982        }
6983    }
6984
6985    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
6986        Ok(CostEstimates {
6987            estimated_rows: 100.0,
6988            estimated_cost: 10.0,
6989        })
6990    }
6991
6992    /// Collect index suggestions based on query patterns.
6993    ///
6994    /// Currently detects:
6995    /// - Temporal predicates from `uni.validAt()` function calls
6996    /// - Temporal predicates from `VALID_AT` macro expansion
6997    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
6998        let mut suggestions = Vec::new();
6999        self.collect_temporal_suggestions(plan, &mut suggestions);
7000        suggestions
7001    }
7002
7003    /// Recursively collect temporal index suggestions from the plan.
7004    fn collect_temporal_suggestions(
7005        &self,
7006        plan: &LogicalPlan,
7007        suggestions: &mut Vec<IndexSuggestion>,
7008    ) {
7009        match plan {
7010            LogicalPlan::Filter {
7011                input, predicate, ..
7012            } => {
7013                // Check for temporal patterns in the predicate
7014                self.detect_temporal_pattern(predicate, suggestions);
7015                // Recurse into input
7016                self.collect_temporal_suggestions(input, suggestions);
7017            }
7018            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
7019            LogicalPlan::Project { input, .. } => {
7020                self.collect_temporal_suggestions(input, suggestions)
7021            }
7022            LogicalPlan::Limit { input, .. } => {
7023                self.collect_temporal_suggestions(input, suggestions)
7024            }
7025            LogicalPlan::Sort { input, .. } => {
7026                self.collect_temporal_suggestions(input, suggestions)
7027            }
7028            LogicalPlan::Aggregate { input, .. } => {
7029                self.collect_temporal_suggestions(input, suggestions)
7030            }
7031            LogicalPlan::Traverse { input, .. } => {
7032                self.collect_temporal_suggestions(input, suggestions)
7033            }
7034            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
7035                self.collect_temporal_suggestions(left, suggestions);
7036                self.collect_temporal_suggestions(right, suggestions);
7037            }
7038            _ => {}
7039        }
7040    }
7041
7042    /// Detect temporal predicate patterns and suggest indexes.
7043    ///
7044    /// Detects two patterns:
7045    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
7046    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
7047    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
7048        match expr {
7049            // Pattern 1: uni.temporal.validAt() function call
7050            Expr::FunctionCall { name, args, .. }
7051                if name.eq_ignore_ascii_case("uni.temporal.validAt")
7052                    || name.eq_ignore_ascii_case("validAt") =>
7053            {
7054                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
7055                if args.len() >= 2 {
7056                    let start_prop =
7057                        if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1) {
7058                            s.clone()
7059                        } else {
7060                            "valid_from".to_string()
7061                        };
7062
7063                    // Try to extract label from the node expression
7064                    if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
7065                        self.suggest_temporal_index(&var, &start_prop, suggestions);
7066                    }
7067                }
7068            }
7069
7070            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
7071            Expr::BinaryOp {
7072                left,
7073                op: BinaryOp::And,
7074                right,
7075            } => {
7076                // Check left side for `prop <= time` pattern (temporal start condition)
7077                if let Expr::BinaryOp {
7078                    left: prop_expr,
7079                    op: BinaryOp::LtEq,
7080                    ..
7081                } = left.as_ref()
7082                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
7083                    && (prop_name == "valid_from"
7084                        || prop_name.contains("start")
7085                        || prop_name.contains("from")
7086                        || prop_name.contains("begin"))
7087                    && let Some(var) = base.extract_variable()
7088                {
7089                    self.suggest_temporal_index(&var, prop_name, suggestions);
7090                }
7091
7092                // Recurse into both sides of AND
7093                self.detect_temporal_pattern(left.as_ref(), suggestions);
7094                self.detect_temporal_pattern(right.as_ref(), suggestions);
7095            }
7096
7097            // Recurse into other binary ops
7098            Expr::BinaryOp { left, right, .. } => {
7099                self.detect_temporal_pattern(left.as_ref(), suggestions);
7100                self.detect_temporal_pattern(right.as_ref(), suggestions);
7101            }
7102
7103            _ => {}
7104        }
7105    }
7106
7107    /// Suggest a scalar index for a temporal property if one doesn't already exist.
7108    fn suggest_temporal_index(
7109        &self,
7110        _variable: &str,
7111        property: &str,
7112        suggestions: &mut Vec<IndexSuggestion>,
7113    ) {
7114        // Check if a scalar index already exists for this property
7115        // We need to check all labels since we may not know the exact label from the variable
7116        let mut has_index = false;
7117
7118        for index in &self.schema.indexes {
7119            if let IndexDefinition::Scalar(config) = index
7120                && config.properties.contains(&property.to_string())
7121            {
7122                has_index = true;
7123                break;
7124            }
7125        }
7126
7127        if !has_index {
7128            // Avoid duplicate suggestions
7129            let already_suggested = suggestions.iter().any(|s| s.property == property);
7130            if !already_suggested {
7131                suggestions.push(IndexSuggestion {
7132                    label_or_type: "(detected from temporal query)".to_string(),
7133                    property: property.to_string(),
7134                    index_type: "SCALAR (BTree)".to_string(),
7135                    reason: format!(
7136                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
7137                        property
7138                    ),
7139                    create_statement: format!(
7140                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
7141                        property, property
7142                    ),
7143                });
7144            }
7145        }
7146    }
7147
7148    /// Helper functions for expression normalization
7149    /// Normalize an expression for storage: strip variable prefixes
7150    /// For simple property: u.email -> "email"
7151    /// For expressions: lower(u.email) -> "lower(email)"
7152    fn normalize_expression_for_storage(expr: &Expr) -> String {
7153        match expr {
7154            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
7155            _ => {
7156                // Serialize expression and strip variable prefix
7157                let expr_str = expr.to_string_repr();
7158                Self::strip_variable_prefix(&expr_str)
7159            }
7160        }
7161    }
7162
7163    /// Strip variable references like "u.prop" from expression strings
7164    /// Converts "lower(u.email)" to "lower(email)"
7165    fn strip_variable_prefix(expr_str: &str) -> String {
7166        use regex::Regex;
7167        // Match patterns like "word.property" and replace with just "property"
7168        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
7169        re.replace_all(expr_str, "$1").to_string()
7170    }
7171
7172    /// Plan a schema command from the new AST
7173    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
7174        match cmd {
7175            SchemaCommand::CreateVectorIndex(c) => {
7176                // Parse index type from options (default: IvfPq)
7177                let index_type = if let Some(type_val) = c.options.get("type") {
7178                    match type_val.as_str() {
7179                        Some("hnsw") => VectorIndexType::Hnsw {
7180                            m: 16,
7181                            ef_construction: 200,
7182                            ef_search: 100,
7183                        },
7184                        Some("flat") => VectorIndexType::Flat,
7185                        _ => VectorIndexType::IvfPq {
7186                            num_partitions: 256,
7187                            num_sub_vectors: 16,
7188                            bits_per_subvector: 8,
7189                        },
7190                    }
7191                } else {
7192                    VectorIndexType::IvfPq {
7193                        num_partitions: 256,
7194                        num_sub_vectors: 16,
7195                        bits_per_subvector: 8,
7196                    }
7197                };
7198
7199                // Parse embedding config from options
7200                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
7201                    Self::parse_embedding_config(emb_val)?
7202                } else {
7203                    None
7204                };
7205
7206                let config = VectorIndexConfig {
7207                    name: c.name,
7208                    label: c.label,
7209                    property: c.property,
7210                    metric: DistanceMetric::Cosine,
7211                    index_type,
7212                    embedding_config,
7213                    metadata: Default::default(),
7214                };
7215                Ok(LogicalPlan::CreateVectorIndex {
7216                    config,
7217                    if_not_exists: c.if_not_exists,
7218                })
7219            }
7220            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
7221                config: FullTextIndexConfig {
7222                    name: cfg.name,
7223                    label: cfg.label,
7224                    properties: cfg.properties,
7225                    tokenizer: TokenizerConfig::Standard,
7226                    with_positions: true,
7227                    metadata: Default::default(),
7228                },
7229                if_not_exists: cfg.if_not_exists,
7230            }),
7231            SchemaCommand::CreateScalarIndex(cfg) => {
7232                // Convert expressions to storage strings (strip variable prefix)
7233                let properties: Vec<String> = cfg
7234                    .expressions
7235                    .iter()
7236                    .map(Self::normalize_expression_for_storage)
7237                    .collect();
7238
7239                Ok(LogicalPlan::CreateScalarIndex {
7240                    config: ScalarIndexConfig {
7241                        name: cfg.name,
7242                        label: cfg.label,
7243                        properties,
7244                        index_type: ScalarIndexType::BTree,
7245                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
7246                        metadata: Default::default(),
7247                    },
7248                    if_not_exists: cfg.if_not_exists,
7249                })
7250            }
7251            SchemaCommand::CreateJsonFtsIndex(cfg) => {
7252                let with_positions = cfg
7253                    .options
7254                    .get("with_positions")
7255                    .and_then(|v| v.as_bool())
7256                    .unwrap_or(false);
7257                Ok(LogicalPlan::CreateJsonFtsIndex {
7258                    config: JsonFtsIndexConfig {
7259                        name: cfg.name,
7260                        label: cfg.label,
7261                        column: cfg.column,
7262                        paths: Vec::new(),
7263                        with_positions,
7264                        metadata: Default::default(),
7265                    },
7266                    if_not_exists: cfg.if_not_exists,
7267                })
7268            }
7269            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
7270                name: drop.name,
7271                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
7272            }),
7273            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
7274            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
7275            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
7276            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
7277            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
7278            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
7279            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
7280            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
7281            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
7282            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
7283            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
7284            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
7285            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
7286            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
7287            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
7288            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
7289                destination: path,
7290                options: HashMap::new(),
7291            }),
7292            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
7293                label: cmd.label,
7294                path: cmd.path,
7295                format: cmd.format,
7296                options: cmd.options,
7297            }),
7298            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
7299                label: cmd.label,
7300                path: cmd.path,
7301                format: cmd.format,
7302                options: cmd.options,
7303            }),
7304        }
7305    }
7306
7307    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
7308        let obj = emb_val
7309            .as_object()
7310            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
7311
7312        // Parse alias (required)
7313        let alias = obj
7314            .get("alias")
7315            .and_then(|v| v.as_str())
7316            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
7317
7318        // Parse source properties (required)
7319        let source_properties = obj
7320            .get("source")
7321            .and_then(|v| v.as_array())
7322            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
7323            .iter()
7324            .filter_map(|v| v.as_str().map(|s| s.to_string()))
7325            .collect::<Vec<_>>();
7326
7327        if source_properties.is_empty() {
7328            return Err(anyhow!(
7329                "embedding.source must contain at least one property"
7330            ));
7331        }
7332
7333        let batch_size = obj
7334            .get("batch_size")
7335            .and_then(|v| v.as_u64())
7336            .map(|v| v as usize)
7337            .unwrap_or(32);
7338
7339        Ok(Some(EmbeddingConfig {
7340            alias: alias.to_string(),
7341            source_properties,
7342            batch_size,
7343        }))
7344    }
7345}
7346
7347/// Collect all properties referenced anywhere in the LogicalPlan tree.
7348///
7349/// This is critical for window functions: properties must be materialized
7350/// at the Scan node so they're available for window operations later.
7351///
7352/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
7353pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
7354    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
7355    collect_properties_recursive(plan, &mut properties);
7356    properties
7357}
7358
7359/// Recursively walk the LogicalPlan tree and collect all property references.
7360fn collect_properties_recursive(
7361    plan: &LogicalPlan,
7362    properties: &mut HashMap<String, HashSet<String>>,
7363) {
7364    match plan {
7365        LogicalPlan::Window {
7366            input,
7367            window_exprs,
7368        } => {
7369            // Collect from window expressions
7370            for expr in window_exprs {
7371                collect_properties_from_expr_into(expr, properties);
7372            }
7373            collect_properties_recursive(input, properties);
7374        }
7375        LogicalPlan::Project { input, projections } => {
7376            for (expr, _alias) in projections {
7377                collect_properties_from_expr_into(expr, properties);
7378            }
7379            collect_properties_recursive(input, properties);
7380        }
7381        LogicalPlan::Sort { input, order_by } => {
7382            for sort_item in order_by {
7383                collect_properties_from_expr_into(&sort_item.expr, properties);
7384            }
7385            collect_properties_recursive(input, properties);
7386        }
7387        LogicalPlan::Filter {
7388            input, predicate, ..
7389        } => {
7390            collect_properties_from_expr_into(predicate, properties);
7391            collect_properties_recursive(input, properties);
7392        }
7393        LogicalPlan::Aggregate {
7394            input,
7395            group_by,
7396            aggregates,
7397        } => {
7398            for expr in group_by {
7399                collect_properties_from_expr_into(expr, properties);
7400            }
7401            for expr in aggregates {
7402                collect_properties_from_expr_into(expr, properties);
7403            }
7404            collect_properties_recursive(input, properties);
7405        }
7406        LogicalPlan::Scan {
7407            filter: Some(expr), ..
7408        } => {
7409            collect_properties_from_expr_into(expr, properties);
7410        }
7411        LogicalPlan::Scan { filter: None, .. } => {}
7412        LogicalPlan::ExtIdLookup {
7413            filter: Some(expr), ..
7414        } => {
7415            collect_properties_from_expr_into(expr, properties);
7416        }
7417        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
7418        LogicalPlan::ScanAll {
7419            filter: Some(expr), ..
7420        } => {
7421            collect_properties_from_expr_into(expr, properties);
7422        }
7423        LogicalPlan::ScanAll { filter: None, .. } => {}
7424        LogicalPlan::ScanMainByLabels {
7425            filter: Some(expr), ..
7426        } => {
7427            collect_properties_from_expr_into(expr, properties);
7428        }
7429        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
7430        LogicalPlan::TraverseMainByType {
7431            input,
7432            target_filter,
7433            ..
7434        } => {
7435            if let Some(expr) = target_filter {
7436                collect_properties_from_expr_into(expr, properties);
7437            }
7438            collect_properties_recursive(input, properties);
7439        }
7440        LogicalPlan::Traverse {
7441            input,
7442            target_filter,
7443            step_variable: _,
7444            ..
7445        } => {
7446            if let Some(expr) = target_filter {
7447                collect_properties_from_expr_into(expr, properties);
7448            }
7449            // Note: Edge properties (step_variable) will be collected from expressions
7450            // that reference them. The edge_properties field in LogicalPlan is populated
7451            // later during physical planning based on this collected map.
7452            collect_properties_recursive(input, properties);
7453        }
7454        LogicalPlan::Unwind { input, expr, .. } => {
7455            collect_properties_from_expr_into(expr, properties);
7456            collect_properties_recursive(input, properties);
7457        }
7458        LogicalPlan::Create { input, pattern } => {
7459            // Mark variables referenced in CREATE patterns with "*" so plan_scan
7460            // adds structural projections (bare entity columns). Without this,
7461            // execute_create_pattern() can't find bound variables and creates
7462            // spurious new nodes instead of using existing MATCH'd ones.
7463            mark_pattern_variables(pattern, properties);
7464            collect_properties_recursive(input, properties);
7465        }
7466        LogicalPlan::CreateBatch { input, patterns } => {
7467            for pattern in patterns {
7468                mark_pattern_variables(pattern, properties);
7469            }
7470            collect_properties_recursive(input, properties);
7471        }
7472        LogicalPlan::Merge {
7473            input,
7474            pattern,
7475            on_match,
7476            on_create,
7477        } => {
7478            mark_pattern_variables(pattern, properties);
7479            if let Some(set_clause) = on_match {
7480                mark_set_item_variables(&set_clause.items, properties);
7481            }
7482            if let Some(set_clause) = on_create {
7483                mark_set_item_variables(&set_clause.items, properties);
7484            }
7485            collect_properties_recursive(input, properties);
7486        }
7487        LogicalPlan::Set { input, items } => {
7488            mark_set_item_variables(items, properties);
7489            collect_properties_recursive(input, properties);
7490        }
7491        LogicalPlan::Remove { input, items } => {
7492            for item in items {
7493                match item {
7494                    RemoveItem::Property(expr) => {
7495                        // REMOVE n.prop — collect the property and mark the variable
7496                        // with "*" so full structural projection is applied.
7497                        collect_properties_from_expr_into(expr, properties);
7498                        if let Expr::Property(base, _) = expr
7499                            && let Expr::Variable(var) = base.as_ref()
7500                        {
7501                            properties
7502                                .entry(var.clone())
7503                                .or_default()
7504                                .insert("*".to_string());
7505                        }
7506                    }
7507                    RemoveItem::Labels { variable, .. } => {
7508                        // REMOVE n:Label — mark n with "*"
7509                        properties
7510                            .entry(variable.clone())
7511                            .or_default()
7512                            .insert("*".to_string());
7513                    }
7514                }
7515            }
7516            collect_properties_recursive(input, properties);
7517        }
7518        LogicalPlan::Delete { input, items, .. } => {
7519            for expr in items {
7520                collect_properties_from_expr_into(expr, properties);
7521            }
7522            collect_properties_recursive(input, properties);
7523        }
7524        LogicalPlan::Foreach {
7525            input, list, body, ..
7526        } => {
7527            collect_properties_from_expr_into(list, properties);
7528            for plan in body {
7529                collect_properties_recursive(plan, properties);
7530            }
7531            collect_properties_recursive(input, properties);
7532        }
7533        LogicalPlan::Limit { input, .. } => {
7534            collect_properties_recursive(input, properties);
7535        }
7536        LogicalPlan::CrossJoin { left, right } => {
7537            collect_properties_recursive(left, properties);
7538            collect_properties_recursive(right, properties);
7539        }
7540        LogicalPlan::Apply {
7541            input,
7542            subquery,
7543            input_filter,
7544        } => {
7545            if let Some(expr) = input_filter {
7546                collect_properties_from_expr_into(expr, properties);
7547            }
7548            collect_properties_recursive(input, properties);
7549            collect_properties_recursive(subquery, properties);
7550        }
7551        LogicalPlan::Union { left, right, .. } => {
7552            collect_properties_recursive(left, properties);
7553            collect_properties_recursive(right, properties);
7554        }
7555        LogicalPlan::RecursiveCTE {
7556            initial, recursive, ..
7557        } => {
7558            collect_properties_recursive(initial, properties);
7559            collect_properties_recursive(recursive, properties);
7560        }
7561        LogicalPlan::ProcedureCall { arguments, .. } => {
7562            for arg in arguments {
7563                collect_properties_from_expr_into(arg, properties);
7564            }
7565        }
7566        LogicalPlan::VectorKnn { query, .. } => {
7567            collect_properties_from_expr_into(query, properties);
7568        }
7569        LogicalPlan::InvertedIndexLookup { terms, .. } => {
7570            collect_properties_from_expr_into(terms, properties);
7571        }
7572        LogicalPlan::ShortestPath { input, .. } => {
7573            collect_properties_recursive(input, properties);
7574        }
7575        LogicalPlan::AllShortestPaths { input, .. } => {
7576            collect_properties_recursive(input, properties);
7577        }
7578        LogicalPlan::Distinct { input } => {
7579            collect_properties_recursive(input, properties);
7580        }
7581        LogicalPlan::QuantifiedPattern {
7582            input,
7583            pattern_plan,
7584            ..
7585        } => {
7586            collect_properties_recursive(input, properties);
7587            collect_properties_recursive(pattern_plan, properties);
7588        }
7589        LogicalPlan::BindZeroLengthPath { input, .. } => {
7590            collect_properties_recursive(input, properties);
7591        }
7592        LogicalPlan::BindPath { input, .. } => {
7593            collect_properties_recursive(input, properties);
7594        }
7595        LogicalPlan::SubqueryCall { input, subquery } => {
7596            collect_properties_recursive(input, properties);
7597            collect_properties_recursive(subquery, properties);
7598        }
7599        LogicalPlan::LocyProject {
7600            input, projections, ..
7601        } => {
7602            for (expr, _alias) in projections {
7603                match expr {
7604                    // Bare variable in LocyProject: only need _vid for node variables
7605                    // (plan_locy_project extracts VID directly). Adding "*" would create
7606                    // a structural Struct column that conflicts with derived scan columns.
7607                    Expr::Variable(name) if !name.contains('.') => {
7608                        properties
7609                            .entry(name.clone())
7610                            .or_default()
7611                            .insert("_vid".to_string());
7612                    }
7613                    _ => collect_properties_from_expr_into(expr, properties),
7614                }
7615            }
7616            collect_properties_recursive(input, properties);
7617        }
7618        LogicalPlan::LocyFold {
7619            input,
7620            fold_bindings,
7621            ..
7622        } => {
7623            for (_name, expr) in fold_bindings {
7624                collect_properties_from_expr_into(expr, properties);
7625            }
7626            collect_properties_recursive(input, properties);
7627        }
7628        LogicalPlan::LocyBestBy {
7629            input, criteria, ..
7630        } => {
7631            for (expr, _asc) in criteria {
7632                collect_properties_from_expr_into(expr, properties);
7633            }
7634            collect_properties_recursive(input, properties);
7635        }
7636        LogicalPlan::LocyPriority { input, .. } => {
7637            collect_properties_recursive(input, properties);
7638        }
7639        // DDL and other plans don't reference properties
7640        _ => {}
7641    }
7642}
7643
7644/// Mark target variables from SET items with "*" and collect value expressions.
7645fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
7646    for item in items {
7647        match item {
7648            SetItem::Property { expr, value } => {
7649                // SET n.prop = val — mark n via the property expr, collect from value.
7650                // Also mark the variable with "*" for full structural projection so
7651                // edge identity fields (_src/_dst) are available for write operations.
7652                collect_properties_from_expr_into(expr, properties);
7653                collect_properties_from_expr_into(value, properties);
7654                if let Expr::Property(base, _) = expr
7655                    && let Expr::Variable(var) = base.as_ref()
7656                {
7657                    properties
7658                        .entry(var.clone())
7659                        .or_default()
7660                        .insert("*".to_string());
7661                }
7662            }
7663            SetItem::Labels { variable, .. } => {
7664                // SET n:Label — need full access to n
7665                properties
7666                    .entry(variable.clone())
7667                    .or_default()
7668                    .insert("*".to_string());
7669            }
7670            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
7671                // SET n = {props} or SET n += {props}
7672                properties
7673                    .entry(variable.clone())
7674                    .or_default()
7675                    .insert("*".to_string());
7676                collect_properties_from_expr_into(value, properties);
7677            }
7678        }
7679    }
7680}
7681
7682/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
7683/// adds structural projections (bare entity Struct columns) for them.
7684/// This is needed so that execute_create_pattern() can find bound variables
7685/// in the row HashMap and reuse existing nodes instead of creating new ones.
7686fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
7687    for path in &pattern.paths {
7688        if let Some(ref v) = path.variable {
7689            properties
7690                .entry(v.clone())
7691                .or_default()
7692                .insert("*".to_string());
7693        }
7694        for element in &path.elements {
7695            match element {
7696                PatternElement::Node(n) => {
7697                    if let Some(ref v) = n.variable {
7698                        properties
7699                            .entry(v.clone())
7700                            .or_default()
7701                            .insert("*".to_string());
7702                    }
7703                    // Also collect properties from inline property expressions
7704                    if let Some(ref props) = n.properties {
7705                        collect_properties_from_expr_into(props, properties);
7706                    }
7707                }
7708                PatternElement::Relationship(r) => {
7709                    if let Some(ref v) = r.variable {
7710                        properties
7711                            .entry(v.clone())
7712                            .or_default()
7713                            .insert("*".to_string());
7714                    }
7715                    if let Some(ref props) = r.properties {
7716                        collect_properties_from_expr_into(props, properties);
7717                    }
7718                }
7719                PatternElement::Parenthesized { pattern, .. } => {
7720                    let sub = Pattern {
7721                        paths: vec![pattern.as_ref().clone()],
7722                    };
7723                    mark_pattern_variables(&sub, properties);
7724                }
7725            }
7726        }
7727    }
7728}
7729
7730/// Collect properties from an expression into a HashMap.
7731fn collect_properties_from_expr_into(
7732    expr: &Expr,
7733    properties: &mut HashMap<String, HashSet<String>>,
7734) {
7735    match expr {
7736        Expr::PatternComprehension {
7737            where_clause,
7738            map_expr,
7739            ..
7740        } => {
7741            // Collect properties from the WHERE clause and map expression.
7742            // The pattern itself creates local bindings that don't need
7743            // property collection from the outer scope.
7744            if let Some(where_expr) = where_clause {
7745                collect_properties_from_expr_into(where_expr, properties);
7746            }
7747            collect_properties_from_expr_into(map_expr, properties);
7748        }
7749        Expr::Variable(name) => {
7750            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
7751            if let Some((var, prop)) = name.split_once('.') {
7752                properties
7753                    .entry(var.to_string())
7754                    .or_default()
7755                    .insert(prop.to_string());
7756            } else {
7757                // Bare variable (e.g., RETURN n) — needs all properties materialized
7758                properties
7759                    .entry(name.clone())
7760                    .or_default()
7761                    .insert("*".to_string());
7762            }
7763        }
7764        Expr::Property(base, name) => {
7765            // Extract variable name from the base expression
7766            if let Expr::Variable(var) = base.as_ref() {
7767                properties
7768                    .entry(var.clone())
7769                    .or_default()
7770                    .insert(name.clone());
7771                // Don't recurse into Variable — that would mark it as a bare
7772                // variable reference (adding "*") when it's just a property base.
7773            } else {
7774                // Recurse for complex base expressions (nested property, function call, etc.)
7775                collect_properties_from_expr_into(base, properties);
7776            }
7777        }
7778        Expr::BinaryOp { left, right, .. } => {
7779            collect_properties_from_expr_into(left, properties);
7780            collect_properties_from_expr_into(right, properties);
7781        }
7782        Expr::FunctionCall {
7783            name,
7784            args,
7785            window_spec,
7786            ..
7787        } => {
7788            // Analyze function for property requirements (pushdown hydration)
7789            analyze_function_property_requirements(name, args, properties);
7790
7791            // Collect from arguments
7792            for arg in args {
7793                collect_properties_from_expr_into(arg, properties);
7794            }
7795
7796            // Collect from window spec (PARTITION BY, ORDER BY)
7797            if let Some(spec) = window_spec {
7798                for part_expr in &spec.partition_by {
7799                    collect_properties_from_expr_into(part_expr, properties);
7800                }
7801                for sort_item in &spec.order_by {
7802                    collect_properties_from_expr_into(&sort_item.expr, properties);
7803                }
7804            }
7805        }
7806        Expr::UnaryOp { expr, .. } => {
7807            collect_properties_from_expr_into(expr, properties);
7808        }
7809        Expr::List(items) => {
7810            for item in items {
7811                collect_properties_from_expr_into(item, properties);
7812            }
7813        }
7814        Expr::Map(entries) => {
7815            for (_key, value) in entries {
7816                collect_properties_from_expr_into(value, properties);
7817            }
7818        }
7819        Expr::ListComprehension {
7820            list,
7821            where_clause,
7822            map_expr,
7823            ..
7824        } => {
7825            collect_properties_from_expr_into(list, properties);
7826            if let Some(where_expr) = where_clause {
7827                collect_properties_from_expr_into(where_expr, properties);
7828            }
7829            collect_properties_from_expr_into(map_expr, properties);
7830        }
7831        Expr::Case {
7832            expr,
7833            when_then,
7834            else_expr,
7835        } => {
7836            if let Some(scrutinee_expr) = expr {
7837                collect_properties_from_expr_into(scrutinee_expr, properties);
7838            }
7839            for (when, then) in when_then {
7840                collect_properties_from_expr_into(when, properties);
7841                collect_properties_from_expr_into(then, properties);
7842            }
7843            if let Some(default_expr) = else_expr {
7844                collect_properties_from_expr_into(default_expr, properties);
7845            }
7846        }
7847        Expr::Quantifier {
7848            list, predicate, ..
7849        } => {
7850            collect_properties_from_expr_into(list, properties);
7851            collect_properties_from_expr_into(predicate, properties);
7852        }
7853        Expr::Reduce {
7854            init, list, expr, ..
7855        } => {
7856            collect_properties_from_expr_into(init, properties);
7857            collect_properties_from_expr_into(list, properties);
7858            collect_properties_from_expr_into(expr, properties);
7859        }
7860        Expr::Exists { query, .. } => {
7861            // Walk into EXISTS body to collect property references for outer-scope variables.
7862            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
7863            // are included in the outer scan's property list. Extra properties collected for
7864            // inner-only variables are harmless — the outer scan ignores unknown variable names.
7865            collect_properties_from_subquery(query, properties);
7866        }
7867        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
7868            collect_properties_from_subquery(query, properties);
7869        }
7870        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
7871            collect_properties_from_expr_into(expr, properties);
7872        }
7873        Expr::In { expr, list } => {
7874            collect_properties_from_expr_into(expr, properties);
7875            collect_properties_from_expr_into(list, properties);
7876        }
7877        Expr::ArrayIndex { array, index } => {
7878            if let Expr::Variable(var) = array.as_ref() {
7879                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
7880                    // Static string key: e['name'] → only need that specific property
7881                    properties
7882                        .entry(var.clone())
7883                        .or_default()
7884                        .insert(prop_name.clone());
7885                } else {
7886                    // Dynamic property access: e[prop] → need all properties
7887                    properties
7888                        .entry(var.clone())
7889                        .or_default()
7890                        .insert("*".to_string());
7891                }
7892            }
7893            collect_properties_from_expr_into(array, properties);
7894            collect_properties_from_expr_into(index, properties);
7895        }
7896        Expr::ArraySlice { array, start, end } => {
7897            collect_properties_from_expr_into(array, properties);
7898            if let Some(start_expr) = start {
7899                collect_properties_from_expr_into(start_expr, properties);
7900            }
7901            if let Some(end_expr) = end {
7902                collect_properties_from_expr_into(end_expr, properties);
7903            }
7904        }
7905        Expr::ValidAt {
7906            entity,
7907            timestamp,
7908            start_prop,
7909            end_prop,
7910        } => {
7911            // Extract property requirements from ValidAt expression
7912            if let Expr::Variable(var) = entity.as_ref() {
7913                if let Some(prop) = start_prop {
7914                    properties
7915                        .entry(var.clone())
7916                        .or_default()
7917                        .insert(prop.clone());
7918                }
7919                if let Some(prop) = end_prop {
7920                    properties
7921                        .entry(var.clone())
7922                        .or_default()
7923                        .insert(prop.clone());
7924                }
7925            }
7926            collect_properties_from_expr_into(entity, properties);
7927            collect_properties_from_expr_into(timestamp, properties);
7928        }
7929        Expr::MapProjection { base, items } => {
7930            collect_properties_from_expr_into(base, properties);
7931            for item in items {
7932                match item {
7933                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
7934                        if let Expr::Variable(var) = base.as_ref() {
7935                            properties
7936                                .entry(var.clone())
7937                                .or_default()
7938                                .insert(prop.clone());
7939                        }
7940                    }
7941                    uni_cypher::ast::MapProjectionItem::AllProperties => {
7942                        if let Expr::Variable(var) = base.as_ref() {
7943                            properties
7944                                .entry(var.clone())
7945                                .or_default()
7946                                .insert("*".to_string());
7947                        }
7948                    }
7949                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
7950                        collect_properties_from_expr_into(expr, properties);
7951                    }
7952                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
7953                }
7954            }
7955        }
7956        Expr::LabelCheck { expr, .. } => {
7957            collect_properties_from_expr_into(expr, properties);
7958        }
7959        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
7960        // Mark them with "*" so the outer scan produces structural projections that
7961        // extract_row_params can resolve.
7962        Expr::Parameter(name) => {
7963            properties
7964                .entry(name.clone())
7965                .or_default()
7966                .insert("*".to_string());
7967        }
7968        // Literals and wildcard don't reference properties
7969        Expr::Literal(_) | Expr::Wildcard => {}
7970    }
7971}
7972
7973/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
7974///
7975/// This is needed so that correlated property accesses like `a.city` inside
7976/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
7977/// scan to include `a.city` in its projected columns.
7978fn collect_properties_from_subquery(
7979    query: &Query,
7980    properties: &mut HashMap<String, HashSet<String>>,
7981) {
7982    match query {
7983        Query::Single(stmt) => {
7984            for clause in &stmt.clauses {
7985                match clause {
7986                    Clause::Match(m) => {
7987                        if let Some(ref wc) = m.where_clause {
7988                            collect_properties_from_expr_into(wc, properties);
7989                        }
7990                    }
7991                    Clause::With(w) => {
7992                        for item in &w.items {
7993                            if let ReturnItem::Expr { expr, .. } = item {
7994                                collect_properties_from_expr_into(expr, properties);
7995                            }
7996                        }
7997                        if let Some(ref wc) = w.where_clause {
7998                            collect_properties_from_expr_into(wc, properties);
7999                        }
8000                    }
8001                    Clause::Return(r) => {
8002                        for item in &r.items {
8003                            if let ReturnItem::Expr { expr, .. } = item {
8004                                collect_properties_from_expr_into(expr, properties);
8005                            }
8006                        }
8007                    }
8008                    _ => {}
8009                }
8010            }
8011        }
8012        Query::Union { left, right, .. } => {
8013            collect_properties_from_subquery(left, properties);
8014            collect_properties_from_subquery(right, properties);
8015        }
8016        _ => {}
8017    }
8018}
8019
8020/// Analyze function calls to extract property requirements for pushdown hydration
8021///
8022/// This function examines function calls and their arguments to determine which properties
8023/// need to be loaded for entity arguments. For example:
8024/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
8025/// - keys(n) -> n needs all properties (*)
8026///
8027/// The extracted requirements are added to the properties map for later use during
8028/// scan planning.
8029fn analyze_function_property_requirements(
8030    name: &str,
8031    args: &[Expr],
8032    properties: &mut HashMap<String, HashSet<String>>,
8033) {
8034    use crate::query::function_props::get_function_spec;
8035
8036    /// Helper to mark a variable as needing all properties.
8037    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
8038        properties
8039            .entry(var.to_string())
8040            .or_default()
8041            .insert("*".to_string());
8042    }
8043
8044    let Some(spec) = get_function_spec(name) else {
8045        // Unknown function: conservatively require all properties for variable args
8046        for arg in args {
8047            if let Expr::Variable(var) = arg {
8048                mark_wildcard(var, properties);
8049            }
8050        }
8051        return;
8052    };
8053
8054    // Extract property names from string literal arguments
8055    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
8056        let entity_arg = args.get(entity_arg_idx);
8057        let prop_arg = args.get(prop_arg_idx);
8058
8059        match (entity_arg, prop_arg) {
8060            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
8061                properties
8062                    .entry(var.clone())
8063                    .or_default()
8064                    .insert(prop.clone());
8065            }
8066            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
8067                // Parameter property name: need all properties
8068                mark_wildcard(var, properties);
8069            }
8070            _ => {}
8071        }
8072    }
8073
8074    // Handle full entity requirement (keys(), properties())
8075    if spec.needs_full_entity {
8076        for &idx in spec.entity_args {
8077            if let Some(Expr::Variable(var)) = args.get(idx) {
8078                mark_wildcard(var, properties);
8079            }
8080        }
8081    }
8082}
8083
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Property requirements keyed by variable name, as filled in by the collectors.
    type Requirements = HashMap<String, HashSet<String>>;

    /// Build an expected property set from string slices.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Variable reference expression.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// String literal expression.
    fn text(value: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(value.to_string()))
    }

    /// Query parameter expression.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Run the function-call analysis on an empty map and return what it recorded.
    fn requirements_for_call(name: &str, args: &[Expr]) -> Requirements {
        let mut requirements = Requirements::new();
        analyze_function_property_requirements(name, args, &mut requirements);
        requirements
    }

    /// Run the expression collector on an empty map and return what it recorded.
    fn requirements_for_expr(expr: &Expr) -> Requirements {
        let mut requirements = Requirements::new();
        collect_properties_from_expr_into(expr, &mut requirements);
        requirements
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = [var("e"), text("start"), text("end"), var("ts")];

        let requirements = requirements_for_call("uni.temporal.validAt", &args);

        assert_eq!(requirements.get("e"), Some(&prop_set(&["start", "end"])));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let requirements = requirements_for_call("keys", &[var("n")]);

        assert_eq!(requirements.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let requirements = requirements_for_call("properties", &[var("n")]);

        assert_eq!(requirements.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let requirements = requirements_for_call("customUdf", &[var("e")]);

        assert_eq!(requirements.get("e"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = [var("e"), param("start"), param("end"), var("ts")];

        let requirements = requirements_for_call("uni.temporal.validAt", &args);

        let e_props = requirements.get("e").expect("e must have requirements");
        assert!(e_props.contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt carries its property names directly on the node.
        let valid_at = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };

        let requirements = requirements_for_expr(&valid_at);

        let e_props = requirements.get("e").expect("e must have requirements");
        assert!(e_props.contains("valid_from"));
        assert!(e_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let dynamic_index = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };

        let requirements = requirements_for_expr(&dynamic_index);

        let e_props = requirements.get("e").expect("e must have requirements");
        assert!(e_props.contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let access = Expr::Property(Box::new(var("e")), "name".to_string());

        let requirements = requirements_for_expr(&access);

        let e_props = requirements.get("e").expect("e must have requirements");
        assert!(e_props.contains("name"));
    }
}