Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::PredicateAnalyzer;
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Semantic category of a variable that is in scope while a query is planned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding, e.g. `n` in `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship binding, e.g. `r` in `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. `p` in `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A computed value (`WITH expr AS x`, `UNWIND list AS item`, ...).
    /// It may hold a map or another dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to come from a non-graph literal (int, float, bool, string, list).
    /// Property access on these is rejected at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown
    /// (the string variables handed to `plan_with_scope`).
    /// It is compatible with every concrete type, so a subquery may re-bind it.
    Imported,
}
45
46impl VariableType {
47    /// Returns true if this type is compatible with the expected type.
48    ///
49    /// `Imported` is always compatible because the actual type is unknown at plan time.
50    fn is_compatible_with(self, expected: VariableType) -> bool {
51        self == expected
52            || self == VariableType::Imported
53            // ScalarLiteral behaves like Scalar for compatibility checks
54            || (self == VariableType::ScalarLiteral && expected == VariableType::Scalar)
55    }
56}
57
58/// Information about a variable in scope during planning.
59#[derive(Debug, Clone)]
60pub struct VariableInfo {
61    /// Variable name as written in the query.
62    pub name: String,
63    /// Semantic type of the variable.
64    pub var_type: VariableType,
65    /// True if this is a variable-length path (VLP) step variable.
66    ///
67    /// VLP step variables are typed as Edge but semantically hold edge lists.
68    pub is_vlp: bool,
69}
70
71impl VariableInfo {
72    pub fn new(name: String, var_type: VariableType) -> Self {
73        Self {
74            name,
75            var_type,
76            is_vlp: false,
77        }
78    }
79}
80
81/// Find a variable in scope by name.
82fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
83    vars.iter().find(|v| v.name == name)
84}
85
86/// Check if a variable is in scope.
87fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
88    find_var_in_scope(vars, name).is_some()
89}
90
91/// Check if an expression contains a pattern predicate.
92fn contains_pattern_predicate(expr: &Expr) -> bool {
93    if matches!(
94        expr,
95        Expr::Exists {
96            from_pattern_predicate: true,
97            ..
98        }
99    ) {
100        return true;
101    }
102    let mut found = false;
103    expr.for_each_child(&mut |child| {
104        if !found {
105            found = contains_pattern_predicate(child);
106        }
107    });
108    found
109}
110
111/// Add a variable to scope with type conflict validation.
112/// Returns an error if the variable already exists with a different type.
113fn add_var_to_scope(
114    vars: &mut Vec<VariableInfo>,
115    name: &str,
116    var_type: VariableType,
117) -> Result<()> {
118    if name.is_empty() {
119        return Ok(());
120    }
121
122    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
123        if existing.var_type == VariableType::Imported {
124            // Imported vars upgrade to the concrete type
125            existing.var_type = var_type;
126        } else if var_type == VariableType::Imported || existing.var_type == var_type {
127            // New type is Imported (keep existing) or same type — no conflict
128        } else if matches!(
129            existing.var_type,
130            VariableType::Scalar | VariableType::ScalarLiteral
131        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
132        {
133            // Scalar can be used as Node/Edge in CREATE context — a scalar
134            // holding a node/edge reference is valid for pattern use
135            existing.var_type = var_type;
136        } else {
137            return Err(anyhow!(
138                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
139                name,
140                existing.var_type,
141                var_type
142            ));
143        }
144    } else {
145        vars.push(VariableInfo::new(name.to_string(), var_type));
146    }
147    Ok(())
148}
149
150/// Convert VariableInfo vec to String vec for backward compatibility
151fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
152    vars.iter().map(|v| v.name.clone()).collect()
153}
154
155fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
156    match expr {
157        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
158            .map(|info| info.var_type)
159            .unwrap_or(VariableType::Scalar),
160        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
161        // Known non-graph literals: property access is NOT valid on these.
162        Expr::Literal(CypherLiteral::Integer(_))
163        | Expr::Literal(CypherLiteral::Float(_))
164        | Expr::Literal(CypherLiteral::String(_))
165        | Expr::Literal(CypherLiteral::Bool(_))
166        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
167        Expr::FunctionCall { name, args, .. } => {
168            let lower = name.to_lowercase();
169            if lower == "coalesce" {
170                infer_coalesce_type(args, vars_in_scope)
171            } else if lower == "collect" && !args.is_empty() {
172                let collected = infer_with_output_type(&args[0], vars_in_scope);
173                if matches!(
174                    collected,
175                    VariableType::Node
176                        | VariableType::Edge
177                        | VariableType::Path
178                        | VariableType::Imported
179                ) {
180                    collected
181                } else {
182                    VariableType::Scalar
183                }
184            } else {
185                VariableType::Scalar
186            }
187        }
188        // WITH list literals/expressions produce scalar list values. Preserving
189        // entity typing here causes invalid node/edge reuse in later MATCH clauses
190        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
191        // Lists are ScalarLiteral since property access is not valid on them.
192        Expr::List(_) => VariableType::ScalarLiteral,
193        _ => VariableType::Scalar,
194    }
195}
196
197fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
198    let mut resolved: Option<VariableType> = None;
199    let mut saw_imported = false;
200    for arg in args {
201        let t = infer_with_output_type(arg, vars_in_scope);
202        match t {
203            VariableType::Node | VariableType::Edge | VariableType::Path => {
204                if let Some(existing) = resolved {
205                    if existing != t {
206                        return VariableType::Scalar;
207                    }
208                } else {
209                    resolved = Some(t);
210                }
211            }
212            VariableType::Imported => saw_imported = true,
213            VariableType::Scalar | VariableType::ScalarLiteral => {}
214        }
215    }
216    if let Some(t) = resolved {
217        t
218    } else if saw_imported {
219        VariableType::Imported
220    } else {
221        VariableType::Scalar
222    }
223}
224
225fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
226    match expr {
227        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
228            .map(|info| info.var_type)
229            .unwrap_or(VariableType::Scalar),
230        Expr::FunctionCall { name, args, .. }
231            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
232        {
233            infer_with_output_type(&args[0], vars_in_scope)
234        }
235        Expr::List(items) => {
236            let mut inferred: Option<VariableType> = None;
237            for item in items {
238                let t = infer_with_output_type(item, vars_in_scope);
239                if !matches!(
240                    t,
241                    VariableType::Node
242                        | VariableType::Edge
243                        | VariableType::Path
244                        | VariableType::Imported
245                ) {
246                    return VariableType::Scalar;
247                }
248                if let Some(existing) = inferred {
249                    if existing != t
250                        && t != VariableType::Imported
251                        && existing != VariableType::Imported
252                    {
253                        return VariableType::Scalar;
254                    }
255                    if existing == VariableType::Imported && t != VariableType::Imported {
256                        inferred = Some(t);
257                    }
258                } else {
259                    inferred = Some(t);
260                }
261            }
262            inferred.unwrap_or(VariableType::Scalar)
263        }
264        _ => VariableType::Scalar,
265    }
266}
267
268/// Collect all variable names referenced in an expression
269fn collect_expr_variables(expr: &Expr) -> Vec<String> {
270    let mut vars = Vec::new();
271    collect_expr_variables_inner(expr, &mut vars);
272    vars
273}
274
275fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
276    let mut add_var = |name: &String| {
277        if !vars.contains(name) {
278            vars.push(name.clone());
279        }
280    };
281
282    match expr {
283        Expr::Variable(name) => add_var(name),
284        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
285        Expr::BinaryOp { left, right, .. } => {
286            collect_expr_variables_inner(left, vars);
287            collect_expr_variables_inner(right, vars);
288        }
289        Expr::UnaryOp { expr: e, .. }
290        | Expr::IsNull(e)
291        | Expr::IsNotNull(e)
292        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
293        Expr::FunctionCall { args, .. } => {
294            for a in args {
295                collect_expr_variables_inner(a, vars);
296            }
297        }
298        Expr::List(items) => {
299            for item in items {
300                collect_expr_variables_inner(item, vars);
301            }
302        }
303        Expr::In { expr: e, list } => {
304            collect_expr_variables_inner(e, vars);
305            collect_expr_variables_inner(list, vars);
306        }
307        Expr::Case {
308            expr: case_expr,
309            when_then,
310            else_expr,
311        } => {
312            if let Some(e) = case_expr {
313                collect_expr_variables_inner(e, vars);
314            }
315            for (w, t) in when_then {
316                collect_expr_variables_inner(w, vars);
317                collect_expr_variables_inner(t, vars);
318            }
319            if let Some(e) = else_expr {
320                collect_expr_variables_inner(e, vars);
321            }
322        }
323        Expr::Map(entries) => {
324            for (_, v) in entries {
325                collect_expr_variables_inner(v, vars);
326            }
327        }
328        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
329        Expr::ArrayIndex { array, index } => {
330            collect_expr_variables_inner(array, vars);
331            collect_expr_variables_inner(index, vars);
332        }
333        Expr::ArraySlice { array, start, end } => {
334            collect_expr_variables_inner(array, vars);
335            if let Some(s) = start {
336                collect_expr_variables_inner(s, vars);
337            }
338            if let Some(e) = end {
339                collect_expr_variables_inner(e, vars);
340            }
341        }
342        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
343        // they introduce local variable bindings not in outer scope.
344        _ => {}
345    }
346}
347
348/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
349///
350/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
351/// before the final RETURN projection without losing alias semantics.
352fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
353    let repr = expr.to_string_repr();
354    if let Some(rewritten) = aliases.get(&repr) {
355        return rewritten.clone();
356    }
357
358    match expr {
359        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
360        Expr::Property(base, prop) => Expr::Property(
361            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
362            prop.clone(),
363        ),
364        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
365            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
366            op: *op,
367            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
368        },
369        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
370            op: *op,
371            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
372        },
373        Expr::FunctionCall {
374            name,
375            args,
376            distinct,
377            window_spec,
378        } => Expr::FunctionCall {
379            name: name.clone(),
380            args: args
381                .iter()
382                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
383                .collect(),
384            distinct: *distinct,
385            window_spec: window_spec.clone(),
386        },
387        Expr::List(items) => Expr::List(
388            items
389                .iter()
390                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
391                .collect(),
392        ),
393        Expr::Map(entries) => Expr::Map(
394            entries
395                .iter()
396                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
397                .collect(),
398        ),
399        Expr::Case {
400            expr: case_expr,
401            when_then,
402            else_expr,
403        } => Expr::Case {
404            expr: case_expr
405                .as_ref()
406                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
407            when_then: when_then
408                .iter()
409                .map(|(w, t)| {
410                    (
411                        rewrite_order_by_expr_with_aliases(w, aliases),
412                        rewrite_order_by_expr_with_aliases(t, aliases),
413                    )
414                })
415                .collect(),
416            else_expr: else_expr
417                .as_ref()
418                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
419        },
420        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
421        // they introduce local variable bindings that could shadow aliases.
422        _ => expr.clone(),
423    }
424}
425
426/// Validate function call argument types.
427/// Returns error if type constraints are violated.
428fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
429    let name_lower = name.to_lowercase();
430
431    // labels() requires Node
432    if name_lower == "labels"
433        && let Some(Expr::Variable(var_name)) = args.first()
434        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
435        && !info.var_type.is_compatible_with(VariableType::Node)
436    {
437        return Err(anyhow!(
438            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
439        ));
440    }
441
442    // type() requires Edge
443    if name_lower == "type"
444        && let Some(Expr::Variable(var_name)) = args.first()
445        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
446        && !info.var_type.is_compatible_with(VariableType::Edge)
447    {
448        return Err(anyhow!(
449            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
450        ));
451    }
452
453    // properties() requires Node/Edge/Map (not scalar literals)
454    if name_lower == "properties"
455        && let Some(arg) = args.first()
456    {
457        match arg {
458            Expr::Literal(CypherLiteral::Integer(_))
459            | Expr::Literal(CypherLiteral::Float(_))
460            | Expr::Literal(CypherLiteral::String(_))
461            | Expr::Literal(CypherLiteral::Bool(_))
462            | Expr::List(_) => {
463                return Err(anyhow!(
464                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
465                ));
466            }
467            Expr::Variable(var_name) => {
468                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
469                    && matches!(
470                        info.var_type,
471                        VariableType::Scalar | VariableType::ScalarLiteral
472                    )
473                {
474                    return Err(anyhow!(
475                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
476                    ));
477                }
478            }
479            _ => {}
480        }
481    }
482
483    // nodes()/relationships() require Path
484    if (name_lower == "nodes" || name_lower == "relationships")
485        && let Some(Expr::Variable(var_name)) = args.first()
486        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
487        && !info.var_type.is_compatible_with(VariableType::Path)
488    {
489        return Err(anyhow!(
490            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
491            name_lower
492        ));
493    }
494
495    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
496    if name_lower == "size"
497        && let Some(Expr::Variable(var_name)) = args.first()
498        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
499        && info.var_type == VariableType::Path
500    {
501        return Err(anyhow!(
502            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
503        ));
504    }
505
506    // length()/size() do NOT accept Node or single-Edge arguments.
507    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
508    // but are actually edge lists — size()/length() is valid on those.
509    if (name_lower == "length" || name_lower == "size")
510        && let Some(Expr::Variable(var_name)) = args.first()
511        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
512        && (info.var_type == VariableType::Node
513            || (info.var_type == VariableType::Edge && !info.is_vlp))
514    {
515        return Err(anyhow!(
516            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
517            name_lower
518        ));
519    }
520
521    Ok(())
522}
523
524/// Check if an expression is a non-boolean literal.
525fn is_non_boolean_literal(expr: &Expr) -> bool {
526    matches!(
527        expr,
528        Expr::Literal(CypherLiteral::Integer(_))
529            | Expr::Literal(CypherLiteral::Float(_))
530            | Expr::Literal(CypherLiteral::String(_))
531            | Expr::List(_)
532            | Expr::Map(_)
533    )
534}
535
536/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
537fn validate_boolean_expression(expr: &Expr) -> Result<()> {
538    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
539    if let Expr::BinaryOp { left, op, right } = expr
540        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
541    {
542        let op_name = format!("{op:?}").to_uppercase();
543        for operand in [left.as_ref(), right.as_ref()] {
544            if is_non_boolean_literal(operand) {
545                return Err(anyhow!(
546                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
547                    op_name
548                ));
549            }
550        }
551    }
552    if let Expr::UnaryOp {
553        op: uni_cypher::ast::UnaryOp::Not,
554        expr: inner,
555    } = expr
556        && is_non_boolean_literal(inner)
557    {
558        return Err(anyhow!(
559            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
560        ));
561    }
562    let mut result = Ok(());
563    expr.for_each_child(&mut |child| {
564        if result.is_ok() {
565            result = validate_boolean_expression(child);
566        }
567    });
568    result
569}
570
571/// Validate that all variables used in an expression are in scope.
572fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
573    let used_vars = collect_expr_variables(expr);
574    for var in used_vars {
575        if !is_var_in_scope(vars_in_scope, &var) {
576            return Err(anyhow!(
577                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
578                var
579            ));
580        }
581    }
582    Ok(())
583}
584
/// Returns `true` if `name` is one of the recognised aggregate function names.
///
/// The comparison is ASCII-case-insensitive and does not allocate, so callers
/// may pass the name exactly as written in the query.
fn is_aggregate_function_name(name: &str) -> bool {
    const AGGREGATE_FUNCTIONS: [&str; 14] = [
        "count",
        "sum",
        "avg",
        "min",
        "max",
        "collect",
        "stdev",
        "stdevp",
        "percentiledisc",
        "percentilecont",
        "btic_min",
        "btic_max",
        "btic_span_agg",
        "btic_count_at",
    ];

    AGGREGATE_FUNCTIONS
        .iter()
        .any(|aggregate| name.eq_ignore_ascii_case(aggregate))
}
605
606/// Returns true if the expression is a window function (FunctionCall with window_spec).
607fn is_window_function(expr: &Expr) -> bool {
608    matches!(
609        expr,
610        Expr::FunctionCall {
611            window_spec: Some(_),
612            ..
613        }
614    )
615}
616
617/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
618/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
619/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
620/// size() call, BinaryOp, etc.).
621fn is_compound_aggregate(expr: &Expr) -> bool {
622    if !expr.is_aggregate() {
623        return false;
624    }
625    match expr {
626        Expr::FunctionCall {
627            name, window_spec, ..
628        } => {
629            // A bare aggregate FunctionCall is NOT compound
630            if window_spec.is_some() {
631                return true; // window wrapping an aggregate — treat as compound
632            }
633            !is_aggregate_function_name(name)
634        }
635        // Subquery aggregates are "bare" (not compound)
636        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
637        // Everything else (ListComprehension, BinaryOp, etc.) is compound
638        _ => true,
639    }
640}
641
642/// Recursively collect all bare aggregate FunctionCall sub-expressions from
643/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
644/// the outermost aggregate boundaries).
645///
646/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
647/// is searched because the body (`map_expr`, `predicate`, `expr`) references
648/// the loop variable, not outer-scope aggregates.
649fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
650    let mut out = Vec::new();
651    extract_inner_aggregates_rec(expr, &mut out);
652    out
653}
654
655fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
656    match expr {
657        Expr::FunctionCall {
658            name, window_spec, ..
659        } if window_spec.is_none() && is_aggregate_function_name(name) => {
660            // Found a bare aggregate — collect it and stop recursing
661            out.push(expr.clone());
662        }
663        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
664            out.push(expr.clone());
665        }
666        // For list comprehension, only search the `list` source for aggregates
667        Expr::ListComprehension { list, .. } => {
668            extract_inner_aggregates_rec(list, out);
669        }
670        // For quantifier, only search the `list` source
671        Expr::Quantifier { list, .. } => {
672            extract_inner_aggregates_rec(list, out);
673        }
674        // For reduce, search `init` and `list` (not the body `expr`)
675        Expr::Reduce { init, list, .. } => {
676            extract_inner_aggregates_rec(init, out);
677            extract_inner_aggregates_rec(list, out);
678        }
679        // Standard recursive cases
680        Expr::FunctionCall { args, .. } => {
681            for arg in args {
682                extract_inner_aggregates_rec(arg, out);
683            }
684        }
685        Expr::BinaryOp { left, right, .. } => {
686            extract_inner_aggregates_rec(left, out);
687            extract_inner_aggregates_rec(right, out);
688        }
689        Expr::UnaryOp { expr: e, .. }
690        | Expr::IsNull(e)
691        | Expr::IsNotNull(e)
692        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
693        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
694        Expr::List(items) => {
695            for item in items {
696                extract_inner_aggregates_rec(item, out);
697            }
698        }
699        Expr::Case {
700            expr: case_expr,
701            when_then,
702            else_expr,
703        } => {
704            if let Some(e) = case_expr {
705                extract_inner_aggregates_rec(e, out);
706            }
707            for (w, t) in when_then {
708                extract_inner_aggregates_rec(w, out);
709                extract_inner_aggregates_rec(t, out);
710            }
711            if let Some(e) = else_expr {
712                extract_inner_aggregates_rec(e, out);
713            }
714        }
715        Expr::In {
716            expr: in_expr,
717            list,
718        } => {
719            extract_inner_aggregates_rec(in_expr, out);
720            extract_inner_aggregates_rec(list, out);
721        }
722        Expr::ArrayIndex { array, index } => {
723            extract_inner_aggregates_rec(array, out);
724            extract_inner_aggregates_rec(index, out);
725        }
726        Expr::ArraySlice { array, start, end } => {
727            extract_inner_aggregates_rec(array, out);
728            if let Some(s) = start {
729                extract_inner_aggregates_rec(s, out);
730            }
731            if let Some(e) = end {
732                extract_inner_aggregates_rec(e, out);
733            }
734        }
735        Expr::Map(entries) => {
736            for (_, v) in entries {
737                extract_inner_aggregates_rec(v, out);
738            }
739        }
740        _ => {}
741    }
742}
743
744/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
745/// `Expr::Variable(aggregate_column_name(agg))`.
746///
747/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
748/// rewritten (the body references the loop variable, not outer-scope columns).
749fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
750    match expr {
751        Expr::FunctionCall {
752            name, window_spec, ..
753        } if window_spec.is_none() && is_aggregate_function_name(name) => {
754            // Replace bare aggregate with column reference
755            Expr::Variable(aggregate_column_name(expr))
756        }
757        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
758            Expr::Variable(aggregate_column_name(expr))
759        }
760        Expr::ListComprehension {
761            variable,
762            list,
763            where_clause,
764            map_expr,
765        } => Expr::ListComprehension {
766            variable: variable.clone(),
767            list: Box::new(replace_aggregates_with_columns(list)),
768            where_clause: where_clause.clone(), // don't touch — references loop var
769            map_expr: map_expr.clone(),         // don't touch — references loop var
770        },
771        Expr::Quantifier {
772            quantifier,
773            variable,
774            list,
775            predicate,
776        } => Expr::Quantifier {
777            quantifier: *quantifier,
778            variable: variable.clone(),
779            list: Box::new(replace_aggregates_with_columns(list)),
780            predicate: predicate.clone(), // don't touch — references loop var
781        },
782        Expr::Reduce {
783            accumulator,
784            init,
785            variable,
786            list,
787            expr: body,
788        } => Expr::Reduce {
789            accumulator: accumulator.clone(),
790            init: Box::new(replace_aggregates_with_columns(init)),
791            variable: variable.clone(),
792            list: Box::new(replace_aggregates_with_columns(list)),
793            expr: body.clone(), // don't touch — references loop var
794        },
795        Expr::FunctionCall {
796            name,
797            args,
798            distinct,
799            window_spec,
800        } => Expr::FunctionCall {
801            name: name.clone(),
802            args: args.iter().map(replace_aggregates_with_columns).collect(),
803            distinct: *distinct,
804            window_spec: window_spec.clone(),
805        },
806        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
807            left: Box::new(replace_aggregates_with_columns(left)),
808            op: *op,
809            right: Box::new(replace_aggregates_with_columns(right)),
810        },
811        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
812            op: *op,
813            expr: Box::new(replace_aggregates_with_columns(e)),
814        },
815        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
816        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
817        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
818        Expr::Property(base, prop) => Expr::Property(
819            Box::new(replace_aggregates_with_columns(base)),
820            prop.clone(),
821        ),
822        Expr::List(items) => {
823            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
824        }
825        Expr::Case {
826            expr: case_expr,
827            when_then,
828            else_expr,
829        } => Expr::Case {
830            expr: case_expr
831                .as_ref()
832                .map(|e| Box::new(replace_aggregates_with_columns(e))),
833            when_then: when_then
834                .iter()
835                .map(|(w, t)| {
836                    (
837                        replace_aggregates_with_columns(w),
838                        replace_aggregates_with_columns(t),
839                    )
840                })
841                .collect(),
842            else_expr: else_expr
843                .as_ref()
844                .map(|e| Box::new(replace_aggregates_with_columns(e))),
845        },
846        Expr::In {
847            expr: in_expr,
848            list,
849        } => Expr::In {
850            expr: Box::new(replace_aggregates_with_columns(in_expr)),
851            list: Box::new(replace_aggregates_with_columns(list)),
852        },
853        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
854            array: Box::new(replace_aggregates_with_columns(array)),
855            index: Box::new(replace_aggregates_with_columns(index)),
856        },
857        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
858            array: Box::new(replace_aggregates_with_columns(array)),
859            start: start
860                .as_ref()
861                .map(|e| Box::new(replace_aggregates_with_columns(e))),
862            end: end
863                .as_ref()
864                .map(|e| Box::new(replace_aggregates_with_columns(e))),
865        },
866        Expr::Map(entries) => Expr::Map(
867            entries
868                .iter()
869                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
870                .collect(),
871        ),
872        // Leaf expressions — return as-is
873        other => other.clone(),
874    }
875}
876
877/// Check if an expression contains any aggregate function (recursively).
878fn contains_aggregate_recursive(expr: &Expr) -> bool {
879    match expr {
880        Expr::FunctionCall { name, args, .. } => {
881            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
882        }
883        Expr::BinaryOp { left, right, .. } => {
884            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
885        }
886        Expr::UnaryOp { expr: e, .. }
887        | Expr::IsNull(e)
888        | Expr::IsNotNull(e)
889        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
890        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
891        Expr::Case {
892            expr,
893            when_then,
894            else_expr,
895        } => {
896            expr.as_deref().is_some_and(contains_aggregate_recursive)
897                || when_then.iter().any(|(w, t)| {
898                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
899                })
900                || else_expr
901                    .as_deref()
902                    .is_some_and(contains_aggregate_recursive)
903        }
904        Expr::In { expr, list } => {
905            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
906        }
907        Expr::Property(base, _) => contains_aggregate_recursive(base),
908        Expr::ListComprehension { list, .. } => {
909            // Only check the list source — where_clause/map_expr reference the loop variable
910            contains_aggregate_recursive(list)
911        }
912        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
913        Expr::Reduce { init, list, .. } => {
914            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
915        }
916        Expr::ArrayIndex { array, index } => {
917            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
918        }
919        Expr::ArraySlice { array, start, end } => {
920            contains_aggregate_recursive(array)
921                || start.as_deref().is_some_and(contains_aggregate_recursive)
922                || end.as_deref().is_some_and(contains_aggregate_recursive)
923        }
924        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
925        _ => false,
926    }
927}
928
929/// Check if an expression contains a non-deterministic function (e.g. rand()).
930fn contains_non_deterministic(expr: &Expr) -> bool {
931    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
932        return true;
933    }
934    let mut found = false;
935    expr.for_each_child(&mut |child| {
936        if !found {
937            found = contains_non_deterministic(child);
938        }
939    });
940    found
941}
942
943fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
944    match expr {
945        Expr::FunctionCall { name, args, .. } => {
946            if is_aggregate_function_name(name) {
947                out.insert(expr.to_string_repr());
948                return;
949            }
950            for arg in args {
951                collect_aggregate_reprs(arg, out);
952            }
953        }
954        Expr::BinaryOp { left, right, .. } => {
955            collect_aggregate_reprs(left, out);
956            collect_aggregate_reprs(right, out);
957        }
958        Expr::UnaryOp { expr, .. }
959        | Expr::IsNull(expr)
960        | Expr::IsNotNull(expr)
961        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
962        Expr::List(items) => {
963            for item in items {
964                collect_aggregate_reprs(item, out);
965            }
966        }
967        Expr::Case {
968            expr,
969            when_then,
970            else_expr,
971        } => {
972            if let Some(e) = expr {
973                collect_aggregate_reprs(e, out);
974            }
975            for (w, t) in when_then {
976                collect_aggregate_reprs(w, out);
977                collect_aggregate_reprs(t, out);
978            }
979            if let Some(e) = else_expr {
980                collect_aggregate_reprs(e, out);
981            }
982        }
983        Expr::In { expr, list } => {
984            collect_aggregate_reprs(expr, out);
985            collect_aggregate_reprs(list, out);
986        }
987        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
988        Expr::ListComprehension { list, .. } => {
989            collect_aggregate_reprs(list, out);
990        }
991        Expr::Quantifier { list, .. } => {
992            collect_aggregate_reprs(list, out);
993        }
994        Expr::Reduce { init, list, .. } => {
995            collect_aggregate_reprs(init, out);
996            collect_aggregate_reprs(list, out);
997        }
998        Expr::ArrayIndex { array, index } => {
999            collect_aggregate_reprs(array, out);
1000            collect_aggregate_reprs(index, out);
1001        }
1002        Expr::ArraySlice { array, start, end } => {
1003            collect_aggregate_reprs(array, out);
1004            if let Some(s) = start {
1005                collect_aggregate_reprs(s, out);
1006            }
1007            if let Some(e) = end {
1008                collect_aggregate_reprs(e, out);
1009            }
1010        }
1011        _ => {}
1012    }
1013}
1014
/// A variable or property reference gathered by `collect_non_aggregate_refs`.
#[derive(Clone, Debug)]
enum NonAggregateRef {
    /// A bare variable reference; the payload is the variable name.
    Var(String),
    /// A property access expression.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the base variable when the property base is a plain
        /// variable, `None` for any other kind of base expression.
        base_var: Option<String>,
    },
}
1023
1024fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1025    match expr {
1026        Expr::FunctionCall { name, args, .. } => {
1027            if is_aggregate_function_name(name) {
1028                return;
1029            }
1030            for arg in args {
1031                collect_non_aggregate_refs(arg, inside_agg, out);
1032            }
1033        }
1034        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1035        Expr::Property(base, _) if !inside_agg => {
1036            let base_var = if let Expr::Variable(v) = base.as_ref() {
1037                Some(v.clone())
1038            } else {
1039                None
1040            };
1041            out.push(NonAggregateRef::Property {
1042                repr: expr.to_string_repr(),
1043                base_var,
1044            });
1045        }
1046        Expr::BinaryOp { left, right, .. } => {
1047            collect_non_aggregate_refs(left, inside_agg, out);
1048            collect_non_aggregate_refs(right, inside_agg, out);
1049        }
1050        Expr::UnaryOp { expr, .. }
1051        | Expr::IsNull(expr)
1052        | Expr::IsNotNull(expr)
1053        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1054        Expr::List(items) => {
1055            for item in items {
1056                collect_non_aggregate_refs(item, inside_agg, out);
1057            }
1058        }
1059        Expr::Case {
1060            expr,
1061            when_then,
1062            else_expr,
1063        } => {
1064            if let Some(e) = expr {
1065                collect_non_aggregate_refs(e, inside_agg, out);
1066            }
1067            for (w, t) in when_then {
1068                collect_non_aggregate_refs(w, inside_agg, out);
1069                collect_non_aggregate_refs(t, inside_agg, out);
1070            }
1071            if let Some(e) = else_expr {
1072                collect_non_aggregate_refs(e, inside_agg, out);
1073            }
1074        }
1075        Expr::In { expr, list } => {
1076            collect_non_aggregate_refs(expr, inside_agg, out);
1077            collect_non_aggregate_refs(list, inside_agg, out);
1078        }
1079        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1080        // source. The body references the loop variable, not outer-scope vars.
1081        Expr::ListComprehension { list, .. } => {
1082            collect_non_aggregate_refs(list, inside_agg, out);
1083        }
1084        Expr::Quantifier { list, .. } => {
1085            collect_non_aggregate_refs(list, inside_agg, out);
1086        }
1087        Expr::Reduce { init, list, .. } => {
1088            collect_non_aggregate_refs(init, inside_agg, out);
1089            collect_non_aggregate_refs(list, inside_agg, out);
1090        }
1091        _ => {}
1092    }
1093}
1094
1095fn validate_with_order_by_aggregate_item(
1096    expr: &Expr,
1097    projected_aggregate_reprs: &HashSet<String>,
1098    projected_simple_reprs: &HashSet<String>,
1099    projected_aliases: &HashSet<String>,
1100) -> Result<()> {
1101    let mut aggregate_reprs = HashSet::new();
1102    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1103    for agg in aggregate_reprs {
1104        if !projected_aggregate_reprs.contains(&agg) {
1105            return Err(anyhow!(
1106                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1107                agg
1108            ));
1109        }
1110    }
1111
1112    let mut refs = Vec::new();
1113    collect_non_aggregate_refs(expr, false, &mut refs);
1114    refs.retain(|r| match r {
1115        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1116        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1117    });
1118
1119    let mut dedup = HashSet::new();
1120    refs.retain(|r| {
1121        let key = match r {
1122            NonAggregateRef::Var(v) => format!("v:{v}"),
1123            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1124        };
1125        dedup.insert(key)
1126    });
1127
1128    if refs.len() > 1 {
1129        return Err(anyhow!(
1130            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1131        ));
1132    }
1133
1134    if let Some(r) = refs.first() {
1135        return match r {
1136            NonAggregateRef::Var(v) => Err(anyhow!(
1137                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1138                v
1139            )),
1140            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1141                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1142                base_var
1143                    .clone()
1144                    .unwrap_or_else(|| "<property-base>".to_string())
1145            )),
1146        };
1147    }
1148
1149    Ok(())
1150}
1151
1152/// Validate that no aggregation functions appear in WHERE clause.
1153fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1154    if contains_aggregate_recursive(predicate) {
1155        return Err(anyhow!(
1156            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1157        ));
1158    }
1159    Ok(())
1160}
1161
/// A numeric constant: either a 64-bit signed integer or a 64-bit float.
#[derive(Clone, Copy, Debug)]
enum ConstNumber {
    /// Integer value.
    Int(i64),
    /// Floating-point value.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as `f64`; integers are converted with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            ConstNumber::Float(float) => float,
            ConstNumber::Int(int) => int as f64,
        }
    }
}
1176
1177fn eval_const_numeric_expr(
1178    expr: &Expr,
1179    params: &HashMap<String, uni_common::Value>,
1180) -> Result<ConstNumber> {
1181    match expr {
1182        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1183        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1184        Expr::Parameter(name) => match params.get(name) {
1185            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1186            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1187            Some(uni_common::Value::Null) => Err(anyhow!(
1188                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1189                name
1190            )),
1191            Some(other) => Err(anyhow!(
1192                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1193                name,
1194                other
1195            )),
1196            None => Err(anyhow!(
1197                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1198            )),
1199        },
1200        Expr::UnaryOp {
1201            op: uni_cypher::ast::UnaryOp::Neg,
1202            expr,
1203        } => match eval_const_numeric_expr(expr, params)? {
1204            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1205            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1206        },
1207        Expr::BinaryOp { left, op, right } => {
1208            let l = eval_const_numeric_expr(left, params)?;
1209            let r = eval_const_numeric_expr(right, params)?;
1210            match op {
1211                BinaryOp::Add => match (l, r) {
1212                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1213                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1214                },
1215                BinaryOp::Sub => match (l, r) {
1216                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1217                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1218                },
1219                BinaryOp::Mul => match (l, r) {
1220                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1221                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1222                },
1223                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1224                BinaryOp::Mod => match (l, r) {
1225                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1226                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1227                },
1228                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1229                _ => Err(anyhow!(
1230                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1231                )),
1232            }
1233        }
1234        Expr::FunctionCall { name, args, .. } => {
1235            let lower = name.to_lowercase();
1236            match lower.as_str() {
1237                "rand" if args.is_empty() => {
1238                    use rand::Rng;
1239                    let mut rng = rand::thread_rng();
1240                    Ok(ConstNumber::Float(rng.r#gen::<f64>()))
1241                }
1242                "tointeger" | "toint" if args.len() == 1 => {
1243                    match eval_const_numeric_expr(&args[0], params)? {
1244                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1245                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1246                    }
1247                }
1248                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1249                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1250                )),
1251                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1252                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1253                )),
1254                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1255                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1256                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1257                },
1258                _ => Err(anyhow!(
1259                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1260                )),
1261            }
1262        }
1263        _ => Err(anyhow!(
1264            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1265        )),
1266    }
1267}
1268
1269/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1270/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1271fn parse_non_negative_integer(
1272    expr: &Expr,
1273    clause_name: &str,
1274    params: &HashMap<String, uni_common::Value>,
1275) -> Result<Option<usize>> {
1276    let referenced_vars = collect_expr_variables(expr);
1277    if !referenced_vars.is_empty() {
1278        return Err(anyhow!(
1279            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1280            clause_name
1281        ));
1282    }
1283
1284    let value = eval_const_numeric_expr(expr, params)?;
1285    let as_int = match value {
1286        ConstNumber::Int(v) => v,
1287        ConstNumber::Float(v) => {
1288            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1289                return Err(anyhow!(
1290                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1291                    clause_name
1292                ));
1293            }
1294            v as i64
1295        }
1296    };
1297    if as_int < 0 {
1298        return Err(anyhow!(
1299            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1300            clause_name
1301        ));
1302    }
1303    Ok(Some(as_int as usize))
1304}
1305
1306/// Validate that aggregation functions are not nested.
1307fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1308    if let Expr::FunctionCall { name, args, .. } = expr
1309        && is_aggregate_function_name(name)
1310    {
1311        for arg in args {
1312            if contains_aggregate_recursive(arg) {
1313                return Err(anyhow!(
1314                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1315                ));
1316            }
1317            if contains_non_deterministic(arg) {
1318                return Err(anyhow!(
1319                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1320                ));
1321            }
1322        }
1323    }
1324    let mut result = Ok(());
1325    expr.for_each_child(&mut |child| {
1326        if result.is_ok() {
1327            result = validate_no_nested_aggregation(child);
1328        }
1329    });
1330    result
1331}
1332
1333/// Validate that an expression does not access properties or labels of
1334/// deleted entities. `type(r)` on a deleted relationship is allowed per
1335/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1336fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1337    // Check n.prop on a deleted variable
1338    if let Expr::Property(inner, _) = expr
1339        && let Expr::Variable(name) = inner.as_ref()
1340        && deleted_vars.contains(name)
1341    {
1342        return Err(anyhow!(
1343            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1344            name
1345        ));
1346    }
1347    // Check labels(n) or keys(n) on a deleted variable
1348    if let Expr::FunctionCall { name, args, .. } = expr
1349        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1350        && args.len() == 1
1351        && let Expr::Variable(var) = &args[0]
1352        && deleted_vars.contains(var)
1353    {
1354        return Err(anyhow!(
1355            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1356            name.to_lowercase(),
1357            var
1358        ));
1359    }
1360    let mut result = Ok(());
1361    expr.for_each_child(&mut |child| {
1362        if result.is_ok() {
1363            result = validate_no_deleted_entity_access(child, deleted_vars);
1364        }
1365    });
1366    result
1367}
1368
1369/// Validate that all variables referenced in properties are defined,
1370/// either in scope or in the local CREATE variable list.
1371fn validate_property_variables(
1372    properties: &Option<Expr>,
1373    vars_in_scope: &[VariableInfo],
1374    create_vars: &[&str],
1375) -> Result<()> {
1376    if let Some(props) = properties {
1377        for var in collect_expr_variables(props) {
1378            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1379                return Err(anyhow!(
1380                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1381                    var
1382                ));
1383            }
1384        }
1385    }
1386    Ok(())
1387}
1388
1389/// Check that a variable name is not already bound in scope or in the local CREATE list.
1390/// Used to prevent rebinding in CREATE clauses.
1391fn check_not_already_bound(
1392    name: &str,
1393    vars_in_scope: &[VariableInfo],
1394    create_vars: &[&str],
1395) -> Result<()> {
1396    if is_var_in_scope(vars_in_scope, name) {
1397        return Err(anyhow!(
1398            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1399            name
1400        ));
1401    }
1402    if create_vars.contains(&name) {
1403        return Err(anyhow!(
1404            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1405            name
1406        ));
1407    }
1408    Ok(())
1409}
1410
1411fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1412    let mut scope = vars_in_scope.to_vec();
1413
1414    for path in &pattern.paths {
1415        if let Some(path_var) = &path.variable
1416            && !path_var.is_empty()
1417            && !is_var_in_scope(&scope, path_var)
1418        {
1419            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1420        }
1421        for element in &path.elements {
1422            match element {
1423                PatternElement::Node(n) => {
1424                    if let Some(v) = &n.variable
1425                        && !v.is_empty()
1426                        && !is_var_in_scope(&scope, v)
1427                    {
1428                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1429                    }
1430                }
1431                PatternElement::Relationship(r) => {
1432                    if let Some(v) = &r.variable
1433                        && !v.is_empty()
1434                        && !is_var_in_scope(&scope, v)
1435                    {
1436                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1437                    }
1438                }
1439                PatternElement::Parenthesized { .. } => {}
1440            }
1441        }
1442    }
1443
1444    scope
1445}
1446
1447fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1448    match item {
1449        SetItem::Property { expr, value } => {
1450            validate_expression_variables(expr, vars_in_scope)?;
1451            validate_expression(expr, vars_in_scope)?;
1452            validate_expression_variables(value, vars_in_scope)?;
1453            validate_expression(value, vars_in_scope)?;
1454            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1455                return Err(anyhow!(
1456                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1457                ));
1458            }
1459        }
1460        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1461            if !is_var_in_scope(vars_in_scope, variable) {
1462                return Err(anyhow!(
1463                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1464                    variable
1465                ));
1466            }
1467            validate_expression_variables(value, vars_in_scope)?;
1468            validate_expression(value, vars_in_scope)?;
1469            if contains_pattern_predicate(value) {
1470                return Err(anyhow!(
1471                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1472                ));
1473            }
1474        }
1475        SetItem::Labels { variable, .. } => {
1476            if !is_var_in_scope(vars_in_scope, variable) {
1477                return Err(anyhow!(
1478                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1479                    variable
1480                ));
1481            }
1482        }
1483    }
1484
1485    Ok(())
1486}
1487
1488/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1489/// The OpenCypher spec requires all property values in MERGE to be non-null.
1490fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1491    if let Some(Expr::Map(entries)) = properties {
1492        for (key, value) in entries {
1493            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1494                return Err(anyhow!(
1495                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1496                    key
1497                ));
1498            }
1499        }
1500    }
1501    Ok(())
1502}
1503
1504fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1505    for path in &merge_clause.pattern.paths {
1506        for element in &path.elements {
1507            match element {
1508                PatternElement::Node(n) => {
1509                    if let Some(Expr::Parameter(_)) = &n.properties {
1510                        return Err(anyhow!(
1511                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1512                        ));
1513                    }
1514                    reject_null_merge_properties(&n.properties)?;
1515                    // VariableAlreadyBound: reject if a bound variable is used
1516                    // as a standalone MERGE node or introduces new labels/properties.
1517                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1518                    if let Some(variable) = &n.variable
1519                        && !variable.is_empty()
1520                        && is_var_in_scope(vars_in_scope, variable)
1521                    {
1522                        let is_standalone = path.elements.len() == 1;
1523                        let has_new_labels = !n.labels.is_empty();
1524                        let has_new_properties = n.properties.is_some();
1525                        if is_standalone || has_new_labels || has_new_properties {
1526                            return Err(anyhow!(
1527                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1528                                variable
1529                            ));
1530                        }
1531                    }
1532                }
1533                PatternElement::Relationship(r) => {
1534                    if let Some(variable) = &r.variable
1535                        && !variable.is_empty()
1536                        && is_var_in_scope(vars_in_scope, variable)
1537                    {
1538                        return Err(anyhow!(
1539                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1540                            variable
1541                        ));
1542                    }
1543                    if r.types.len() != 1 {
1544                        return Err(anyhow!(
1545                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1546                        ));
1547                    }
1548                    if r.range.is_some() {
1549                        return Err(anyhow!(
1550                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1551                        ));
1552                    }
1553                    if let Some(Expr::Parameter(_)) = &r.properties {
1554                        return Err(anyhow!(
1555                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1556                        ));
1557                    }
1558                    reject_null_merge_properties(&r.properties)?;
1559                }
1560                PatternElement::Parenthesized { .. } => {}
1561            }
1562        }
1563    }
1564
1565    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1566    for item in &merge_clause.on_create {
1567        validate_merge_set_item(item, &merge_scope)?;
1568    }
1569    for item in &merge_clause.on_match {
1570        validate_merge_set_item(item, &merge_scope)?;
1571    }
1572
1573    Ok(())
1574}
1575
1576/// Recursively validate an expression for type errors, undefined variables, etc.
1577fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1578    // Validate boolean operators and nested aggregation first
1579    validate_boolean_expression(expr)?;
1580    validate_no_nested_aggregation(expr)?;
1581
1582    // Helper to validate multiple expressions
1583    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1584        for e in exprs {
1585            validate_expression(e, vars)?;
1586        }
1587        Ok(())
1588    }
1589
1590    match expr {
1591        Expr::FunctionCall { name, args, .. } => {
1592            validate_function_call(name, args, vars_in_scope)?;
1593            validate_all(args, vars_in_scope)
1594        }
1595        Expr::BinaryOp { left, right, .. } => {
1596            validate_expression(left, vars_in_scope)?;
1597            validate_expression(right, vars_in_scope)
1598        }
1599        Expr::UnaryOp { expr: e, .. }
1600        | Expr::IsNull(e)
1601        | Expr::IsNotNull(e)
1602        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1603        Expr::Property(base, prop) => {
1604            if let Expr::Variable(var_name) = base.as_ref()
1605                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1606            {
1607                // Paths don't have properties
1608                if var_info.var_type == VariableType::Path {
1609                    return Err(anyhow!(
1610                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1611                        var_name,
1612                        prop
1613                    ));
1614                }
1615                // Known non-graph literals (int, float, bool, string, list) don't have properties
1616                if var_info.var_type == VariableType::ScalarLiteral {
1617                    return Err(anyhow!(
1618                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1619                    ));
1620                }
1621            }
1622            validate_expression(base, vars_in_scope)
1623        }
1624        Expr::List(items) => validate_all(items, vars_in_scope),
1625        Expr::Case {
1626            expr: case_expr,
1627            when_then,
1628            else_expr,
1629        } => {
1630            if let Some(e) = case_expr {
1631                validate_expression(e, vars_in_scope)?;
1632            }
1633            for (w, t) in when_then {
1634                validate_expression(w, vars_in_scope)?;
1635                validate_expression(t, vars_in_scope)?;
1636            }
1637            if let Some(e) = else_expr {
1638                validate_expression(e, vars_in_scope)?;
1639            }
1640            Ok(())
1641        }
1642        Expr::In { expr: e, list } => {
1643            validate_expression(e, vars_in_scope)?;
1644            validate_expression(list, vars_in_scope)
1645        }
1646        Expr::Exists {
1647            query,
1648            from_pattern_predicate: true,
1649        } => {
1650            // Pattern predicates cannot introduce new named variables.
1651            // Extract named vars from inner MATCH pattern, check each is in scope.
1652            if let Query::Single(stmt) = query.as_ref() {
1653                for clause in &stmt.clauses {
1654                    if let Clause::Match(m) = clause {
1655                        for path in &m.pattern.paths {
1656                            for elem in &path.elements {
1657                                match elem {
1658                                    PatternElement::Node(n) => {
1659                                        if let Some(var) = &n.variable
1660                                            && !is_var_in_scope(vars_in_scope, var)
1661                                        {
1662                                            return Err(anyhow!(
1663                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1664                                                var
1665                                            ));
1666                                        }
1667                                    }
1668                                    PatternElement::Relationship(r) => {
1669                                        if let Some(var) = &r.variable
1670                                            && !is_var_in_scope(vars_in_scope, var)
1671                                        {
1672                                            return Err(anyhow!(
1673                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1674                                                var
1675                                            ));
1676                                        }
1677                                    }
1678                                    _ => {}
1679                                }
1680                            }
1681                        }
1682                    }
1683                }
1684            }
1685            Ok(())
1686        }
1687        _ => Ok(()),
1688    }
1689}
1690
/// One step (hop) in a Quantified Path Pattern sub-pattern.
///
/// Used by [`LogicalPlan::Traverse`] when `qpp_steps` is `Some`; the planner
/// stores one `QppStepInfo` per hop of the repeated sub-pattern.
#[derive(Debug, Clone)]
pub struct QppStepInfo {
    /// Edge type IDs that this step can traverse.
    pub edge_type_ids: Vec<u32>,
    /// Traversal direction for this step.
    pub direction: Direction,
    /// Optional label constraint on the target node.
    /// NOTE(review): `None` presumably means "any label" — confirm with the executor.
    pub target_label: Option<String>,
}
1703
/// Logical query plan produced by [`QueryPlanner`].
///
/// Each variant represents one step in the Cypher execution pipeline.
/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
/// transform or join them, and the root node defines the final output.
///
/// Variants that carry an `input: Box<LogicalPlan>` field wrap a child plan.
#[derive(Debug, Clone)]
pub enum LogicalPlan {
    /// UNION / UNION ALL of two sub-plans.
    Union {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
        all: bool,
    },
    /// Scan vertices of a single labeled dataset.
    Scan {
        label_id: u16,
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Lookup vertices by ext_id using the main vertices table.
    /// Used when a query references ext_id without specifying a label.
    ExtIdLookup {
        variable: String,
        ext_id: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan all vertices from main table (MATCH (n) without label).
    /// Used for schemaless queries that don't specify any label.
    ScanAll {
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Scan the main vertices table filtering by label name(s) (`MATCH (n:Unknown)`).
    /// Used for labels not defined in schema (schemaless support).
    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
    ScanMainByLabels {
        labels: Vec<String>,
        variable: String,
        filter: Option<Expr>,
        optional: bool,
    },
    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
    Empty,
    /// UNWIND: expand a list expression into one row per element.
    Unwind {
        input: Box<LogicalPlan>,
        expr: Expr,
        variable: String,
    },
    /// Traverse edges identified by type ID from `source_variable` to `target_variable`.
    /// NOTE(review): presumably the schema-defined counterpart of
    /// `TraverseMainByType`, which handles edge types not defined in schema — confirm.
    Traverse {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        edge_properties: HashSet<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
        optional_pattern_vars: HashSet<String>,
        /// Variable names (node + edge) from the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge ID columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        /// Variables from previous disconnected MATCH clauses are excluded.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
        /// QPP steps for multi-hop quantified path patterns.
        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
        qpp_steps: Option<Vec<QppStepInfo>>,
    },
    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
    /// Used for edge types not defined in schema (schemaless support).
    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
    TraverseMainByType {
        type_names: Vec<String>,
        input: Box<LogicalPlan>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        step_variable: Option<String>,
        min_hops: usize,
        max_hops: usize,
        optional: bool,
        target_filter: Option<Expr>,
        path_variable: Option<String>,
        /// Whether this is a variable-length pattern (has `*` range specifier).
        /// When true, step_variable holds a list of edges (even for *1..1).
        is_variable_length: bool,
        /// All variables from this OPTIONAL MATCH pattern.
        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
        optional_pattern_vars: HashSet<String>,
        /// Variables belonging to the current MATCH clause scope.
        /// Used for relationship uniqueness scoping: only edge columns whose
        /// associated variable is in this set participate in uniqueness filtering.
        scope_match_variables: HashSet<String>,
        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
        edge_filter_expr: Option<Expr>,
        /// Path traversal semantics (Trail by default for OpenCypher).
        path_mode: crate::query::df_graph::nfa::PathMode,
    },
    /// Filter the rows of `input` with `predicate`.
    Filter {
        input: Box<LogicalPlan>,
        predicate: Expr,
        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
        /// When evaluating the filter, if any of these variables are NULL,
        /// the row is preserved regardless of the predicate result.
        optional_variables: HashSet<String>,
    },
    /// A single CREATE pattern applied on top of `input`.
    Create {
        input: Box<LogicalPlan>,
        pattern: Pattern,
    },
    /// Batched CREATE operations for multiple consecutive CREATE clauses.
    ///
    /// This variant combines multiple CREATE patterns into a single plan node
    /// to avoid deep recursion when executing many CREATEs sequentially.
    CreateBatch {
        input: Box<LogicalPlan>,
        patterns: Vec<Pattern>,
    },
    /// MERGE of `pattern`, with optional SET clauses for the
    /// ON MATCH / ON CREATE branches.
    Merge {
        input: Box<LogicalPlan>,
        pattern: Pattern,
        on_match: Option<SetClause>,
        on_create: Option<SetClause>,
    },
    /// SET: carries the list of `SetItem`s to apply over `input`.
    Set {
        input: Box<LogicalPlan>,
        items: Vec<SetItem>,
    },
    /// REMOVE: carries the list of `RemoveItem`s to apply over `input`.
    Remove {
        input: Box<LogicalPlan>,
        items: Vec<RemoveItem>,
    },
    /// DELETE of the entities produced by the `items` expressions;
    /// `detach` presumably corresponds to `DETACH DELETE` — confirm against the planner.
    Delete {
        input: Box<LogicalPlan>,
        items: Vec<Expr>,
        detach: bool,
    },
    /// FOREACH (variable IN list | clauses)
    Foreach {
        input: Box<LogicalPlan>,
        variable: String,
        list: Expr,
        body: Vec<LogicalPlan>,
    },
    /// Sort step carrying the `SortItem`s to order `input` by.
    Sort {
        input: Box<LogicalPlan>,
        order_by: Vec<SortItem>,
    },
    /// Row-window step with optional `skip` and `fetch` counts
    /// (presumably SKIP / LIMIT — confirm against the planner).
    Limit {
        input: Box<LogicalPlan>,
        skip: Option<usize>,
        fetch: Option<usize>,
    },
    /// Aggregation step: grouping key expressions plus aggregate expressions.
    Aggregate {
        input: Box<LogicalPlan>,
        group_by: Vec<Expr>,
        aggregates: Vec<Expr>,
    },
    /// Distinct step over `input` (no further parameters).
    Distinct {
        input: Box<LogicalPlan>,
    },
    /// Window step carrying the window expressions to evaluate over `input`.
    Window {
        input: Box<LogicalPlan>,
        window_exprs: Vec<Expr>,
    },
    /// Projection step: a list of `(expression, optional alias)` pairs.
    Project {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
    },
    /// Combination of two independent sub-plans.
    /// NOTE(review): presumably a Cartesian product of `left` and `right` — confirm.
    CrossJoin {
        left: Box<LogicalPlan>,
        right: Box<LogicalPlan>,
    },
    /// Pairs an `input` plan with a `subquery` plan and an optional filter on the input.
    /// NOTE(review): evaluation semantics are not visible in this definition.
    Apply {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
        input_filter: Option<Expr>,
    },
    /// Recursive CTE named `cte_name`, made of an `initial` plan and a `recursive` plan.
    RecursiveCTE {
        cte_name: String,
        initial: Box<LogicalPlan>,
        recursive: Box<LogicalPlan>,
    },
    /// Procedure call by name with argument expressions; `yield_items` holds
    /// `(name, optional alias)` pairs.
    ProcedureCall {
        procedure_name: String,
        arguments: Vec<Expr>,
        yield_items: Vec<(String, Option<String>)>,
    },
    /// Subquery plan attached to an `input` plan.
    /// NOTE(review): presumably `CALL { ... }` — confirm against the planner.
    SubqueryCall {
        input: Box<LogicalPlan>,
        subquery: Box<LogicalPlan>,
    },
    /// Vector nearest-neighbour lookup on `property` of label `label_id`,
    /// with a result count `k` and an optional similarity `threshold`.
    VectorKnn {
        label_id: u16,
        variable: String,
        property: String,
        query: Expr,
        k: usize,
        threshold: Option<f32>,
    },
    /// Inverted-index lookup on `property` of label `label_id` for the given `terms`.
    InvertedIndexLookup {
        label_id: u16,
        variable: String,
        property: String,
        terms: Expr,
    },
    /// Shortest-path search between `source_variable` and `target_variable`,
    /// bound to `path_variable`.
    ShortestPath {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// allShortestPaths() - Returns all paths with minimum length
    AllShortestPaths {
        input: Box<LogicalPlan>,
        edge_type_ids: Vec<u32>,
        direction: Direction,
        source_variable: String,
        target_variable: String,
        target_label_id: u16,
        path_variable: String,
        /// Minimum number of hops (edges) in the path. Default is 1.
        min_hops: u32,
        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
        max_hops: u32,
    },
    /// Quantified pattern: `pattern_plan` describes one iteration, repeated
    /// between `min_iterations` and `max_iterations` times.
    QuantifiedPattern {
        input: Box<LogicalPlan>,
        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
        min_iterations: u32,
        max_iterations: u32,
        path_variable: Option<String>,
        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
        binding_variable: String, // Output variable of iteration (e.g. 'b')
    },
    // DDL Plans
    /// Create a vector index from `config`.
    CreateVectorIndex {
        config: VectorIndexConfig,
        if_not_exists: bool,
    },
    /// Create a full-text index from `config`.
    CreateFullTextIndex {
        config: FullTextIndexConfig,
        if_not_exists: bool,
    },
    /// Create a scalar index from `config`.
    CreateScalarIndex {
        config: ScalarIndexConfig,
        if_not_exists: bool,
    },
    /// Create a JSON full-text-search index from `config`.
    CreateJsonFtsIndex {
        config: JsonFtsIndexConfig,
        if_not_exists: bool,
    },
    /// Drop the index called `name`.
    DropIndex {
        name: String,
        if_exists: bool,
    },
    /// List indexes, with an optional string `filter`.
    ShowIndexes {
        filter: Option<String>,
    },
    /// Copy between `source` and `target`; `is_export` selects the direction.
    /// NOTE(review): how this relates to `CopyTo` / `CopyFrom` is not visible here.
    Copy {
        target: String,
        source: String,
        is_export: bool,
        options: HashMap<String, Value>,
    },
    /// Backup to `destination` with free-form `options`.
    Backup {
        destination: String,
        options: HashMap<String, Value>,
    },
    /// EXPLAIN wrapper around the plan of the inner query.
    Explain {
        plan: Box<LogicalPlan>,
    },
    // Admin Plans
    ShowDatabase,
    ShowConfig,
    ShowStatistics,
    Vacuum,
    Checkpoint,
    /// Copy operation for `label` targeting `path` in the given `format`.
    CopyTo {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    /// Copy operation for `label` reading `path` in the given `format`.
    CopyFrom {
        label: String,
        path: String,
        format: String,
        options: HashMap<String, Value>,
    },
    // Schema DDL
    CreateLabel(CreateLabel),
    CreateEdgeType(CreateEdgeType),
    AlterLabel(AlterLabel),
    AlterEdgeType(AlterEdgeType),
    DropLabel(DropLabel),
    DropEdgeType(DropEdgeType),
    // Constraints
    CreateConstraint(CreateConstraint),
    DropConstraint(DropConstraint),
    ShowConstraints(ShowConstraints),
    /// Bind a zero-length path (single node pattern with path variable).
    /// E.g., `p = (a)` creates a Path with one node and zero edges.
    BindZeroLengthPath {
        input: Box<LogicalPlan>,
        node_variable: String,
        path_variable: String,
    },
    /// Bind a fixed-length path from already-computed node and edge columns.
    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
    BindPath {
        input: Box<LogicalPlan>,
        node_variables: Vec<String>,
        edge_variables: Vec<String>,
        path_variable: String,
    },

    // ── Locy variants ──────────────────────────────────────────
    /// Top-level Locy program: stratified rules + commands.
    LocyProgram {
        strata: Vec<super::planner_locy_types::LocyStratum>,
        commands: Vec<super::planner_locy_types::LocyCommand>,
        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
        max_iterations: usize,
        timeout: std::time::Duration,
        max_derived_bytes: usize,
        deterministic_best_by: bool,
        strict_probability_domain: bool,
        probability_epsilon: f64,
        exact_probability: bool,
        max_bdd_variables: usize,
        top_k_proofs: usize,
    },
    /// FOLD operator: lattice-join non-key columns per KEY group.
    LocyFold {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        fold_bindings: Vec<(String, Expr)>,
        strict_probability_domain: bool,
        probability_epsilon: f64,
    },
    /// BEST BY operator: select best row per KEY group by ordered criteria.
    LocyBestBy {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
        /// (expression, ascending) pairs.
        criteria: Vec<(Expr, bool)>,
    },
    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
    LocyPriority {
        input: Box<LogicalPlan>,
        key_columns: Vec<String>,
    },
    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
    LocyDerivedScan {
        scan_index: usize,
        data: Arc<RwLock<Vec<RecordBatch>>>,
        schema: SchemaRef,
    },
    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
    /// without carrying through helper/property columns like the regular Project.
    LocyProject {
        input: Box<LogicalPlan>,
        projections: Vec<(Expr, Option<String>)>,
        /// Expected output Arrow type per projection (for CAST support).
        target_types: Vec<DataType>,
    },
}
2101
/// Extracted vector similarity predicate info for optimization.
///
/// Built from either `vector_similarity(n.prop, query) > t` (and the mirrored
/// `t < vector_similarity(...)` forms) or from `n.prop ~= query`.
struct VectorSimilarityPredicate {
    /// Name of the variable whose property is compared (the `n` in `n.prop`).
    variable: String,
    /// Name of the property being compared (the `prop` in `n.prop`).
    property: String,
    /// The query expression: second argument of `vector_similarity`, or the
    /// right-hand side of `~=`.
    query: Expr,
    /// Comparison threshold taken from the numeric literal; `None` for the `~=` form.
    threshold: Option<f32>,
}
2109
/// Result of extracting vector_similarity from a predicate.
///
/// Produced by `extract_vector_similarity`: the recognised predicate plus
/// whatever was AND-ed with it in the original expression.
struct VectorSimilarityExtraction {
    /// The extracted vector similarity predicate
    predicate: VectorSimilarityPredicate,
    /// Remaining predicates that couldn't be optimized (if any).
    /// `None` when the whole expression was the similarity predicate.
    residual: Option<Expr>,
}
2117
2118/// Try to extract a vector_similarity predicate from an expression.
2119/// Matches patterns like:
2120/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2121/// - n.embedding ~= $query
2122///
2123/// Also handles AND predicates.
2124fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2125    match expr {
2126        Expr::BinaryOp { left, op, right } => {
2127            // Handle AND: check both sides for vector_similarity
2128            if matches!(op, BinaryOp::And) {
2129                // Try left side first
2130                if let Some(vs) = extract_simple_vector_similarity(left) {
2131                    return Some(VectorSimilarityExtraction {
2132                        predicate: vs,
2133                        residual: Some(right.as_ref().clone()),
2134                    });
2135                }
2136                // Try right side
2137                if let Some(vs) = extract_simple_vector_similarity(right) {
2138                    return Some(VectorSimilarityExtraction {
2139                        predicate: vs,
2140                        residual: Some(left.as_ref().clone()),
2141                    });
2142                }
2143                // Recursively check within left/right for nested ANDs
2144                if let Some(mut extraction) = extract_vector_similarity(left) {
2145                    extraction.residual = Some(combine_with_and(
2146                        extraction.residual,
2147                        right.as_ref().clone(),
2148                    ));
2149                    return Some(extraction);
2150                }
2151                if let Some(mut extraction) = extract_vector_similarity(right) {
2152                    extraction.residual =
2153                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2154                    return Some(extraction);
2155                }
2156                return None;
2157            }
2158
2159            // Simple case: direct vector_similarity comparison
2160            if let Some(vs) = extract_simple_vector_similarity(expr) {
2161                return Some(VectorSimilarityExtraction {
2162                    predicate: vs,
2163                    residual: None,
2164                });
2165            }
2166            None
2167        }
2168        _ => None,
2169    }
2170}
2171
2172/// Helper to combine an optional expression with another using AND
2173fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2174    match opt_expr {
2175        Some(e) => Expr::BinaryOp {
2176            left: Box::new(e),
2177            op: BinaryOp::And,
2178            right: Box::new(other),
2179        },
2180        None => other,
2181    }
2182}
2183
2184/// Extract a simple vector_similarity comparison (no AND)
2185fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2186    match expr {
2187        Expr::BinaryOp { left, op, right } => {
2188            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2189            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2190                && let (Some(vs), Some(thresh)) = (
2191                    extract_vector_similarity_call(left),
2192                    extract_float_literal(right),
2193                )
2194            {
2195                return Some(VectorSimilarityPredicate {
2196                    variable: vs.0,
2197                    property: vs.1,
2198                    query: vs.2,
2199                    threshold: Some(thresh),
2200                });
2201            }
2202            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2203            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2204                && let (Some(thresh), Some(vs)) = (
2205                    extract_float_literal(left),
2206                    extract_vector_similarity_call(right),
2207                )
2208            {
2209                return Some(VectorSimilarityPredicate {
2210                    variable: vs.0,
2211                    property: vs.1,
2212                    query: vs.2,
2213                    threshold: Some(thresh),
2214                });
2215            }
2216            // Pattern: n.embedding ~= query
2217            if matches!(op, BinaryOp::ApproxEq)
2218                && let Expr::Property(var_expr, prop) = left.as_ref()
2219                && let Expr::Variable(var) = var_expr.as_ref()
2220            {
2221                return Some(VectorSimilarityPredicate {
2222                    variable: var.clone(),
2223                    property: prop.clone(),
2224                    query: right.as_ref().clone(),
2225                    threshold: None,
2226                });
2227            }
2228            None
2229        }
2230        _ => None,
2231    }
2232}
2233
2234/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2235fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2236    if let Expr::FunctionCall { name, args, .. } = expr
2237        && name.eq_ignore_ascii_case("vector_similarity")
2238        && args.len() == 2
2239    {
2240        // First arg should be Property(Identifier(var), prop)
2241        if let Expr::Property(var_expr, prop) = &args[0]
2242            && let Expr::Variable(var) = var_expr.as_ref()
2243        {
2244            // Second arg is query
2245            return Some((var.clone(), prop.clone(), args[1].clone()));
2246        }
2247    }
2248    None
2249}
2250
2251/// Extract a float value from a literal expression
2252fn extract_float_literal(expr: &Expr) -> Option<f32> {
2253    match expr {
2254        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2255        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2256        _ => None,
2257    }
2258}
2259
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Schema the planner was constructed with (see `QueryPlanner::new`).
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names.
    /// Atomic so it can be bumped through `&self`.
    anon_counter: std::sync::atomic::AtomicUsize,
    /// Query parameters for resolving $param in SKIP/LIMIT.
    /// Empty unless supplied through `with_params`.
    params: HashMap<String, uni_common::Value>,
}
2275
/// Borrowed inputs describing one relationship hop to be planned.
/// NOTE(review): the consumer of this struct is outside the visible chunk;
/// field descriptions below are limited to what the types and names show.
struct TraverseParams<'a> {
    /// Relationship pattern of the hop.
    rel: &'a RelationshipPattern,
    /// Node pattern at the far end of the hop.
    target_node: &'a NodePattern,
    /// Whether the hop is optional (presumably from OPTIONAL MATCH — confirm).
    optional: bool,
    /// Name of the path variable, if one is associated with the hop.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2285
2286impl QueryPlanner {
2287    /// Create a new planner for the given schema.
2288    ///
2289    /// Pre-parses all generation expressions defined in the schema so that
2290    /// repeated plan calls avoid redundant parsing.
2291    pub fn new(schema: Arc<Schema>) -> Self {
2292        // Pre-parse all generation expressions for caching
2293        let mut gen_expr_cache = HashMap::new();
2294        for (label, props) in &schema.properties {
2295            for (gen_col, meta) in props {
2296                if let Some(expr_str) = &meta.generation_expression
2297                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2298                {
2299                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2300                }
2301            }
2302        }
2303        Self {
2304            schema,
2305            gen_expr_cache,
2306            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2307            params: HashMap::new(),
2308        }
2309    }
2310
2311    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2312    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2313        self.params = params;
2314        self
2315    }
2316
2317    /// Plan a Cypher query with no pre-bound variables.
2318    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2319        self.plan_with_scope(query, Vec::new())
2320    }
2321
2322    /// Plan a Cypher query with a set of externally pre-bound variable names.
2323    ///
2324    /// `vars` lists variable names already in scope before this query executes
2325    /// (e.g., from an enclosing Locy rule body).
2326    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2327        // Apply query rewrites before planning
2328        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2329        if Self::has_mixed_union_modes(&rewritten_query) {
2330            return Err(anyhow!(
2331                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2332            ));
2333        }
2334
2335        match rewritten_query {
2336            Query::Single(stmt) => self.plan_single(stmt, vars),
2337            Query::Union { left, right, all } => {
2338                let l = self.plan_with_scope(*left, vars.clone())?;
2339                let r = self.plan_with_scope(*right, vars)?;
2340
2341                // Validate that both sides have the same column names
2342                let left_cols = Self::extract_projection_columns(&l);
2343                let right_cols = Self::extract_projection_columns(&r);
2344
2345                if left_cols != right_cols {
2346                    return Err(anyhow!(
2347                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2348                    ));
2349                }
2350
2351                Ok(LogicalPlan::Union {
2352                    left: Box::new(l),
2353                    right: Box::new(r),
2354                    all,
2355                })
2356            }
2357            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2358            Query::Explain(inner) => {
2359                let inner_plan = self.plan_with_scope(*inner, vars)?;
2360                Ok(LogicalPlan::Explain {
2361                    plan: Box::new(inner_plan),
2362                })
2363            }
2364            Query::TimeTravel { .. } => {
2365                unreachable!("TimeTravel should be resolved at API layer before planning")
2366            }
2367        }
2368    }
2369
2370    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2371        match query {
2372            Query::Union { left, right, all } => {
2373                out.insert(*all);
2374                Self::collect_union_modes(left, out);
2375                Self::collect_union_modes(right, out);
2376            }
2377            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2378            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2379            Query::Single(_) | Query::Schema(_) => {}
2380        }
2381    }
2382
2383    fn has_mixed_union_modes(query: &Query) -> bool {
2384        let mut modes = HashSet::new();
2385        Self::collect_union_modes(query, &mut modes);
2386        modes.len() > 1
2387    }
2388
2389    fn next_anon_var(&self) -> String {
2390        let id = self
2391            .anon_counter
2392            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2393        format!("_anon_{}", id)
2394    }
2395
2396    /// Extract projection column names from a logical plan.
2397    /// Used for UNION column validation.
2398    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2399        match plan {
2400            LogicalPlan::Project { projections, .. } => projections
2401                .iter()
2402                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2403                .collect(),
2404            LogicalPlan::Limit { input, .. }
2405            | LogicalPlan::Sort { input, .. }
2406            | LogicalPlan::Distinct { input, .. }
2407            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2408            LogicalPlan::Union { left, right, .. } => {
2409                let left_cols = Self::extract_projection_columns(left);
2410                if left_cols.is_empty() {
2411                    Self::extract_projection_columns(right)
2412                } else {
2413                    left_cols
2414                }
2415            }
2416            LogicalPlan::Aggregate {
2417                group_by,
2418                aggregates,
2419                ..
2420            } => {
2421                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2422                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2423                cols
2424            }
2425            _ => Vec::new(),
2426        }
2427    }
2428
2429    fn plan_return_clause(
2430        &self,
2431        return_clause: &ReturnClause,
2432        plan: LogicalPlan,
2433        vars_in_scope: &[VariableInfo],
2434    ) -> Result<LogicalPlan> {
2435        let mut plan = plan;
2436        let mut group_by = Vec::new();
2437        let mut aggregates = Vec::new();
2438        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2439        let mut has_agg = false;
2440        let mut projections = Vec::new();
2441        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2442        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2443        let mut projected_aliases: HashSet<String> = HashSet::new();
2444
2445        for item in &return_clause.items {
2446            match item {
2447                ReturnItem::All => {
2448                    // RETURN * - add all user-named variables in scope
2449                    // (anonymous variables like _anon_0 are excluded)
2450                    let user_vars: Vec<_> = vars_in_scope
2451                        .iter()
2452                        .filter(|v| !v.name.starts_with("_anon_"))
2453                        .collect();
2454                    if user_vars.is_empty() {
2455                        return Err(anyhow!(
2456                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2457                        ));
2458                    }
2459                    for v in user_vars {
2460                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2461                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2462                            group_by.push(Expr::Variable(v.name.clone()));
2463                        }
2464                        projected_aliases.insert(v.name.clone());
2465                        projected_simple_reprs.insert(v.name.clone());
2466                    }
2467                }
2468                ReturnItem::Expr {
2469                    expr,
2470                    alias,
2471                    source_text,
2472                } => {
2473                    if matches!(expr, Expr::Wildcard) {
2474                        for v in vars_in_scope {
2475                            projections
2476                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2477                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2478                                group_by.push(Expr::Variable(v.name.clone()));
2479                            }
2480                            projected_aliases.insert(v.name.clone());
2481                            projected_simple_reprs.insert(v.name.clone());
2482                        }
2483                    } else {
2484                        // Validate expression variables are defined
2485                        validate_expression_variables(expr, vars_in_scope)?;
2486                        // Validate function argument types and boolean operators
2487                        validate_expression(expr, vars_in_scope)?;
2488                        // Pattern predicates are not allowed in RETURN
2489                        if contains_pattern_predicate(expr) {
2490                            return Err(anyhow!(
2491                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2492                            ));
2493                        }
2494
2495                        // Use source text as column name when no explicit alias
2496                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2497                        projections.push((expr.clone(), effective_alias));
2498                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2499                            // Bare aggregate — push directly
2500                            has_agg = true;
2501                            aggregates.push(expr.clone());
2502                            projected_aggregate_reprs.insert(expr.to_string_repr());
2503                        } else if !is_window_function(expr)
2504                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2505                        {
2506                            // Compound aggregate or expression containing aggregates —
2507                            // extract the inner bare aggregates for the Aggregate node
2508                            has_agg = true;
2509                            compound_agg_exprs.push(expr.clone());
2510                            for inner in extract_inner_aggregates(expr) {
2511                                let repr = inner.to_string_repr();
2512                                if !projected_aggregate_reprs.contains(&repr) {
2513                                    aggregates.push(inner);
2514                                    projected_aggregate_reprs.insert(repr);
2515                                }
2516                            }
2517                        } else if !group_by.contains(expr) {
2518                            group_by.push(expr.clone());
2519                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2520                                projected_simple_reprs.insert(expr.to_string_repr());
2521                            }
2522                        }
2523
2524                        if let Some(a) = alias {
2525                            if projected_aliases.contains(a) {
2526                                return Err(anyhow!(
2527                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2528                                    a
2529                                ));
2530                            }
2531                            projected_aliases.insert(a.clone());
2532                        } else if let Expr::Variable(v) = expr {
2533                            if projected_aliases.contains(v) {
2534                                return Err(anyhow!(
2535                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2536                                    v
2537                                ));
2538                            }
2539                            projected_aliases.insert(v.clone());
2540                        }
2541                    }
2542                }
2543            }
2544        }
2545
2546        // Validate compound aggregate expressions: non-aggregate refs must be
2547        // individually present in the group_by as simple variables or properties.
2548        if has_agg {
2549            let group_by_reprs: HashSet<String> =
2550                group_by.iter().map(|e| e.to_string_repr()).collect();
2551            for expr in &compound_agg_exprs {
2552                let mut refs = Vec::new();
2553                collect_non_aggregate_refs(expr, false, &mut refs);
2554                for r in &refs {
2555                    let is_covered = match r {
2556                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
2557                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
2558                    };
2559                    if !is_covered {
2560                        return Err(anyhow!(
2561                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
2562                        ));
2563                    }
2564                }
2565            }
2566        }
2567
2568        if has_agg {
2569            plan = LogicalPlan::Aggregate {
2570                input: Box::new(plan),
2571                group_by,
2572                aggregates,
2573            };
2574        }
2575
2576        let mut window_exprs = Vec::new();
2577        for (expr, _) in &projections {
2578            Self::collect_window_functions(expr, &mut window_exprs);
2579        }
2580
2581        if let Some(order_by) = &return_clause.order_by {
2582            for item in order_by {
2583                Self::collect_window_functions(&item.expr, &mut window_exprs);
2584            }
2585        }
2586
2587        let has_window_exprs = !window_exprs.is_empty();
2588
2589        if has_window_exprs {
2590            // Before creating the Window node, we need to ensure all properties
2591            // referenced by window functions are available. Create a Project node
2592            // that loads these properties.
2593            let mut props_needed_for_window: Vec<Expr> = Vec::new();
2594            for window_expr in &window_exprs {
2595                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
2596            }
2597
2598            // Also include non-window expressions from projections that might be needed
2599            // Preserve qualified names (e.g., "e.salary") as aliases for properties
2600            let non_window_projections: Vec<_> = projections
2601                .iter()
2602                .filter_map(|(expr, alias)| {
2603                    // Keep expressions that don't have window_spec
2604                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
2605                        window_spec.is_none()
2606                    } else {
2607                        true
2608                    };
2609
2610                    if keep {
2611                        // For property references, use the qualified name as alias
2612                        let new_alias = if matches!(expr, Expr::Property(..)) {
2613                            Some(expr.to_string_repr())
2614                        } else {
2615                            alias.clone()
2616                        };
2617                        Some((expr.clone(), new_alias))
2618                    } else {
2619                        None
2620                    }
2621                })
2622                .collect();
2623
2624            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
2625                let mut intermediate_projections = non_window_projections;
2626                // Add any additional property references needed by window functions
2627                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
2628                for prop in &props_needed_for_window {
2629                    if !intermediate_projections
2630                        .iter()
2631                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
2632                    {
2633                        let qualified_name = prop.to_string_repr();
2634                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
2635                    }
2636                }
2637
2638                if !intermediate_projections.is_empty() {
2639                    plan = LogicalPlan::Project {
2640                        input: Box::new(plan),
2641                        projections: intermediate_projections,
2642                    };
2643                }
2644            }
2645
2646            // Transform property expressions in window functions to use qualified variable names
2647            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
2648            let transformed_window_exprs: Vec<Expr> = window_exprs
2649                .into_iter()
2650                .map(Self::transform_window_expr_properties)
2651                .collect();
2652
2653            plan = LogicalPlan::Window {
2654                input: Box::new(plan),
2655                window_exprs: transformed_window_exprs,
2656            };
2657        }
2658
2659        if let Some(order_by) = &return_clause.order_by {
2660            let alias_exprs: HashMap<String, Expr> = projections
2661                .iter()
2662                .filter_map(|(expr, alias)| {
2663                    alias.as_ref().map(|a| {
2664                        // ORDER BY is planned before the final RETURN projection.
2665                        // In aggregate contexts, aliases must resolve to the
2666                        // post-aggregate output columns, not raw aggregate calls.
2667                        let rewritten = if has_agg && !has_window_exprs {
2668                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
2669                                Expr::Variable(aggregate_column_name(expr))
2670                            } else if is_compound_aggregate(expr)
2671                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
2672                            {
2673                                replace_aggregates_with_columns(expr)
2674                            } else {
2675                                Expr::Variable(expr.to_string_repr())
2676                            }
2677                        } else {
2678                            expr.clone()
2679                        };
2680                        (a.clone(), rewritten)
2681                    })
2682                })
2683                .collect();
2684
2685            // Build an extended scope that includes RETURN aliases so ORDER BY
2686            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
2687            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
2688                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
2689                // Keep aliases and directly returned variables in scope.
2690                let mut scope = Vec::new();
2691                for (expr, alias) in &projections {
2692                    if let Some(a) = alias
2693                        && !is_var_in_scope(&scope, a)
2694                    {
2695                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2696                    }
2697                    if let Expr::Variable(v) = expr
2698                        && !is_var_in_scope(&scope, v)
2699                    {
2700                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2701                    }
2702                }
2703                scope
2704            } else {
2705                let mut scope = vars_in_scope.to_vec();
2706                for (expr, alias) in &projections {
2707                    if let Some(a) = alias
2708                        && !is_var_in_scope(&scope, a)
2709                    {
2710                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
2711                    } else if let Expr::Variable(v) = expr
2712                        && !is_var_in_scope(&scope, v)
2713                    {
2714                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
2715                    }
2716                }
2717                scope
2718            };
2719            // Validate ORDER BY expressions against the extended scope
2720            for item in order_by {
2721                // DISTINCT allows ORDER BY on the same projected expression
2722                // even when underlying variables are not otherwise visible.
2723                let matches_projected_expr = return_clause.distinct
2724                    && projections
2725                        .iter()
2726                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
2727                if !matches_projected_expr {
2728                    validate_expression_variables(&item.expr, &order_by_scope)?;
2729                    validate_expression(&item.expr, &order_by_scope)?;
2730                }
2731                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
2732                if has_aggregate_in_item && !has_agg {
2733                    return Err(anyhow!(
2734                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
2735                    ));
2736                }
2737                if has_agg && has_aggregate_in_item {
2738                    validate_with_order_by_aggregate_item(
2739                        &item.expr,
2740                        &projected_aggregate_reprs,
2741                        &projected_simple_reprs,
2742                        &projected_aliases,
2743                    )?;
2744                }
2745            }
2746            let rewritten_order_by: Vec<SortItem> = order_by
2747                .iter()
2748                .map(|item| SortItem {
2749                    expr: {
2750                        let mut rewritten =
2751                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
2752                        if has_agg && !has_window_exprs {
2753                            rewritten = replace_aggregates_with_columns(&rewritten);
2754                        }
2755                        rewritten
2756                    },
2757                    ascending: item.ascending,
2758                })
2759                .collect();
2760            plan = LogicalPlan::Sort {
2761                input: Box::new(plan),
2762                order_by: rewritten_order_by,
2763            };
2764        }
2765
2766        if return_clause.skip.is_some() || return_clause.limit.is_some() {
2767            let skip = return_clause
2768                .skip
2769                .as_ref()
2770                .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
2771                .transpose()?
2772                .flatten();
2773            let fetch = return_clause
2774                .limit
2775                .as_ref()
2776                .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
2777                .transpose()?
2778                .flatten();
2779
2780            plan = LogicalPlan::Limit {
2781                input: Box::new(plan),
2782                skip,
2783                fetch,
2784            };
2785        }
2786
2787        if !projections.is_empty() {
2788            // If we created an Aggregate or Window node, we need to adjust the final projections
2789            // to reference aggregate/window function results as columns instead of re-evaluating them
2790            let final_projections = if has_agg || has_window_exprs {
2791                projections
2792                    .into_iter()
2793                    .map(|(expr, alias)| {
2794                        // Check if this expression is an aggregate function
2795                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
2796                        {
2797                            // Bare aggregate — replace with column reference
2798                            let col_name = aggregate_column_name(&expr);
2799                            (Expr::Variable(col_name), alias)
2800                        } else if !has_window_exprs
2801                            && (is_compound_aggregate(&expr)
2802                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
2803                        {
2804                            // Compound aggregate — replace inner aggregates with
2805                            // column references, keep outer expression for Project
2806                            (replace_aggregates_with_columns(&expr), alias)
2807                        }
2808                        // For grouped RETURN projections, reference the pre-computed
2809                        // group-by output column instead of re-evaluating the expression
2810                        // against the aggregate schema (which no longer has original vars).
2811                        else if has_agg
2812                            && !has_window_exprs
2813                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
2814                        {
2815                            (Expr::Variable(expr.to_string_repr()), alias)
2816                        }
2817                        // Check if this expression is a window function
2818                        else if let Expr::FunctionCall {
2819                            window_spec: Some(_),
2820                            ..
2821                        } = &expr
2822                        {
2823                            // Replace window function with a column reference to its result
2824                            // The column name in the Window output is the full expression string
2825                            let window_col_name = expr.to_string_repr();
2826                            // Keep the original alias for the final output
2827                            (Expr::Variable(window_col_name), alias)
2828                        } else {
2829                            (expr, alias)
2830                        }
2831                    })
2832                    .collect()
2833            } else {
2834                projections
2835            };
2836
2837            plan = LogicalPlan::Project {
2838                input: Box::new(plan),
2839                projections: final_projections,
2840            };
2841        }
2842
2843        if return_clause.distinct {
2844            plan = LogicalPlan::Distinct {
2845                input: Box::new(plan),
2846            };
2847        }
2848
2849        Ok(plan)
2850    }
2851
2852    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
2853        let typed_vars: Vec<VariableInfo> = initial_vars
2854            .into_iter()
2855            .map(|name| VariableInfo::new(name, VariableType::Imported))
2856            .collect();
2857        self.plan_single_typed(query, typed_vars)
2858    }
2859
2860    /// Rewrite a query then plan it, preserving typed variable scope when possible.
2861    ///
2862    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
2863    /// variable info through and avoid false type-conflict errors in subqueries.
2864    /// For unions and other compound queries, falls back to `plan_with_scope`.
2865    fn rewrite_and_plan_typed(
2866        &self,
2867        query: Query,
2868        typed_vars: &[VariableInfo],
2869    ) -> Result<LogicalPlan> {
2870        let rewritten = crate::query::rewrite::rewrite_query(query)?;
2871        match rewritten {
2872            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
2873            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
2874        }
2875    }
2876
2877    fn plan_single_typed(
2878        &self,
2879        query: Statement,
2880        initial_vars: Vec<VariableInfo>,
2881    ) -> Result<LogicalPlan> {
2882        let mut plan = LogicalPlan::Empty;
2883
2884        if !initial_vars.is_empty() {
2885            // Project bound variables from outer scope as parameters.
2886            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
2887            // Use Parameter expressions to read from params, not Variable which would read from input row.
2888            let projections = initial_vars
2889                .iter()
2890                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
2891                .collect();
2892            plan = LogicalPlan::Project {
2893                input: Box::new(plan),
2894                projections,
2895            };
2896        }
2897
2898        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
2899        // Track variables introduced by CREATE clauses so we can distinguish
2900        // MATCH-introduced variables (which cannot be re-created as bare nodes)
2901        // from CREATE-introduced variables (which can be referenced as bare nodes).
2902        let mut create_introduced_vars: HashSet<String> = HashSet::new();
2903        // Track variables targeted by DELETE so we can reject property/label
2904        // access on deleted entities in subsequent RETURN clauses.
2905        let mut deleted_vars: HashSet<String> = HashSet::new();
2906
2907        let clause_count = query.clauses.len();
2908        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
2909            match clause {
2910                Clause::Match(match_clause) => {
2911                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
2912                }
2913                Clause::Unwind(unwind) => {
2914                    plan = LogicalPlan::Unwind {
2915                        input: Box::new(plan),
2916                        expr: unwind.expr.clone(),
2917                        variable: unwind.variable.clone(),
2918                    };
2919                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
2920                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
2921                }
2922                Clause::Call(call_clause) => {
2923                    match &call_clause.kind {
2924                        CallKind::Procedure {
2925                            procedure,
2926                            arguments,
2927                        } => {
2928                            // Validate that procedure arguments don't contain aggregation functions
2929                            for arg in arguments {
2930                                if contains_aggregate_recursive(arg) {
2931                                    return Err(anyhow!(
2932                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
2933                                    ));
2934                                }
2935                            }
2936
2937                            let has_yield_star = call_clause.yield_items.len() == 1
2938                                && call_clause.yield_items[0].name == "*"
2939                                && call_clause.yield_items[0].alias.is_none();
2940                            if has_yield_star && clause_idx + 1 < clause_count {
2941                                return Err(anyhow!(
2942                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
2943                                ));
2944                            }
2945
2946                            // Validate for duplicate yield names (VariableAlreadyBound)
2947                            let mut yield_names = Vec::new();
2948                            for item in &call_clause.yield_items {
2949                                if item.name == "*" {
2950                                    continue;
2951                                }
2952                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
2953                                if yield_names.contains(output_name) {
2954                                    return Err(anyhow!(
2955                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
2956                                        output_name
2957                                    ));
2958                                }
2959                                // Check against existing scope (in-query CALL must not shadow)
2960                                if clause_idx > 0
2961                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
2962                                {
2963                                    return Err(anyhow!(
2964                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
2965                                        output_name
2966                                    ));
2967                                }
2968                                yield_names.push(output_name.clone());
2969                            }
2970
2971                            let mut yields = Vec::new();
2972                            for item in &call_clause.yield_items {
2973                                if item.name == "*" {
2974                                    continue;
2975                                }
2976                                yields.push((item.name.clone(), item.alias.clone()));
2977                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
2978                                // Use Imported because procedure return types are unknown
2979                                // at plan time (could be nodes, edges, or scalars)
2980                                add_var_to_scope(
2981                                    &mut vars_in_scope,
2982                                    var_name,
2983                                    VariableType::Imported,
2984                                )?;
2985                            }
2986                            let proc_plan = LogicalPlan::ProcedureCall {
2987                                procedure_name: procedure.clone(),
2988                                arguments: arguments.clone(),
2989                                yield_items: yields.clone(),
2990                            };
2991
2992                            if matches!(plan, LogicalPlan::Empty) {
2993                                // Standalone CALL (first clause) — use directly
2994                                plan = proc_plan;
2995                            } else if yields.is_empty() {
2996                                // In-query CALL with no YIELD (void procedure):
2997                                // preserve the input rows unchanged
2998                            } else {
2999                                // In-query CALL with YIELD: cross-join input × procedure output
3000                                plan = LogicalPlan::Apply {
3001                                    input: Box::new(plan),
3002                                    subquery: Box::new(proc_plan),
3003                                    input_filter: None,
3004                                };
3005                            }
3006                        }
3007                        CallKind::Subquery(query) => {
3008                            let subquery_plan =
3009                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3010
3011                            // Extract variables from subquery RETURN clause
3012                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3013
3014                            // Add new variables to scope (as Scalar since they come from subquery projection)
3015                            for var in subquery_vars {
3016                                if !is_var_in_scope(&vars_in_scope, &var) {
3017                                    add_var_to_scope(
3018                                        &mut vars_in_scope,
3019                                        &var,
3020                                        VariableType::Scalar,
3021                                    )?;
3022                                }
3023                            }
3024
3025                            plan = LogicalPlan::SubqueryCall {
3026                                input: Box::new(plan),
3027                                subquery: Box::new(subquery_plan),
3028                            };
3029                        }
3030                    }
3031                }
3032                Clause::Merge(merge_clause) => {
3033                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3034
3035                    plan = LogicalPlan::Merge {
3036                        input: Box::new(plan),
3037                        pattern: merge_clause.pattern.clone(),
3038                        on_match: Some(SetClause {
3039                            items: merge_clause.on_match.clone(),
3040                        }),
3041                        on_create: Some(SetClause {
3042                            items: merge_clause.on_create.clone(),
3043                        }),
3044                    };
3045
3046                    for path in &merge_clause.pattern.paths {
3047                        if let Some(path_var) = &path.variable
3048                            && !path_var.is_empty()
3049                            && !is_var_in_scope(&vars_in_scope, path_var)
3050                        {
3051                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3052                        }
3053                        for element in &path.elements {
3054                            if let PatternElement::Node(n) = element {
3055                                if let Some(v) = &n.variable
3056                                    && !is_var_in_scope(&vars_in_scope, v)
3057                                {
3058                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3059                                }
3060                            } else if let PatternElement::Relationship(r) = element
3061                                && let Some(v) = &r.variable
3062                                && !is_var_in_scope(&vars_in_scope, v)
3063                            {
3064                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3065                            }
3066                        }
3067                    }
3068                }
3069                Clause::Create(create_clause) => {
3070                    // Validate CREATE patterns:
3071                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3072                    // - Bare nodes (v) are "references" if bound, "creations" if not
3073                    // - Relationships are always creations - can't rebind
3074                    // - Within CREATE, each new variable can only be defined once
3075                    // - Variables used in properties must be defined
3076                    let mut create_vars: Vec<&str> = Vec::new();
3077                    for path in &create_clause.pattern.paths {
3078                        let is_standalone_node = path.elements.len() == 1;
3079                        for element in &path.elements {
3080                            match element {
3081                                PatternElement::Node(n) => {
3082                                    validate_property_variables(
3083                                        &n.properties,
3084                                        &vars_in_scope,
3085                                        &create_vars,
3086                                    )?;
3087
3088                                    if let Some(v) = n.variable.as_deref()
3089                                        && !v.is_empty()
3090                                    {
3091                                        // A node is a "creation" if it has labels or properties
3092                                        let is_creation =
3093                                            !n.labels.is_empty() || n.properties.is_some();
3094
3095                                        if is_creation {
3096                                            check_not_already_bound(
3097                                                v,
3098                                                &vars_in_scope,
3099                                                &create_vars,
3100                                            )?;
3101                                            create_vars.push(v);
3102                                        } else if is_standalone_node
3103                                            && is_var_in_scope(&vars_in_scope, v)
3104                                            && !create_introduced_vars.contains(v)
3105                                        {
3106                                            // Standalone bare node referencing a variable from a
3107                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3108                                            // Bare nodes used as relationship endpoints
3109                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3110                                            return Err(anyhow!(
3111                                                "SyntaxError: VariableAlreadyBound - '{}'",
3112                                                v
3113                                            ));
3114                                        } else if !create_vars.contains(&v) {
3115                                            // New bare variable — register it
3116                                            create_vars.push(v);
3117                                        }
3118                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3119                                    }
3120                                }
3121                                PatternElement::Relationship(r) => {
3122                                    validate_property_variables(
3123                                        &r.properties,
3124                                        &vars_in_scope,
3125                                        &create_vars,
3126                                    )?;
3127
3128                                    if let Some(v) = r.variable.as_deref()
3129                                        && !v.is_empty()
3130                                    {
3131                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3132                                        create_vars.push(v);
3133                                    }
3134
3135                                    // Validate relationship constraints for CREATE
3136                                    if r.types.len() != 1 {
3137                                        return Err(anyhow!(
3138                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3139                                        ));
3140                                    }
3141                                    if r.direction == Direction::Both {
3142                                        return Err(anyhow!(
3143                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3144                                        ));
3145                                    }
3146                                    if r.range.is_some() {
3147                                        return Err(anyhow!(
3148                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3149                                        ));
3150                                    }
3151                                }
3152                                PatternElement::Parenthesized { .. } => {}
3153                            }
3154                        }
3155                    }
3156
3157                    // Batch consecutive CREATEs to avoid deep recursion
3158                    match &mut plan {
3159                        LogicalPlan::CreateBatch { patterns, .. } => {
3160                            // Append to existing batch
3161                            patterns.push(create_clause.pattern.clone());
3162                        }
3163                        LogicalPlan::Create { input, pattern } => {
3164                            // Convert single Create to CreateBatch with both patterns
3165                            let first_pattern = pattern.clone();
3166                            plan = LogicalPlan::CreateBatch {
3167                                input: input.clone(),
3168                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3169                            };
3170                        }
3171                        _ => {
3172                            // Start new Create (may become batch if more CREATEs follow)
3173                            plan = LogicalPlan::Create {
3174                                input: Box::new(plan),
3175                                pattern: create_clause.pattern.clone(),
3176                            };
3177                        }
3178                    }
3179                    // Add variables from created nodes and relationships to scope
3180                    for path in &create_clause.pattern.paths {
3181                        for element in &path.elements {
3182                            match element {
3183                                PatternElement::Node(n) => {
3184                                    if let Some(var) = &n.variable
3185                                        && !var.is_empty()
3186                                    {
3187                                        create_introduced_vars.insert(var.clone());
3188                                        add_var_to_scope(
3189                                            &mut vars_in_scope,
3190                                            var,
3191                                            VariableType::Node,
3192                                        )?;
3193                                    }
3194                                }
3195                                PatternElement::Relationship(r) => {
3196                                    if let Some(var) = &r.variable
3197                                        && !var.is_empty()
3198                                    {
3199                                        create_introduced_vars.insert(var.clone());
3200                                        add_var_to_scope(
3201                                            &mut vars_in_scope,
3202                                            var,
3203                                            VariableType::Edge,
3204                                        )?;
3205                                    }
3206                                }
3207                                PatternElement::Parenthesized { .. } => {
3208                                    // Skip for now - not commonly used in CREATE
3209                                }
3210                            }
3211                        }
3212                    }
3213                }
3214                Clause::Set(set_clause) => {
3215                    // Validate SET value expressions
3216                    for item in &set_clause.items {
3217                        match item {
3218                            SetItem::Property { value, .. }
3219                            | SetItem::Variable { value, .. }
3220                            | SetItem::VariablePlus { value, .. } => {
3221                                validate_expression_variables(value, &vars_in_scope)?;
3222                                validate_expression(value, &vars_in_scope)?;
3223                                if contains_pattern_predicate(value) {
3224                                    return Err(anyhow!(
3225                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3226                                    ));
3227                                }
3228                            }
3229                            SetItem::Labels { .. } => {}
3230                        }
3231                    }
3232                    plan = LogicalPlan::Set {
3233                        input: Box::new(plan),
3234                        items: set_clause.items.clone(),
3235                    };
3236                }
3237                Clause::Remove(remove_clause) => {
3238                    plan = LogicalPlan::Remove {
3239                        input: Box::new(plan),
3240                        items: remove_clause.items.clone(),
3241                    };
3242                }
3243                Clause::Delete(delete_clause) => {
3244                    // Validate DELETE targets
3245                    for item in &delete_clause.items {
3246                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3247                        if matches!(item, Expr::LabelCheck { .. }) {
3248                            return Err(anyhow!(
3249                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3250                            ));
3251                        }
3252                        let vars_used = collect_expr_variables(item);
3253                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3254                        if vars_used.is_empty() {
3255                            return Err(anyhow!(
3256                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3257                            ));
3258                        }
3259                        for var in &vars_used {
3260                            // Check if variable is defined
3261                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3262                                return Err(anyhow!(
3263                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3264                                    var
3265                                ));
3266                            }
3267                        }
3268                        // Strict type check only for simple variable references —
3269                        // complex expressions (property access, array index, etc.)
3270                        // may resolve to a node/edge at runtime even if the base
3271                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3272                        if let Expr::Variable(name) = item
3273                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3274                            && matches!(
3275                                info.var_type,
3276                                VariableType::Scalar | VariableType::ScalarLiteral
3277                            )
3278                        {
3279                            return Err(anyhow!(
3280                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3281                                name
3282                            ));
3283                        }
3284                    }
3285                    // Track deleted variables for later validation
3286                    for item in &delete_clause.items {
3287                        if let Expr::Variable(name) = item {
3288                            deleted_vars.insert(name.clone());
3289                        }
3290                    }
3291                    plan = LogicalPlan::Delete {
3292                        input: Box::new(plan),
3293                        items: delete_clause.items.clone(),
3294                        detach: delete_clause.detach,
3295                    };
3296                }
3297                Clause::With(with_clause) => {
3298                    let (new_plan, new_vars) =
3299                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3300                    plan = new_plan;
3301                    vars_in_scope = new_vars;
3302                }
3303                Clause::WithRecursive(with_recursive) => {
3304                    // Plan the recursive CTE
3305                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3306                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3307                    add_var_to_scope(
3308                        &mut vars_in_scope,
3309                        &with_recursive.name,
3310                        VariableType::Scalar,
3311                    )?;
3312                }
3313                Clause::Return(return_clause) => {
3314                    // Check for property/label access on deleted entities
3315                    if !deleted_vars.is_empty() {
3316                        for item in &return_clause.items {
3317                            if let ReturnItem::Expr { expr, .. } = item {
3318                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3319                            }
3320                        }
3321                    }
3322                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3323                } // All Clause variants are handled above - no catch-all needed
3324            }
3325        }
3326
3327        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3328        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3329        // If RETURN was used, the plan will have been wrapped in Project, so we only
3330        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3331        let plan = match &plan {
3332            LogicalPlan::Create { .. }
3333            | LogicalPlan::CreateBatch { .. }
3334            | LogicalPlan::Delete { .. }
3335            | LogicalPlan::Set { .. }
3336            | LogicalPlan::Remove { .. }
3337            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3338                input: Box::new(plan),
3339                skip: None,
3340                fetch: Some(0),
3341            },
3342            _ => plan,
3343        };
3344
3345        Ok(plan)
3346    }
3347
3348    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3349        match expr {
3350            Expr::Property(_, _) => {
3351                if !collected
3352                    .iter()
3353                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3354                {
3355                    collected.push(expr.clone());
3356                }
3357            }
3358            Expr::Variable(_) => {
3359                // Variables are already available, don't need to project them
3360            }
3361            Expr::BinaryOp { left, right, .. } => {
3362                Self::collect_properties_from_expr(left, collected);
3363                Self::collect_properties_from_expr(right, collected);
3364            }
3365            Expr::FunctionCall {
3366                args, window_spec, ..
3367            } => {
3368                for arg in args {
3369                    Self::collect_properties_from_expr(arg, collected);
3370                }
3371                if let Some(spec) = window_spec {
3372                    for partition_expr in &spec.partition_by {
3373                        Self::collect_properties_from_expr(partition_expr, collected);
3374                    }
3375                    for sort_item in &spec.order_by {
3376                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3377                    }
3378                }
3379            }
3380            Expr::List(items) => {
3381                for item in items {
3382                    Self::collect_properties_from_expr(item, collected);
3383                }
3384            }
3385            Expr::UnaryOp { expr: e, .. }
3386            | Expr::IsNull(e)
3387            | Expr::IsNotNull(e)
3388            | Expr::IsUnique(e) => {
3389                Self::collect_properties_from_expr(e, collected);
3390            }
3391            Expr::Case {
3392                expr,
3393                when_then,
3394                else_expr,
3395            } => {
3396                if let Some(e) = expr {
3397                    Self::collect_properties_from_expr(e, collected);
3398                }
3399                for (w, t) in when_then {
3400                    Self::collect_properties_from_expr(w, collected);
3401                    Self::collect_properties_from_expr(t, collected);
3402                }
3403                if let Some(e) = else_expr {
3404                    Self::collect_properties_from_expr(e, collected);
3405                }
3406            }
3407            Expr::In { expr, list } => {
3408                Self::collect_properties_from_expr(expr, collected);
3409                Self::collect_properties_from_expr(list, collected);
3410            }
3411            Expr::ArrayIndex { array, index } => {
3412                Self::collect_properties_from_expr(array, collected);
3413                Self::collect_properties_from_expr(index, collected);
3414            }
3415            Expr::ArraySlice { array, start, end } => {
3416                Self::collect_properties_from_expr(array, collected);
3417                if let Some(s) = start {
3418                    Self::collect_properties_from_expr(s, collected);
3419                }
3420                if let Some(e) = end {
3421                    Self::collect_properties_from_expr(e, collected);
3422                }
3423            }
3424            _ => {}
3425        }
3426    }
3427
3428    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3429        if let Expr::FunctionCall { window_spec, .. } = expr {
3430            // Collect any function with a window spec (OVER clause)
3431            if window_spec.is_some() {
3432                if !collected
3433                    .iter()
3434                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3435                {
3436                    collected.push(expr.clone());
3437                }
3438                return;
3439            }
3440        }
3441
3442        match expr {
3443            Expr::BinaryOp { left, right, .. } => {
3444                Self::collect_window_functions(left, collected);
3445                Self::collect_window_functions(right, collected);
3446            }
3447            Expr::FunctionCall { args, .. } => {
3448                for arg in args {
3449                    Self::collect_window_functions(arg, collected);
3450                }
3451            }
3452            Expr::List(items) => {
3453                for i in items {
3454                    Self::collect_window_functions(i, collected);
3455                }
3456            }
3457            Expr::Map(items) => {
3458                for (_, i) in items {
3459                    Self::collect_window_functions(i, collected);
3460                }
3461            }
3462            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3463                Self::collect_window_functions(e, collected);
3464            }
3465            Expr::Case {
3466                expr,
3467                when_then,
3468                else_expr,
3469            } => {
3470                if let Some(e) = expr {
3471                    Self::collect_window_functions(e, collected);
3472                }
3473                for (w, t) in when_then {
3474                    Self::collect_window_functions(w, collected);
3475                    Self::collect_window_functions(t, collected);
3476                }
3477                if let Some(e) = else_expr {
3478                    Self::collect_window_functions(e, collected);
3479                }
3480            }
3481            Expr::Reduce {
3482                init, list, expr, ..
3483            } => {
3484                Self::collect_window_functions(init, collected);
3485                Self::collect_window_functions(list, collected);
3486                Self::collect_window_functions(expr, collected);
3487            }
3488            Expr::Quantifier {
3489                list, predicate, ..
3490            } => {
3491                Self::collect_window_functions(list, collected);
3492                Self::collect_window_functions(predicate, collected);
3493            }
3494            Expr::In { expr, list } => {
3495                Self::collect_window_functions(expr, collected);
3496                Self::collect_window_functions(list, collected);
3497            }
3498            Expr::ArrayIndex { array, index } => {
3499                Self::collect_window_functions(array, collected);
3500                Self::collect_window_functions(index, collected);
3501            }
3502            Expr::ArraySlice { array, start, end } => {
3503                Self::collect_window_functions(array, collected);
3504                if let Some(s) = start {
3505                    Self::collect_window_functions(s, collected);
3506                }
3507                if let Some(e) = end {
3508                    Self::collect_window_functions(e, collected);
3509                }
3510            }
3511            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
3512            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
3513            _ => {}
3514        }
3515    }
3516
3517    /// Transform property expressions in manual window functions to use qualified variable names.
3518    ///
3519    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
3520    /// so the executor can look up values directly from the row HashMap after the
3521    /// intermediate projection has materialized these properties with qualified names.
3522    ///
3523    /// Transforms ALL window functions (both manual and aggregate).
3524    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
3525    fn transform_window_expr_properties(expr: Expr) -> Expr {
3526        let Expr::FunctionCall {
3527            name,
3528            args,
3529            window_spec: Some(spec),
3530            distinct,
3531        } = expr
3532        else {
3533            return expr;
3534        };
3535
3536        // Transform arguments for ALL window functions
3537        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
3538        let transformed_args = args
3539            .into_iter()
3540            .map(Self::transform_property_to_variable)
3541            .collect();
3542
3543        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
3544        let transformed_partition_by = spec
3545            .partition_by
3546            .into_iter()
3547            .map(Self::transform_property_to_variable)
3548            .collect();
3549
3550        let transformed_order_by = spec
3551            .order_by
3552            .into_iter()
3553            .map(|item| SortItem {
3554                expr: Self::transform_property_to_variable(item.expr),
3555                ascending: item.ascending,
3556            })
3557            .collect();
3558
3559        Expr::FunctionCall {
3560            name,
3561            args: transformed_args,
3562            window_spec: Some(WindowSpec {
3563                partition_by: transformed_partition_by,
3564                order_by: transformed_order_by,
3565            }),
3566            distinct,
3567        }
3568    }
3569
3570    /// Transform a property expression to a variable expression with qualified name.
3571    ///
3572    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
3573    fn transform_property_to_variable(expr: Expr) -> Expr {
3574        let Expr::Property(base, prop) = expr else {
3575            return expr;
3576        };
3577
3578        match *base {
3579            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
3580            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
3581        }
3582    }
3583
3584    /// Transform VALID_AT macro into function call
3585    ///
3586    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
3587    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
3588    fn transform_valid_at_to_function(expr: Expr) -> Expr {
3589        match expr {
3590            Expr::ValidAt {
3591                entity,
3592                timestamp,
3593                start_prop,
3594                end_prop,
3595            } => {
3596                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
3597                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
3598
3599                Expr::FunctionCall {
3600                    name: "uni.temporal.validAt".to_string(),
3601                    args: vec![
3602                        Self::transform_valid_at_to_function(*entity),
3603                        Expr::Literal(CypherLiteral::String(start)),
3604                        Expr::Literal(CypherLiteral::String(end)),
3605                        Self::transform_valid_at_to_function(*timestamp),
3606                    ],
3607                    distinct: false,
3608                    window_spec: None,
3609                }
3610            }
3611            // Recursively transform nested expressions
3612            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
3613                left: Box::new(Self::transform_valid_at_to_function(*left)),
3614                op,
3615                right: Box::new(Self::transform_valid_at_to_function(*right)),
3616            },
3617            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
3618                op,
3619                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3620            },
3621            Expr::FunctionCall {
3622                name,
3623                args,
3624                distinct,
3625                window_spec,
3626            } => Expr::FunctionCall {
3627                name,
3628                args: args
3629                    .into_iter()
3630                    .map(Self::transform_valid_at_to_function)
3631                    .collect(),
3632                distinct,
3633                window_spec,
3634            },
3635            Expr::Property(base, prop) => {
3636                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
3637            }
3638            Expr::List(items) => Expr::List(
3639                items
3640                    .into_iter()
3641                    .map(Self::transform_valid_at_to_function)
3642                    .collect(),
3643            ),
3644            Expr::In { expr, list } => Expr::In {
3645                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
3646                list: Box::new(Self::transform_valid_at_to_function(*list)),
3647            },
3648            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
3649            Expr::IsNotNull(e) => {
3650                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
3651            }
3652            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
3653            // Other cases: return as-is
3654            other => other,
3655        }
3656    }
3657
3658    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
3659    fn plan_match_clause(
3660        &self,
3661        match_clause: &MatchClause,
3662        plan: LogicalPlan,
3663        vars_in_scope: &mut Vec<VariableInfo>,
3664    ) -> Result<LogicalPlan> {
3665        let mut plan = plan;
3666
3667        if match_clause.pattern.paths.is_empty() {
3668            return Err(anyhow!("Empty pattern"));
3669        }
3670
3671        // Track variables introduced by this OPTIONAL MATCH
3672        let vars_before_pattern = vars_in_scope.len();
3673
3674        for path in &match_clause.pattern.paths {
3675            if let Some(mode) = &path.shortest_path_mode {
3676                plan =
3677                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
3678            } else {
3679                plan = self.plan_path(
3680                    path,
3681                    plan,
3682                    vars_in_scope,
3683                    match_clause.optional,
3684                    vars_before_pattern,
3685                )?;
3686            }
3687        }
3688
3689        // Collect variables introduced by this OPTIONAL MATCH pattern
3690        let optional_vars: HashSet<String> = if match_clause.optional {
3691            vars_in_scope[vars_before_pattern..]
3692                .iter()
3693                .map(|v| v.name.clone())
3694                .collect()
3695        } else {
3696            HashSet::new()
3697        };
3698
3699        // Handle WHERE clause with vector_similarity and predicate pushdown
3700        if let Some(predicate) = &match_clause.where_clause {
3701            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
3702        }
3703
3704        Ok(plan)
3705    }
3706
3707    /// Plan a shortestPath pattern.
3708    fn plan_shortest_path(
3709        &self,
3710        path: &PathPattern,
3711        plan: LogicalPlan,
3712        vars_in_scope: &mut Vec<VariableInfo>,
3713        mode: &ShortestPathMode,
3714        _vars_before_pattern: usize,
3715    ) -> Result<LogicalPlan> {
3716        let mut plan = plan;
3717        let elements = &path.elements;
3718
3719        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
3720        if elements.len() < 3 || elements.len().is_multiple_of(2) {
3721            return Err(anyhow!(
3722                "shortestPath requires at least one relationship: (a)-[*]->(b)"
3723            ));
3724        }
3725
3726        let source_node = match &elements[0] {
3727            PatternElement::Node(n) => n,
3728            _ => return Err(anyhow!("ShortestPath must start with a node")),
3729        };
3730        let rel = match &elements[1] {
3731            PatternElement::Relationship(r) => r,
3732            _ => {
3733                return Err(anyhow!(
3734                    "ShortestPath middle element must be a relationship"
3735                ));
3736            }
3737        };
3738        let target_node = match &elements[2] {
3739            PatternElement::Node(n) => n,
3740            _ => return Err(anyhow!("ShortestPath must end with a node")),
3741        };
3742
3743        let source_var = source_node
3744            .variable
3745            .clone()
3746            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
3747        let target_var = target_node
3748            .variable
3749            .clone()
3750            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
3751        let path_var = path
3752            .variable
3753            .clone()
3754            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
3755
3756        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
3757        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
3758
3759        // Plan source node if not bound
3760        if !source_bound {
3761            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
3762        } else if let Some(prop_filter) =
3763            self.properties_to_expr(&source_var, &source_node.properties)
3764        {
3765            plan = LogicalPlan::Filter {
3766                input: Box::new(plan),
3767                predicate: prop_filter,
3768                optional_variables: HashSet::new(),
3769            };
3770        }
3771
3772        // Plan target node if not bound
3773        let target_label_id = if !target_bound {
3774            // Use first label for target_label_id
3775            let target_label_name = target_node
3776                .labels
3777                .first()
3778                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
3779            let target_label_meta = self
3780                .schema
3781                .get_label_case_insensitive(target_label_name)
3782                .ok_or_else(|| anyhow!("Label {} not found", target_label_name))?;
3783
3784            let target_scan = LogicalPlan::Scan {
3785                label_id: target_label_meta.id,
3786                labels: target_node.labels.clone(),
3787                variable: target_var.clone(),
3788                filter: self.properties_to_expr(&target_var, &target_node.properties),
3789                optional: false,
3790            };
3791
3792            plan = Self::join_with_plan(plan, target_scan);
3793            target_label_meta.id
3794        } else {
3795            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
3796            {
3797                plan = LogicalPlan::Filter {
3798                    input: Box::new(plan),
3799                    predicate: prop_filter,
3800                    optional_variables: HashSet::new(),
3801                };
3802            }
3803            0 // Wildcard for already-bound target
3804        };
3805
3806        // Add ShortestPath operator
3807        let edge_type_ids = if rel.types.is_empty() {
3808            // If no type specified, fetch all edge types (both schema and schemaless)
3809            self.schema.all_edge_type_ids()
3810        } else {
3811            let mut ids = Vec::new();
3812            for type_name in &rel.types {
3813                let edge_meta = self
3814                    .schema
3815                    .edge_types
3816                    .get(type_name)
3817                    .ok_or_else(|| anyhow!("Edge type {} not found", type_name))?;
3818                ids.push(edge_meta.id);
3819            }
3820            ids
3821        };
3822
3823        // Extract hop constraints from relationship pattern
3824        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
3825        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
3826
3827        let sp_plan = match mode {
3828            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
3829                input: Box::new(plan),
3830                edge_type_ids,
3831                direction: rel.direction.clone(),
3832                source_variable: source_var.clone(),
3833                target_variable: target_var.clone(),
3834                target_label_id,
3835                path_variable: path_var.clone(),
3836                min_hops,
3837                max_hops,
3838            },
3839            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
3840                input: Box::new(plan),
3841                edge_type_ids,
3842                direction: rel.direction.clone(),
3843                source_variable: source_var.clone(),
3844                target_variable: target_var.clone(),
3845                target_label_id,
3846                path_variable: path_var.clone(),
3847                min_hops,
3848                max_hops,
3849            },
3850        };
3851
3852        if !source_bound {
3853            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
3854        }
3855        if !target_bound {
3856            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
3857        }
3858        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
3859
3860        Ok(sp_plan)
3861    }
3862    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
3863    ///
3864    /// This is a public entry point for the Locy plan builder to reuse the
3865    /// existing pattern-planning logic for clause bodies.
3866    pub fn plan_pattern(
3867        &self,
3868        pattern: &Pattern,
3869        initial_vars: &[VariableInfo],
3870    ) -> Result<LogicalPlan> {
3871        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
3872        let vars_before_pattern = vars_in_scope.len();
3873        let mut plan = LogicalPlan::Empty;
3874        for path in &pattern.paths {
3875            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
3876        }
3877        Ok(plan)
3878    }
3879
3880    /// Plan a regular MATCH path (not shortestPath).
3881    fn plan_path(
3882        &self,
3883        path: &PathPattern,
3884        plan: LogicalPlan,
3885        vars_in_scope: &mut Vec<VariableInfo>,
3886        optional: bool,
3887        vars_before_pattern: usize,
3888    ) -> Result<LogicalPlan> {
3889        let mut plan = plan;
3890        let elements = &path.elements;
3891        let mut i = 0;
3892
3893        let path_variable = path.variable.clone();
3894
3895        // Check for VariableAlreadyBound: path variable already in scope
3896        if let Some(pv) = &path_variable
3897            && !pv.is_empty()
3898            && is_var_in_scope(vars_in_scope, pv)
3899        {
3900            return Err(anyhow!(
3901                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3902                pv
3903            ));
3904        }
3905
3906        // Check for VariableAlreadyBound: path variable conflicts with element variables
3907        if let Some(pv) = &path_variable
3908            && !pv.is_empty()
3909        {
3910            for element in elements {
3911                match element {
3912                    PatternElement::Node(n) => {
3913                        if let Some(v) = &n.variable
3914                            && v == pv
3915                        {
3916                            return Err(anyhow!(
3917                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3918                                pv
3919                            ));
3920                        }
3921                    }
3922                    PatternElement::Relationship(r) => {
3923                        if let Some(v) = &r.variable
3924                            && v == pv
3925                        {
3926                            return Err(anyhow!(
3927                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
3928                                pv
3929                            ));
3930                        }
3931                    }
3932                    PatternElement::Parenthesized { .. } => {}
3933                }
3934            }
3935        }
3936
3937        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
3938        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
3939        let mut optional_pattern_vars: HashSet<String> = if optional {
3940            let mut vars = HashSet::new();
3941            for element in elements {
3942                match element {
3943                    PatternElement::Node(n) => {
3944                        if let Some(v) = &n.variable
3945                            && !v.is_empty()
3946                            && !is_var_in_scope(vars_in_scope, v)
3947                        {
3948                            vars.insert(v.clone());
3949                        }
3950                    }
3951                    PatternElement::Relationship(r) => {
3952                        if let Some(v) = &r.variable
3953                            && !v.is_empty()
3954                            && !is_var_in_scope(vars_in_scope, v)
3955                        {
3956                            vars.insert(v.clone());
3957                        }
3958                    }
3959                    PatternElement::Parenthesized { pattern, .. } => {
3960                        // Also check nested patterns
3961                        for nested_elem in &pattern.elements {
3962                            match nested_elem {
3963                                PatternElement::Node(n) => {
3964                                    if let Some(v) = &n.variable
3965                                        && !v.is_empty()
3966                                        && !is_var_in_scope(vars_in_scope, v)
3967                                    {
3968                                        vars.insert(v.clone());
3969                                    }
3970                                }
3971                                PatternElement::Relationship(r) => {
3972                                    if let Some(v) = &r.variable
3973                                        && !v.is_empty()
3974                                        && !is_var_in_scope(vars_in_scope, v)
3975                                    {
3976                                        vars.insert(v.clone());
3977                                    }
3978                                }
3979                                _ => {}
3980                            }
3981                        }
3982                    }
3983                }
3984            }
3985            // Include path variable if present
3986            if let Some(pv) = &path_variable
3987                && !pv.is_empty()
3988            {
3989                vars.insert(pv.clone());
3990            }
3991            vars
3992        } else {
3993            HashSet::new()
3994        };
3995
3996        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
3997        // These must participate in Trail mode (relationship uniqueness) enforcement
3998        // across ALL segments in this path, so that VLP segments like [*0..1] don't
3999        // traverse through edges already claimed by a bound relationship [r].
4000        let path_bound_edge_vars: HashSet<String> = {
4001            let mut bound = HashSet::new();
4002            for element in elements {
4003                if let PatternElement::Relationship(rel) = element
4004                    && let Some(ref var_name) = rel.variable
4005                    && !var_name.is_empty()
4006                    && vars_in_scope[..vars_before_pattern]
4007                        .iter()
4008                        .any(|v| v.name == *var_name)
4009                {
4010                    bound.insert(var_name.clone());
4011                }
4012            }
4013            bound
4014        };
4015
4016        // Track if any traverses were added (for zero-length path detection)
4017        let mut had_traverses = false;
4018        // Track the node variable for zero-length path binding
4019        let mut single_node_variable: Option<String> = None;
4020        // Collect node/edge variables for BindPath (fixed-length path binding)
4021        let mut path_node_vars: Vec<String> = Vec::new();
4022        let mut path_edge_vars: Vec<String> = Vec::new();
4023        // Track the last processed outer node variable for QPP source binding.
4024        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4025        let mut last_outer_node_var: Option<String> = None;
4026
4027        // Multi-hop path variables are now supported - path is accumulated across hops
4028        while i < elements.len() {
4029            let element = &elements[i];
4030            match element {
4031                PatternElement::Node(n) => {
4032                    let mut variable = n.variable.clone().unwrap_or_default();
4033                    if variable.is_empty() {
4034                        variable = self.next_anon_var();
4035                    }
4036                    // Track first node variable for zero-length path
4037                    if single_node_variable.is_none() {
4038                        single_node_variable = Some(variable.clone());
4039                    }
4040                    let is_bound =
4041                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4042                    if optional && !is_bound {
4043                        optional_pattern_vars.insert(variable.clone());
4044                    }
4045
4046                    if is_bound {
4047                        // Check for type conflict - can't use an Edge/Path as a Node
4048                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4049                            && !info.var_type.is_compatible_with(VariableType::Node)
4050                        {
4051                            return Err(anyhow!(
4052                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4053                                variable,
4054                                info.var_type
4055                            ));
4056                        }
4057                        if let Some(node_filter) =
4058                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4059                        {
4060                            plan = LogicalPlan::Filter {
4061                                input: Box::new(plan),
4062                                predicate: node_filter,
4063                                optional_variables: HashSet::new(),
4064                            };
4065                        }
4066                    } else {
4067                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4068                        if !variable.is_empty() {
4069                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4070                        }
4071                    }
4072
4073                    // Track source node for BindPath
4074                    if path_variable.is_some() && path_node_vars.is_empty() {
4075                        path_node_vars.push(variable.clone());
4076                    }
4077
4078                    // Look ahead for relationships
4079                    let mut current_source_var = variable;
4080                    last_outer_node_var = Some(current_source_var.clone());
4081                    i += 1;
4082                    while i < elements.len() {
4083                        if let PatternElement::Relationship(r) = &elements[i] {
4084                            if i + 1 < elements.len() {
4085                                let target_node_part = &elements[i + 1];
4086                                if let PatternElement::Node(n_target) = target_node_part {
4087                                    // For VLP traversals, pass path_variable through
4088                                    // For fixed-length, we use BindPath instead
4089                                    let is_vlp = r.range.is_some();
4090                                    let traverse_path_var =
4091                                        if is_vlp { path_variable.clone() } else { None };
4092
4093                                    // If we're about to start a VLP segment and there are
4094                                    // collected fixed-hop path vars, create an intermediate
4095                                    // BindPath for the fixed prefix first. The VLP will then
4096                                    // extend this existing path.
4097                                    if is_vlp
4098                                        && let Some(pv) = path_variable.as_ref()
4099                                        && !path_node_vars.is_empty()
4100                                    {
4101                                        plan = LogicalPlan::BindPath {
4102                                            input: Box::new(plan),
4103                                            node_variables: std::mem::take(&mut path_node_vars),
4104                                            edge_variables: std::mem::take(&mut path_edge_vars),
4105                                            path_variable: pv.clone(),
4106                                        };
4107                                        if !is_var_in_scope(vars_in_scope, pv) {
4108                                            add_var_to_scope(
4109                                                vars_in_scope,
4110                                                pv,
4111                                                VariableType::Path,
4112                                            )?;
4113                                        }
4114                                    }
4115
4116                                    // Plan the traverse from the current source node
4117                                    let target_was_bound =
4118                                        n_target.variable.as_ref().is_some_and(|v| {
4119                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4120                                        });
4121                                    let (new_plan, target_var, effective_target) = self
4122                                        .plan_traverse_with_source(
4123                                            plan,
4124                                            vars_in_scope,
4125                                            TraverseParams {
4126                                                rel: r,
4127                                                target_node: n_target,
4128                                                optional,
4129                                                path_variable: traverse_path_var,
4130                                                optional_pattern_vars: optional_pattern_vars
4131                                                    .clone(),
4132                                            },
4133                                            &current_source_var,
4134                                            vars_before_pattern,
4135                                            &path_bound_edge_vars,
4136                                        )?;
4137                                    plan = new_plan;
4138                                    if optional && !target_was_bound {
4139                                        optional_pattern_vars.insert(target_var.clone());
4140                                    }
4141
4142                                    // Track edge/target node for BindPath
4143                                    if path_variable.is_some() && !is_vlp {
4144                                        // Use the edge variable if given, otherwise use
4145                                        // the internal tracking column pattern.
4146                                        // Use effective_target (which may be __rebound_x
4147                                        // for bound-target traversals) to match the actual
4148                                        // column name produced by GraphTraverseExec.
4149                                        if let Some(ev) = &r.variable {
4150                                            path_edge_vars.push(ev.clone());
4151                                        } else {
4152                                            path_edge_vars
4153                                                .push(format!("__eid_to_{}", effective_target));
4154                                        }
4155                                        path_node_vars.push(target_var.clone());
4156                                    }
4157
4158                                    current_source_var = target_var;
4159                                    last_outer_node_var = Some(current_source_var.clone());
4160                                    had_traverses = true;
4161                                    i += 2;
4162                                } else {
4163                                    return Err(anyhow!("Relationship must be followed by a node"));
4164                                }
4165                            } else {
4166                                return Err(anyhow!("Relationship cannot be the last element"));
4167                            }
4168                        } else {
4169                            break;
4170                        }
4171                    }
4172                }
4173                PatternElement::Relationship(_) => {
4174                    return Err(anyhow!("Pattern must start with a node"));
4175                }
4176                PatternElement::Parenthesized { pattern, range } => {
4177                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4178                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4179                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4180                        return Err(anyhow!(
4181                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4182                        ));
4183                    }
4184
4185                    let source_node = match &pattern.elements[0] {
4186                        PatternElement::Node(n) => n,
4187                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4188                    };
4189
4190                    // Extract all relationship-node pairs (QPP steps)
4191                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4192                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4193                        let rel = match &pattern.elements[pair_idx] {
4194                            PatternElement::Relationship(r) => r,
4195                            _ => {
4196                                return Err(anyhow!(
4197                                    "Quantified pattern element at position {} must be a relationship",
4198                                    pair_idx
4199                                ));
4200                            }
4201                        };
4202                        let node = match &pattern.elements[pair_idx + 1] {
4203                            PatternElement::Node(n) => n,
4204                            _ => {
4205                                return Err(anyhow!(
4206                                    "Quantified pattern element at position {} must be a node",
4207                                    pair_idx + 1
4208                                ));
4209                            }
4210                        };
4211                        // Reject nested quantifiers
4212                        if rel.range.is_some() {
4213                            return Err(anyhow!(
4214                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4215                            ));
4216                        }
4217                        qpp_rels.push((rel, node));
4218                    }
4219
4220                    // Check if there's an outer target node after the Parenthesized element.
4221                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4222                    // target that should receive the traversal result.
4223                    let inner_target_node = qpp_rels.last().unwrap().1;
4224                    let outer_target_node = if i + 1 < elements.len() {
4225                        match &elements[i + 1] {
4226                            PatternElement::Node(n) => Some(n),
4227                            _ => None,
4228                        }
4229                    } else {
4230                        None
4231                    };
4232                    // Use the outer target for variable binding and filters; inner target
4233                    // labels are used for state constraints within the NFA.
4234                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4235
4236                    // For simple 3-element single-hop QPP without intermediate label constraints,
4237                    // fall back to existing VLP behavior (copy range to relationship).
4238                    let use_simple_vlp = qpp_rels.len() == 1
4239                        && inner_target_node
4240                            .labels
4241                            .first()
4242                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4243                            .is_none();
4244
4245                    // Plan source node.
4246                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4247                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4248                    // node variable, use it; otherwise fall back to the inner source.
4249                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4250                        // The preceding outer node is already bound and in scope
4251                        // Apply any property filters from the inner source node
4252                        if let Some(prop_filter) =
4253                            self.properties_to_expr(outer_src, &source_node.properties)
4254                        {
4255                            plan = LogicalPlan::Filter {
4256                                input: Box::new(plan),
4257                                predicate: prop_filter,
4258                                optional_variables: HashSet::new(),
4259                            };
4260                        }
4261                        outer_src.clone()
4262                    } else {
4263                        let sv = source_node
4264                            .variable
4265                            .clone()
4266                            .filter(|v| !v.is_empty())
4267                            .unwrap_or_else(|| self.next_anon_var());
4268
4269                        if is_var_in_scope(vars_in_scope, &sv) {
4270                            // Source is already bound, apply property filter if needed
4271                            if let Some(prop_filter) =
4272                                self.properties_to_expr(&sv, &source_node.properties)
4273                            {
4274                                plan = LogicalPlan::Filter {
4275                                    input: Box::new(plan),
4276                                    predicate: prop_filter,
4277                                    optional_variables: HashSet::new(),
4278                                };
4279                            }
4280                        } else {
4281                            // Source is unbound, scan it
4282                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4283                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4284                            if optional {
4285                                optional_pattern_vars.insert(sv.clone());
4286                            }
4287                        }
4288                        sv
4289                    };
4290
4291                    if use_simple_vlp {
4292                        // Simple single-hop QPP: apply range to relationship and use VLP path
4293                        let mut relationship = qpp_rels[0].0.clone();
4294                        relationship.range = range.clone();
4295
4296                        let target_was_bound = target_node
4297                            .variable
4298                            .as_ref()
4299                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4300                        let (new_plan, target_var, _effective_target) = self
4301                            .plan_traverse_with_source(
4302                                plan,
4303                                vars_in_scope,
4304                                TraverseParams {
4305                                    rel: &relationship,
4306                                    target_node,
4307                                    optional,
4308                                    path_variable: path_variable.clone(),
4309                                    optional_pattern_vars: optional_pattern_vars.clone(),
4310                                },
4311                                &source_variable,
4312                                vars_before_pattern,
4313                                &path_bound_edge_vars,
4314                            )?;
4315                        plan = new_plan;
4316                        if optional && !target_was_bound {
4317                            optional_pattern_vars.insert(target_var);
4318                        }
4319                    } else {
4320                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4321                        let mut qpp_step_infos = Vec::new();
4322                        let mut all_edge_type_ids = Vec::new();
4323
4324                        for (rel, node) in &qpp_rels {
4325                            let mut step_edge_type_ids = Vec::new();
4326                            if rel.types.is_empty() {
4327                                step_edge_type_ids = self.schema.all_edge_type_ids();
4328                            } else {
4329                                for type_name in &rel.types {
4330                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4331                                        step_edge_type_ids.push(edge_meta.id);
4332                                    }
4333                                }
4334                            }
4335                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4336
4337                            let target_label = node.labels.first().and_then(|l| {
4338                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4339                            });
4340
4341                            qpp_step_infos.push(QppStepInfo {
4342                                edge_type_ids: step_edge_type_ids,
4343                                direction: rel.direction.clone(),
4344                                target_label,
4345                            });
4346                        }
4347
4348                        // Deduplicate edge type IDs for adjacency warming
4349                        all_edge_type_ids.sort_unstable();
4350                        all_edge_type_ids.dedup();
4351
4352                        // Compute iteration bounds from range
4353                        let hops_per_iter = qpp_step_infos.len();
4354                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4355                        let (min_iter, max_iter) = if let Some(range) = range {
4356                            let min = range.min.unwrap_or(1) as usize;
4357                            let max = range
4358                                .max
4359                                .map(|m| m as usize)
4360                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4361                            (min, max)
4362                        } else {
4363                            (1, 1)
4364                        };
4365                        let min_hops = min_iter * hops_per_iter;
4366                        let max_hops = max_iter * hops_per_iter;
4367
4368                        // Target variable from the last node in the QPP sub-pattern
4369                        let target_variable = target_node
4370                            .variable
4371                            .clone()
4372                            .filter(|v| !v.is_empty())
4373                            .unwrap_or_else(|| self.next_anon_var());
4374
4375                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4376
4377                        // Determine target label for the final node
4378                        let target_label_meta = target_node
4379                            .labels
4380                            .first()
4381                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4382
4383                        // Collect scope match variables
4384                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4385                            [vars_before_pattern..]
4386                            .iter()
4387                            .map(|v| v.name.clone())
4388                            .collect();
4389                        scope_match_variables.insert(target_variable.clone());
4390
4391                        // Handle bound target: use rebound variable for traverse
4392                        let rebound_target_var = if target_is_bound {
4393                            Some(target_variable.clone())
4394                        } else {
4395                            None
4396                        };
4397                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4398                            format!("__rebound_{}", bv)
4399                        } else {
4400                            target_variable.clone()
4401                        };
4402
4403                        plan = LogicalPlan::Traverse {
4404                            input: Box::new(plan),
4405                            edge_type_ids: all_edge_type_ids,
4406                            direction: qpp_rels[0].0.direction.clone(),
4407                            source_variable: source_variable.to_string(),
4408                            target_variable: effective_target_var.clone(),
4409                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4410                            step_variable: None, // QPP doesn't expose intermediate edges
4411                            min_hops,
4412                            max_hops,
4413                            optional,
4414                            target_filter: self.node_filter_expr(
4415                                &target_variable,
4416                                &target_node.labels,
4417                                &target_node.properties,
4418                            ),
4419                            path_variable: path_variable.clone(),
4420                            edge_properties: HashSet::new(),
4421                            is_variable_length: true,
4422                            optional_pattern_vars: optional_pattern_vars.clone(),
4423                            scope_match_variables,
4424                            edge_filter_expr: None,
4425                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4426                            qpp_steps: Some(qpp_step_infos),
4427                        };
4428
4429                        // Handle bound target: filter rebound results against original variable
4430                        if let Some(ref btv) = rebound_target_var {
4431                            // Filter: __rebound_x._vid = x._vid
4432                            let filter_pred = Expr::BinaryOp {
4433                                left: Box::new(Expr::Property(
4434                                    Box::new(Expr::Variable(effective_target_var.clone())),
4435                                    "_vid".to_string(),
4436                                )),
4437                                op: BinaryOp::Eq,
4438                                right: Box::new(Expr::Property(
4439                                    Box::new(Expr::Variable(btv.clone())),
4440                                    "_vid".to_string(),
4441                                )),
4442                            };
4443                            plan = LogicalPlan::Filter {
4444                                input: Box::new(plan),
4445                                predicate: filter_pred,
4446                                optional_variables: if optional {
4447                                    optional_pattern_vars.clone()
4448                                } else {
4449                                    HashSet::new()
4450                                },
4451                            };
4452                        }
4453
4454                        // Add target variable to scope
4455                        if !target_is_bound {
4456                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4457                        }
4458
4459                        // Add path variable to scope
4460                        if let Some(ref pv) = path_variable
4461                            && !pv.is_empty()
4462                            && !is_var_in_scope(vars_in_scope, pv)
4463                        {
4464                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4465                        }
4466                    }
4467                    had_traverses = true;
4468
4469                    // Skip the outer target node if we consumed it
4470                    if outer_target_node.is_some() {
4471                        i += 2; // skip both Parenthesized and the following Node
4472                    } else {
4473                        i += 1;
4474                    }
4475                }
4476            }
4477        }
4478
4479        // If this is a single-node pattern with a path variable, bind the zero-length path
4480        // E.g., `p = (a)` should create a Path with one node and zero edges
4481        if let Some(ref path_var) = path_variable
4482            && !path_var.is_empty()
4483            && !had_traverses
4484            && let Some(node_var) = single_node_variable
4485        {
4486            plan = LogicalPlan::BindZeroLengthPath {
4487                input: Box::new(plan),
4488                node_variable: node_var,
4489                path_variable: path_var.clone(),
4490            };
4491            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4492        }
4493
4494        // Bind fixed-length path from collected node/edge variables
4495        if let Some(ref path_var) = path_variable
4496            && !path_var.is_empty()
4497            && had_traverses
4498            && !path_node_vars.is_empty()
4499            && !is_var_in_scope(vars_in_scope, path_var)
4500        {
4501            plan = LogicalPlan::BindPath {
4502                input: Box::new(plan),
4503                node_variables: path_node_vars,
4504                edge_variables: path_edge_vars,
4505                path_variable: path_var.clone(),
4506            };
4507            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
4508        }
4509
4510        Ok(plan)
4511    }
4512
4513    /// Plan a traverse with an explicit source variable name.
4514    ///
4515    /// Returns `(plan, target_variable, effective_target_variable)` where:
4516    /// - `target_variable` is the semantic variable name for downstream scope
4517    /// - `effective_target_variable` is the actual column-name prefix used by
4518    ///   the traverse (may be `__rebound_x` for bound-target patterns)
4519    fn plan_traverse_with_source(
4520        &self,
4521        plan: LogicalPlan,
4522        vars_in_scope: &mut Vec<VariableInfo>,
4523        params: TraverseParams<'_>,
4524        source_variable: &str,
4525        vars_before_pattern: usize,
4526        path_bound_edge_vars: &HashSet<String>,
4527    ) -> Result<(LogicalPlan, String, String)> {
4528        // Check for parameter used as relationship predicate
4529        if let Some(Expr::Parameter(_)) = &params.rel.properties {
4530            return Err(anyhow!(
4531                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
4532            ));
4533        }
4534
4535        let mut edge_type_ids = Vec::new();
4536        let mut dst_labels = Vec::new();
4537        let mut unknown_types = Vec::new();
4538
4539        if params.rel.types.is_empty() {
4540            // All types - include both schema and schemaless edge types
4541            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
4542            edge_type_ids = self.schema.all_edge_type_ids();
4543            for meta in self.schema.edge_types.values() {
4544                dst_labels.extend(meta.dst_labels.iter().cloned());
4545            }
4546        } else {
4547            for type_name in &params.rel.types {
4548                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4549                    // Known type - use standard Traverse with type_id
4550                    edge_type_ids.push(edge_meta.id);
4551                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
4552                } else {
4553                    // Unknown type - will use TraverseMainByType
4554                    unknown_types.push(type_name.clone());
4555                }
4556            }
4557        }
4558
4559        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
4560        edge_type_ids.sort_unstable();
4561        edge_type_ids.dedup();
4562        unknown_types.sort_unstable();
4563        unknown_types.dedup();
4564
4565        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
4566        if target_variable.is_empty() {
4567            target_variable = self.next_anon_var();
4568        }
4569        let target_is_bound =
4570            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
4571
4572        // Check for VariableTypeConflict: relationship variable used as node
4573        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
4574        if let Some(rel_var) = &params.rel.variable
4575            && !rel_var.is_empty()
4576            && rel_var == &target_variable
4577        {
4578            return Err(anyhow!(
4579                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
4580                rel_var
4581            ));
4582        }
4583
4584        // Check for VariableTypeConflict/RelationshipUniquenessViolation
4585        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
4586        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
4587        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
4588        let mut bound_edge_var: Option<String> = None;
4589        let mut bound_edge_list_var: Option<String> = None;
4590        if let Some(rel_var) = &params.rel.variable
4591            && !rel_var.is_empty()
4592            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
4593        {
4594            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
4595                .iter()
4596                .any(|v| v.name == *rel_var);
4597
4598            if info.var_type == VariableType::Edge {
4599                // Check if this edge variable comes from a previous clause (before this MATCH)
4600                if is_from_previous_clause {
4601                    // Edge variable bound from previous clause - this is allowed
4602                    // We'll filter the traversal to match this specific edge
4603                    bound_edge_var = Some(rel_var.clone());
4604                } else {
4605                    // Same relationship variable used twice in the same MATCH clause
4606                    return Err(anyhow!(
4607                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
4608                        rel_var
4609                    ));
4610                }
4611            } else if params.rel.range.is_some()
4612                && is_from_previous_clause
4613                && matches!(
4614                    info.var_type,
4615                    VariableType::Scalar | VariableType::ScalarLiteral
4616                )
4617            {
4618                // Allow VLP rebound against a previously bound relationship list
4619                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
4620                bound_edge_list_var = Some(rel_var.clone());
4621            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
4622                return Err(anyhow!(
4623                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
4624                    rel_var,
4625                    info.var_type
4626                ));
4627            }
4628        }
4629
4630        // Check for VariableTypeConflict: target node variable already bound as non-Node
4631        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
4632        if target_is_bound
4633            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4634            && !info.var_type.is_compatible_with(VariableType::Node)
4635        {
4636            return Err(anyhow!(
4637                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4638                target_variable,
4639                info.var_type
4640            ));
4641        }
4642
4643        // If all requested types are unknown (schemaless), use TraverseMainByType
4644        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
4645        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
4646        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
4647            // All types are unknown - use schemaless traversal
4648
4649            let is_variable_length = params.rel.range.is_some();
4650
4651            const DEFAULT_MAX_HOPS: usize = 100;
4652            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4653                let min = range.min.unwrap_or(1) as usize;
4654                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4655                (min, max)
4656            } else {
4657                (1, 1)
4658            };
4659
4660            // For both single-hop and variable-length paths:
4661            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4662            //   Single-hop: step_var holds a single edge object
4663            //   VLP: step_var holds a list of edge objects
4664            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4665            let step_var = params.rel.variable.clone();
4666            let path_var = params.path_variable.clone();
4667
4668            // Compute scope_match_variables for relationship uniqueness scoping.
4669            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4670                .iter()
4671                .map(|v| v.name.clone())
4672                .collect();
4673            if let Some(ref sv) = step_var {
4674                // Only add the step variable to scope if it's NOT rebound from a previous clause.
4675                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
4676                // filtering because the second MATCH intentionally reuses the same edge.
4677                if bound_edge_var.is_none() {
4678                    scope_match_variables.insert(sv.clone());
4679                }
4680            }
4681            scope_match_variables.insert(target_variable.clone());
4682            // Include bound edge variables from this path for cross-segment Trail mode
4683            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
4684            // edges already claimed by a bound relationship [r] in the same path.
4685            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
4686            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
4687            scope_match_variables.extend(
4688                path_bound_edge_vars
4689                    .iter()
4690                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
4691                    .cloned(),
4692            );
4693
4694            let mut plan = LogicalPlan::TraverseMainByType {
4695                type_names: unknown_types,
4696                input: Box::new(plan),
4697                direction: params.rel.direction.clone(),
4698                source_variable: source_variable.to_string(),
4699                target_variable: target_variable.clone(),
4700                step_variable: step_var.clone(),
4701                min_hops,
4702                max_hops,
4703                optional: params.optional,
4704                target_filter: self.node_filter_expr(
4705                    &target_variable,
4706                    &params.target_node.labels,
4707                    &params.target_node.properties,
4708                ),
4709                path_variable: path_var.clone(),
4710                is_variable_length,
4711                optional_pattern_vars: params.optional_pattern_vars.clone(),
4712                scope_match_variables,
4713                edge_filter_expr: if is_variable_length {
4714                    let filter_var = step_var
4715                        .clone()
4716                        .unwrap_or_else(|| "__anon_edge".to_string());
4717                    self.properties_to_expr(&filter_var, &params.rel.properties)
4718                } else {
4719                    None
4720                },
4721                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4722            };
4723
4724            // Only apply bound target filter for Imported variables (from outer scope/subquery).
4725            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4726            // uses Parameter which requires the value to be in params (subquery context).
4727            if target_is_bound
4728                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4729                && info.var_type == VariableType::Imported
4730            {
4731                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4732            }
4733
4734            // Apply relationship property predicates for fixed-length schemaless
4735            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
4736            // For VLP, predicates are stored inline in edge_filter_expr (above).
4737            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4738            if !is_variable_length
4739                && let Some(edge_var_name) = step_var.as_ref()
4740                && let Some(edge_prop_filter) =
4741                    self.properties_to_expr(edge_var_name, &params.rel.properties)
4742            {
4743                let filter_optional_vars = if params.optional {
4744                    params.optional_pattern_vars.clone()
4745                } else {
4746                    HashSet::new()
4747                };
4748                plan = LogicalPlan::Filter {
4749                    input: Box::new(plan),
4750                    predicate: edge_prop_filter,
4751                    optional_variables: filter_optional_vars,
4752                };
4753            }
4754
4755            // Add the bound variables to scope
4756            if let Some(sv) = &step_var {
4757                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
4758                if is_variable_length
4759                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
4760                {
4761                    info.is_vlp = true;
4762                }
4763            }
4764            if let Some(pv) = &path_var
4765                && !is_var_in_scope(vars_in_scope, pv)
4766            {
4767                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
4768            }
4769            if !is_var_in_scope(vars_in_scope, &target_variable) {
4770                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
4771            }
4772
4773            return Ok((plan, target_variable.clone(), target_variable));
4774        }
4775
4776        // If we have a mix of known and unknown types, error for now
4777        // (could be extended to Union of Traverse + TraverseMainByType)
4778        if !unknown_types.is_empty() {
4779            return Err(anyhow!(
4780                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
4781                unknown_types
4782            ));
4783        }
4784
4785        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
4786            // Use first label for target_label_id
4787            // For schemaless support, allow unknown target labels
4788            self.schema.get_label_case_insensitive(label_name)
4789        } else if !target_is_bound {
4790            // Infer from edge type(s)
4791            let unique_dsts: Vec<_> = dst_labels
4792                .into_iter()
4793                .collect::<HashSet<_>>()
4794                .into_iter()
4795                .collect();
4796            if unique_dsts.len() == 1 {
4797                let label_name = &unique_dsts[0];
4798                self.schema.get_label_case_insensitive(label_name)
4799            } else {
4800                // Multiple or no destination labels inferred - allow any target
4801                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
4802                // where the edge type can connect to multiple labels
4803                None
4804            }
4805        } else {
4806            None
4807        };
4808
4809        // Check if this is a variable-length pattern (has range specifier like *1..3)
4810        let is_variable_length = params.rel.range.is_some();
4811
4812        // For VLP patterns, default min to 1 and max to a reasonable limit.
4813        // For single-hop patterns (no range), both are 1.
4814        const DEFAULT_MAX_HOPS: usize = 100;
4815        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
4816            let min = range.min.unwrap_or(1) as usize;
4817            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
4818            (min, max)
4819        } else {
4820            (1, 1)
4821        };
4822
4823        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
4824        //   Single-hop: step_var holds a single edge object
4825        //   VLP: step_var holds a list of edge objects
4826        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
4827        let step_var = params.rel.variable.clone();
4828        let path_var = params.path_variable.clone();
4829
4830        // If we have a bound edge variable from a previous clause, use a temp variable
4831        // for the Traverse step, then filter to match the bound edge
4832        let rebound_var = bound_edge_var
4833            .as_ref()
4834            .or(bound_edge_list_var.as_ref())
4835            .cloned();
4836        let effective_step_var = if let Some(ref bv) = rebound_var {
4837            Some(format!("__rebound_{}", bv))
4838        } else {
4839            step_var.clone()
4840        };
4841
4842        // If we have a bound target variable from a previous clause (e.g. WITH),
4843        // use a temp variable for the Traverse step, then filter to match the bound
4844        // target — mirroring the bound edge pattern above.
4845        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
4846            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
4847                .map(|info| info.var_type == VariableType::Imported)
4848                .unwrap_or(false);
4849            if !is_imported {
4850                Some(target_variable.clone())
4851            } else {
4852                None
4853            }
4854        } else {
4855            None
4856        };
4857
4858        let effective_target_var = if let Some(ref bv) = rebound_target_var {
4859            format!("__rebound_{}", bv)
4860        } else {
4861            target_variable.clone()
4862        };
4863
4864        // Collect all variables (node + edge) from the current MATCH clause scope
4865        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
4866        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
4867        // if their associated variable is in this set. This prevents relationship
4868        // uniqueness from being enforced across disconnected MATCH clauses.
4869        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
4870            .iter()
4871            .map(|v| v.name.clone())
4872            .collect();
4873        // Include the current traverse's edge variable (not yet added to vars_in_scope)
4874        if let Some(ref sv) = effective_step_var {
4875            scope_match_variables.insert(sv.clone());
4876        }
4877        // Include the target variable (not yet added to vars_in_scope)
4878        scope_match_variables.insert(effective_target_var.clone());
4879        // Include bound edge variables from this path for cross-segment Trail mode
4880        // enforcement (same as the schemaless path above).
4881        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
4882
4883        let mut plan = LogicalPlan::Traverse {
4884            input: Box::new(plan),
4885            edge_type_ids,
4886            direction: params.rel.direction.clone(),
4887            source_variable: source_variable.to_string(),
4888            target_variable: effective_target_var.clone(),
4889            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
4890            step_variable: effective_step_var.clone(),
4891            min_hops,
4892            max_hops,
4893            optional: params.optional,
4894            target_filter: self.node_filter_expr(
4895                &target_variable,
4896                &params.target_node.labels,
4897                &params.target_node.properties,
4898            ),
4899            path_variable: path_var.clone(),
4900            edge_properties: HashSet::new(),
4901            is_variable_length,
4902            optional_pattern_vars: params.optional_pattern_vars.clone(),
4903            scope_match_variables,
4904            edge_filter_expr: if is_variable_length {
4905                // Use the step variable name, or a fallback for anonymous edges.
4906                // The variable name is used by properties_to_expr to build
4907                // `var.prop = value` expressions. For BFS property checking,
4908                // only the property name and value matter (the variable name
4909                // is stripped during extraction).
4910                let filter_var = effective_step_var
4911                    .clone()
4912                    .unwrap_or_else(|| "__anon_edge".to_string());
4913                self.properties_to_expr(&filter_var, &params.rel.properties)
4914            } else {
4915                None
4916            },
4917            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
4918            qpp_steps: None,
4919        };
4920
4921        // Pre-compute optional variables set for filter nodes in this traverse.
4922        // Used by relationship property filters and bound-edge filters below.
4923        let filter_optional_vars = if params.optional {
4924            params.optional_pattern_vars.clone()
4925        } else {
4926            HashSet::new()
4927        };
4928
4929        // Apply relationship property predicates (e.g. [r {k: v}]).
4930        // For VLP, predicates are stored inline in edge_filter_expr (above).
4931        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
4932        if !is_variable_length
4933            && let Some(edge_var_name) = effective_step_var.as_ref()
4934            && let Some(edge_prop_filter) =
4935                self.properties_to_expr(edge_var_name, &params.rel.properties)
4936        {
4937            plan = LogicalPlan::Filter {
4938                input: Box::new(plan),
4939                predicate: edge_prop_filter,
4940                optional_variables: filter_optional_vars.clone(),
4941            };
4942        }
4943
4944        // Only apply bound target filter for Imported variables (from outer scope/subquery).
4945        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
4946        // uses Parameter which requires the value to be in params (subquery context).
4947        if target_is_bound
4948            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
4949            && info.var_type == VariableType::Imported
4950        {
4951            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
4952        }
4953
4954        // If we have a bound edge variable, add a filter to match it
4955        if let Some(ref bv) = bound_edge_var {
4956            let temp_var = format!("__rebound_{}", bv);
4957            let bound_check = Expr::BinaryOp {
4958                left: Box::new(Expr::Property(
4959                    Box::new(Expr::Variable(temp_var)),
4960                    "_eid".to_string(),
4961                )),
4962                op: BinaryOp::Eq,
4963                right: Box::new(Expr::Property(
4964                    Box::new(Expr::Variable(bv.clone())),
4965                    "_eid".to_string(),
4966                )),
4967            };
4968            plan = LogicalPlan::Filter {
4969                input: Box::new(plan),
4970                predicate: bound_check,
4971                optional_variables: filter_optional_vars.clone(),
4972            };
4973        }
4974
4975        // If we have a bound relationship list variable for a VLP pattern,
4976        // add a filter to match the traversed relationship list exactly.
4977        if let Some(ref bv) = bound_edge_list_var {
4978            let temp_var = format!("__rebound_{}", bv);
4979            let temp_eids = Expr::ListComprehension {
4980                variable: "__rebound_edge".to_string(),
4981                list: Box::new(Expr::Variable(temp_var)),
4982                where_clause: None,
4983                map_expr: Box::new(Expr::FunctionCall {
4984                    name: "toInteger".to_string(),
4985                    args: vec![Expr::Property(
4986                        Box::new(Expr::Variable("__rebound_edge".to_string())),
4987                        "_eid".to_string(),
4988                    )],
4989                    distinct: false,
4990                    window_spec: None,
4991                }),
4992            };
4993            let bound_eids = Expr::ListComprehension {
4994                variable: "__bound_edge".to_string(),
4995                list: Box::new(Expr::Variable(bv.clone())),
4996                where_clause: None,
4997                map_expr: Box::new(Expr::FunctionCall {
4998                    name: "toInteger".to_string(),
4999                    args: vec![Expr::Property(
5000                        Box::new(Expr::Variable("__bound_edge".to_string())),
5001                        "_eid".to_string(),
5002                    )],
5003                    distinct: false,
5004                    window_spec: None,
5005                }),
5006            };
5007            let bound_list_check = Expr::BinaryOp {
5008                left: Box::new(temp_eids),
5009                op: BinaryOp::Eq,
5010                right: Box::new(bound_eids),
5011            };
5012            plan = LogicalPlan::Filter {
5013                input: Box::new(plan),
5014                predicate: bound_list_check,
5015                optional_variables: filter_optional_vars.clone(),
5016            };
5017        }
5018
5019        // If we have a bound target variable (non-imported), add a filter to constrain
5020        // the traversal output to match the previously bound target node.
5021        if let Some(ref bv) = rebound_target_var {
5022            let temp_var = format!("__rebound_{}", bv);
5023            let bound_check = Expr::BinaryOp {
5024                left: Box::new(Expr::Property(
5025                    Box::new(Expr::Variable(temp_var.clone())),
5026                    "_vid".to_string(),
5027                )),
5028                op: BinaryOp::Eq,
5029                right: Box::new(Expr::Property(
5030                    Box::new(Expr::Variable(bv.clone())),
5031                    "_vid".to_string(),
5032                )),
5033            };
5034            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5035            // so that OptionalFilterExec excludes it from the grouping key and
5036            // properly nullifies it in recovery rows when all matches are filtered out.
5037            // Without this, each traverse result creates its own group (keyed by
5038            // __rebound_c._vid), and null-row recovery emits a spurious null row
5039            // for every non-matching target instead of one per source group.
5040            let mut rebound_filter_vars = filter_optional_vars;
5041            if params.optional {
5042                rebound_filter_vars.insert(temp_var);
5043            }
5044            plan = LogicalPlan::Filter {
5045                input: Box::new(plan),
5046                predicate: bound_check,
5047                optional_variables: rebound_filter_vars,
5048            };
5049        }
5050
5051        // Add the bound variables to scope
5052        // Skip adding the edge variable if it's already bound from a previous clause
5053        if let Some(sv) = &step_var
5054            && bound_edge_var.is_none()
5055            && bound_edge_list_var.is_none()
5056        {
5057            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5058            if is_variable_length
5059                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5060            {
5061                info.is_vlp = true;
5062            }
5063        }
5064        if let Some(pv) = &path_var
5065            && !is_var_in_scope(vars_in_scope, pv)
5066        {
5067            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5068        }
5069        if !is_var_in_scope(vars_in_scope, &target_variable) {
5070            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5071        }
5072
5073        Ok((plan, target_variable, effective_target_var))
5074    }
5075
5076    /// Combine a new scan plan with an existing plan.
5077    ///
5078    /// If the existing plan is `Empty`, returns the new plan directly.
5079    /// Otherwise, wraps them in a `CrossJoin`.
5080    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5081        if matches!(existing, LogicalPlan::Empty) {
5082            new
5083        } else {
5084            LogicalPlan::CrossJoin {
5085                left: Box::new(existing),
5086                right: Box::new(new),
5087            }
5088        }
5089    }
5090
5091    /// Split node map predicates into scan-pushable and residual filters.
5092    ///
5093    /// A predicate is scan-pushable when its value expression references only
5094    /// the node variable itself (or no variables). Predicates referencing other
5095    /// in-scope variables (correlated predicates) are returned as residual so
5096    /// they can be applied after joining with the existing plan.
5097    fn split_node_property_filters_for_scan(
5098        &self,
5099        variable: &str,
5100        properties: &Option<Expr>,
5101    ) -> (Option<Expr>, Option<Expr>) {
5102        let entries = match properties {
5103            Some(Expr::Map(entries)) => entries,
5104            _ => return (None, None),
5105        };
5106
5107        if entries.is_empty() {
5108            return (None, None);
5109        }
5110
5111        let mut pushdown_entries = Vec::new();
5112        let mut residual_entries = Vec::new();
5113
5114        for (prop, val_expr) in entries {
5115            let vars = collect_expr_variables(val_expr);
5116            if vars.iter().all(|v| v == variable) {
5117                pushdown_entries.push((prop.clone(), val_expr.clone()));
5118            } else {
5119                residual_entries.push((prop.clone(), val_expr.clone()));
5120            }
5121        }
5122
5123        let pushdown_map = if pushdown_entries.is_empty() {
5124            None
5125        } else {
5126            Some(Expr::Map(pushdown_entries))
5127        };
5128        let residual_map = if residual_entries.is_empty() {
5129            None
5130        } else {
5131            Some(Expr::Map(residual_entries))
5132        };
5133
5134        (
5135            self.properties_to_expr(variable, &pushdown_map),
5136            self.properties_to_expr(variable, &residual_map),
5137        )
5138    }
5139
5140    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5141    fn plan_unbound_node(
5142        &self,
5143        node: &NodePattern,
5144        variable: &str,
5145        plan: LogicalPlan,
5146        optional: bool,
5147    ) -> Result<LogicalPlan> {
5148        // Properties handling
5149        let properties = match &node.properties {
5150            Some(Expr::Map(entries)) => entries.as_slice(),
5151            Some(Expr::Parameter(_)) => {
5152                return Err(anyhow!(
5153                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5154                ));
5155            }
5156            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5157            None => &[],
5158        };
5159
5160        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5161
5162        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5163            if let Some(predicate) = residual {
5164                LogicalPlan::Filter {
5165                    input: Box::new(input),
5166                    predicate,
5167                    optional_variables: HashSet::new(),
5168                }
5169            } else {
5170                input
5171            }
5172        };
5173
5174        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5175            self.split_node_property_filters_for_scan(variable, &node.properties)
5176        } else {
5177            (self.properties_to_expr(variable, &node.properties), None)
5178        };
5179
5180        // Check for ext_id in properties when no label is specified
5181        if node.labels.is_empty() {
5182            // Try to find ext_id property for main table lookup
5183            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5184                // Extract the ext_id value as a string
5185                let ext_id = match ext_id_value {
5186                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5187                    _ => {
5188                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5189                    }
5190                };
5191
5192                // Build filter for remaining properties (excluding ext_id)
5193                let remaining_props: Vec<_> = properties
5194                    .iter()
5195                    .filter(|(k, _)| k != "ext_id")
5196                    .cloned()
5197                    .collect();
5198
5199                let remaining_expr = if remaining_props.is_empty() {
5200                    None
5201                } else {
5202                    Some(Expr::Map(remaining_props))
5203                };
5204
5205                let (prop_filter, residual_filter) = if has_existing_scope {
5206                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5207                } else {
5208                    (self.properties_to_expr(variable, &remaining_expr), None)
5209                };
5210
5211                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5212                    variable: variable.to_string(),
5213                    ext_id,
5214                    filter: prop_filter,
5215                    optional,
5216                };
5217
5218                let joined = Self::join_with_plan(plan, ext_id_lookup);
5219                return Ok(apply_residual_filter(joined, residual_filter));
5220            }
5221
5222            // No ext_id: create ScanAll for unlabeled node pattern
5223            let scan_all = LogicalPlan::ScanAll {
5224                variable: variable.to_string(),
5225                filter: node_scan_filter,
5226                optional,
5227            };
5228
5229            let joined = Self::join_with_plan(plan, scan_all);
5230            return Ok(apply_residual_filter(joined, node_residual_filter));
5231        }
5232
5233        // Use first label for label_id (primary label for dataset selection)
5234        let label_name = &node.labels[0];
5235
5236        // Check if label exists in schema
5237        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5238            // Known label: use standard Scan
5239            let scan = LogicalPlan::Scan {
5240                label_id: label_meta.id,
5241                labels: node.labels.clone(),
5242                variable: variable.to_string(),
5243                filter: node_scan_filter,
5244                optional,
5245            };
5246
5247            let joined = Self::join_with_plan(plan, scan);
5248            Ok(apply_residual_filter(joined, node_residual_filter))
5249        } else {
5250            // Unknown label: use ScanMainByLabels for schemaless support
5251            let scan_main = LogicalPlan::ScanMainByLabels {
5252                labels: node.labels.clone(),
5253                variable: variable.to_string(),
5254                filter: node_scan_filter,
5255                optional,
5256            };
5257
5258            let joined = Self::join_with_plan(plan, scan_main);
5259            Ok(apply_residual_filter(joined, node_residual_filter))
5260        }
5261    }
5262
5263    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
5264    ///
5265    /// When `optional_vars` is non-empty, the Filter will preserve rows where
5266    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
5267    fn plan_where_clause(
5268        &self,
5269        predicate: &Expr,
5270        plan: LogicalPlan,
5271        vars_in_scope: &[VariableInfo],
5272        optional_vars: HashSet<String>,
5273    ) -> Result<LogicalPlan> {
5274        // Validate no aggregation functions in WHERE clause
5275        validate_no_aggregation_in_where(predicate)?;
5276
5277        // Validate all variables used are in scope
5278        validate_expression_variables(predicate, vars_in_scope)?;
5279
5280        // Validate expression types (function args, boolean operators)
5281        validate_expression(predicate, vars_in_scope)?;
5282
5283        // Check that WHERE predicate isn't a bare node/edge/path variable
5284        if let Expr::Variable(var_name) = predicate
5285            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
5286            && matches!(
5287                info.var_type,
5288                VariableType::Node | VariableType::Edge | VariableType::Path
5289            )
5290        {
5291            return Err(anyhow!(
5292                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
5293                info.var_type
5294            ));
5295        }
5296
5297        let mut plan = plan;
5298
5299        // Transform VALID_AT macro to function call
5300        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
5301
5302        let mut current_predicate =
5303            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
5304
5305        // 1. Try to extract vector_similarity predicate for optimization
5306        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
5307            let vs = &extraction.predicate;
5308            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
5309                plan = Self::replace_scan_with_knn(
5310                    plan,
5311                    &vs.variable,
5312                    &vs.property,
5313                    vs.query.clone(),
5314                    vs.threshold,
5315                );
5316                if let Some(residual) = extraction.residual {
5317                    current_predicate = residual;
5318                } else {
5319                    current_predicate = Expr::TRUE;
5320                }
5321            }
5322        }
5323
5324        // 3. Push eligible predicates to Scan OR Traverse filters
5325        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
5326        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
5327        // rows. Let them stay in the Filter operator which handles NULL preservation.
5328        for var in vars_in_scope {
5329            // Skip pushdown for optional variables - they need NULL preservation in Filter
5330            if optional_vars.contains(&var.name) {
5331                continue;
5332            }
5333
5334            // Check if var is produced by a Scan
5335            if Self::find_scan_label_id(&plan, &var.name).is_some() {
5336                let (pushable, residual) =
5337                    Self::extract_variable_predicates(&current_predicate, &var.name);
5338
5339                for pred in pushable {
5340                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
5341                }
5342
5343                if let Some(r) = residual {
5344                    current_predicate = r;
5345                } else {
5346                    current_predicate = Expr::TRUE;
5347                }
5348            } else if Self::is_traverse_target(&plan, &var.name) {
5349                // Push to Traverse
5350                let (pushable, residual) =
5351                    Self::extract_variable_predicates(&current_predicate, &var.name);
5352
5353                for pred in pushable {
5354                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
5355                }
5356
5357                if let Some(r) = residual {
5358                    current_predicate = r;
5359                } else {
5360                    current_predicate = Expr::TRUE;
5361                }
5362            }
5363        }
5364
5365        // 4. Push predicates to Apply.input_filter
5366        // This filters input rows BEFORE executing correlated subqueries.
5367        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
5368
5369        // 5. Add Filter node for any remaining predicates
5370        if !current_predicate.is_true_literal() {
5371            plan = LogicalPlan::Filter {
5372                input: Box::new(plan),
5373                predicate: current_predicate,
5374                optional_variables: optional_vars,
5375            };
5376        }
5377
5378        Ok(plan)
5379    }
5380
5381    fn rewrite_predicates_using_indexes(
5382        &self,
5383        predicate: &Expr,
5384        plan: &LogicalPlan,
5385        vars_in_scope: &[VariableInfo],
5386    ) -> Result<Expr> {
5387        let mut rewritten = predicate.clone();
5388
5389        for var in vars_in_scope {
5390            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
5391                // Find label name
5392                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
5393
5394                if let Some(label) = label_name
5395                    && let Some(props) = self.schema.properties.get(&label)
5396                {
5397                    for (gen_col, meta) in props {
5398                        if meta.generation_expression.is_some() {
5399                            // Use cached parsed expression
5400                            if let Some(schema_expr) =
5401                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
5402                            {
5403                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
5404                                rewritten = Self::replace_expression(
5405                                    rewritten,
5406                                    schema_expr,
5407                                    &var.name,
5408                                    gen_col,
5409                                );
5410                            }
5411                        }
5412                    }
5413                }
5414            }
5415        }
5416        Ok(rewritten)
5417    }
5418
5419    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
5420        // First, normalize schema_expr to use query_var
5421        let schema_var = schema_expr.extract_variable();
5422
5423        if let Some(s_var) = schema_var {
5424            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
5425
5426            if expr == target_expr {
5427                return Expr::Property(
5428                    Box::new(Expr::Variable(query_var.to_string())),
5429                    gen_col.to_string(),
5430                );
5431            }
5432        }
5433
5434        // Recurse
5435        match expr {
5436            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
5437                left: Box::new(Self::replace_expression(
5438                    *left,
5439                    schema_expr,
5440                    query_var,
5441                    gen_col,
5442                )),
5443                op,
5444                right: Box::new(Self::replace_expression(
5445                    *right,
5446                    schema_expr,
5447                    query_var,
5448                    gen_col,
5449                )),
5450            },
5451            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
5452                op,
5453                expr: Box::new(Self::replace_expression(
5454                    *expr,
5455                    schema_expr,
5456                    query_var,
5457                    gen_col,
5458                )),
5459            },
5460            Expr::FunctionCall {
5461                name,
5462                args,
5463                distinct,
5464                window_spec,
5465            } => Expr::FunctionCall {
5466                name,
5467                args: args
5468                    .into_iter()
5469                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
5470                    .collect(),
5471                distinct,
5472                window_spec,
5473            },
5474            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
5475                *expr,
5476                schema_expr,
5477                query_var,
5478                gen_col,
5479            ))),
5480            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
5481                *expr,
5482                schema_expr,
5483                query_var,
5484                gen_col,
5485            ))),
5486            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
5487                *expr,
5488                schema_expr,
5489                query_var,
5490                gen_col,
5491            ))),
5492            Expr::ArrayIndex {
5493                array: e,
5494                index: idx,
5495            } => Expr::ArrayIndex {
5496                array: Box::new(Self::replace_expression(
5497                    *e,
5498                    schema_expr,
5499                    query_var,
5500                    gen_col,
5501                )),
5502                index: Box::new(Self::replace_expression(
5503                    *idx,
5504                    schema_expr,
5505                    query_var,
5506                    gen_col,
5507                )),
5508            },
5509            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
5510                array: Box::new(Self::replace_expression(
5511                    *array,
5512                    schema_expr,
5513                    query_var,
5514                    gen_col,
5515                )),
5516                start: start.map(|s| {
5517                    Box::new(Self::replace_expression(
5518                        *s,
5519                        schema_expr,
5520                        query_var,
5521                        gen_col,
5522                    ))
5523                }),
5524                end: end.map(|e| {
5525                    Box::new(Self::replace_expression(
5526                        *e,
5527                        schema_expr,
5528                        query_var,
5529                        gen_col,
5530                    ))
5531                }),
5532            },
5533            Expr::List(exprs) => Expr::List(
5534                exprs
5535                    .into_iter()
5536                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
5537                    .collect(),
5538            ),
5539            Expr::Map(entries) => Expr::Map(
5540                entries
5541                    .into_iter()
5542                    .map(|(k, v)| {
5543                        (
5544                            k,
5545                            Self::replace_expression(v, schema_expr, query_var, gen_col),
5546                        )
5547                    })
5548                    .collect(),
5549            ),
5550            Expr::Property(e, prop) => Expr::Property(
5551                Box::new(Self::replace_expression(
5552                    *e,
5553                    schema_expr,
5554                    query_var,
5555                    gen_col,
5556                )),
5557                prop,
5558            ),
5559            Expr::Case {
5560                expr: case_expr,
5561                when_then,
5562                else_expr,
5563            } => Expr::Case {
5564                expr: case_expr.map(|e| {
5565                    Box::new(Self::replace_expression(
5566                        *e,
5567                        schema_expr,
5568                        query_var,
5569                        gen_col,
5570                    ))
5571                }),
5572                when_then: when_then
5573                    .into_iter()
5574                    .map(|(w, t)| {
5575                        (
5576                            Self::replace_expression(w, schema_expr, query_var, gen_col),
5577                            Self::replace_expression(t, schema_expr, query_var, gen_col),
5578                        )
5579                    })
5580                    .collect(),
5581                else_expr: else_expr.map(|e| {
5582                    Box::new(Self::replace_expression(
5583                        *e,
5584                        schema_expr,
5585                        query_var,
5586                        gen_col,
5587                    ))
5588                }),
5589            },
5590            Expr::Reduce {
5591                accumulator,
5592                init,
5593                variable: reduce_var,
5594                list,
5595                expr: reduce_expr,
5596            } => Expr::Reduce {
5597                accumulator,
5598                init: Box::new(Self::replace_expression(
5599                    *init,
5600                    schema_expr,
5601                    query_var,
5602                    gen_col,
5603                )),
5604                variable: reduce_var,
5605                list: Box::new(Self::replace_expression(
5606                    *list,
5607                    schema_expr,
5608                    query_var,
5609                    gen_col,
5610                )),
5611                expr: Box::new(Self::replace_expression(
5612                    *reduce_expr,
5613                    schema_expr,
5614                    query_var,
5615                    gen_col,
5616                )),
5617            },
5618
5619            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
5620            _ => expr,
5621        }
5622    }
5623
5624    /// Check if the variable is the target of a Traverse node
5625    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
5626        match plan {
5627            LogicalPlan::Traverse {
5628                target_variable,
5629                input,
5630                ..
5631            } => target_variable == variable || Self::is_traverse_target(input, variable),
5632            LogicalPlan::Filter { input, .. }
5633            | LogicalPlan::Project { input, .. }
5634            | LogicalPlan::Sort { input, .. }
5635            | LogicalPlan::Limit { input, .. }
5636            | LogicalPlan::Aggregate { input, .. }
5637            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
5638            LogicalPlan::CrossJoin { left, right } => {
5639                Self::is_traverse_target(left, variable)
5640                    || Self::is_traverse_target(right, variable)
5641            }
5642            _ => false,
5643        }
5644    }
5645
5646    /// Push a predicate into a Traverse's target_filter for the specified variable
5647    fn push_predicate_to_traverse(
5648        plan: LogicalPlan,
5649        variable: &str,
5650        predicate: Expr,
5651    ) -> LogicalPlan {
5652        match plan {
5653            LogicalPlan::Traverse {
5654                input,
5655                edge_type_ids,
5656                direction,
5657                source_variable,
5658                target_variable,
5659                target_label_id,
5660                step_variable,
5661                min_hops,
5662                max_hops,
5663                optional,
5664                target_filter,
5665                path_variable,
5666                edge_properties,
5667                is_variable_length,
5668                optional_pattern_vars,
5669                scope_match_variables,
5670                edge_filter_expr,
5671                path_mode,
5672                qpp_steps,
5673            } => {
5674                if target_variable == variable {
5675                    // Found the traverse producing this variable
5676                    let new_filter = match target_filter {
5677                        Some(existing) => Some(Expr::BinaryOp {
5678                            left: Box::new(existing),
5679                            op: BinaryOp::And,
5680                            right: Box::new(predicate),
5681                        }),
5682                        None => Some(predicate),
5683                    };
5684                    LogicalPlan::Traverse {
5685                        input,
5686                        edge_type_ids,
5687                        direction,
5688                        source_variable,
5689                        target_variable,
5690                        target_label_id,
5691                        step_variable,
5692                        min_hops,
5693                        max_hops,
5694                        optional,
5695                        target_filter: new_filter,
5696                        path_variable,
5697                        edge_properties,
5698                        is_variable_length,
5699                        optional_pattern_vars,
5700                        scope_match_variables,
5701                        edge_filter_expr,
5702                        path_mode,
5703                        qpp_steps,
5704                    }
5705                } else {
5706                    // Recurse into input
5707                    LogicalPlan::Traverse {
5708                        input: Box::new(Self::push_predicate_to_traverse(
5709                            *input, variable, predicate,
5710                        )),
5711                        edge_type_ids,
5712                        direction,
5713                        source_variable,
5714                        target_variable,
5715                        target_label_id,
5716                        step_variable,
5717                        min_hops,
5718                        max_hops,
5719                        optional,
5720                        target_filter,
5721                        path_variable,
5722                        edge_properties,
5723                        is_variable_length,
5724                        optional_pattern_vars,
5725                        scope_match_variables,
5726                        edge_filter_expr,
5727                        path_mode,
5728                        qpp_steps,
5729                    }
5730                }
5731            }
5732            LogicalPlan::Filter {
5733                input,
5734                predicate: p,
5735                optional_variables: opt_vars,
5736            } => LogicalPlan::Filter {
5737                input: Box::new(Self::push_predicate_to_traverse(
5738                    *input, variable, predicate,
5739                )),
5740                predicate: p,
5741                optional_variables: opt_vars,
5742            },
5743            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
5744                input: Box::new(Self::push_predicate_to_traverse(
5745                    *input, variable, predicate,
5746                )),
5747                projections,
5748            },
5749            LogicalPlan::CrossJoin { left, right } => {
5750                // Check which side has the variable
5751                if Self::is_traverse_target(&left, variable) {
5752                    LogicalPlan::CrossJoin {
5753                        left: Box::new(Self::push_predicate_to_traverse(
5754                            *left, variable, predicate,
5755                        )),
5756                        right,
5757                    }
5758                } else {
5759                    LogicalPlan::CrossJoin {
5760                        left,
5761                        right: Box::new(Self::push_predicate_to_traverse(
5762                            *right, variable, predicate,
5763                        )),
5764                    }
5765                }
5766            }
5767            other => other,
5768        }
5769    }
5770
5771    /// Plan a WITH clause, handling aggregations and projections.
5772    fn plan_with_clause(
5773        &self,
5774        with_clause: &WithClause,
5775        plan: LogicalPlan,
5776        vars_in_scope: &[VariableInfo],
5777    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
5778        let mut plan = plan;
5779        let mut group_by: Vec<Expr> = Vec::new();
5780        let mut aggregates: Vec<Expr> = Vec::new();
5781        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
5782        let mut has_agg = false;
5783        let mut projections = Vec::new();
5784        let mut new_vars: Vec<VariableInfo> = Vec::new();
5785        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
5786        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
5787        let mut projected_aliases: HashSet<String> = HashSet::new();
5788        let mut has_unaliased_non_variable_expr = false;
5789
5790        for item in &with_clause.items {
5791            match item {
5792                ReturnItem::All => {
5793                    // WITH * - add all variables in scope
5794                    for v in vars_in_scope {
5795                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5796                        projected_aliases.insert(v.name.clone());
5797                        projected_simple_reprs.insert(v.name.clone());
5798                    }
5799                    new_vars.extend(vars_in_scope.iter().cloned());
5800                }
5801                ReturnItem::Expr { expr, alias, .. } => {
5802                    if matches!(expr, Expr::Wildcard) {
5803                        for v in vars_in_scope {
5804                            projections
5805                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
5806                            projected_aliases.insert(v.name.clone());
5807                            projected_simple_reprs.insert(v.name.clone());
5808                        }
5809                        new_vars.extend(vars_in_scope.iter().cloned());
5810                    } else {
5811                        // Validate expression variables and syntax
5812                        validate_expression_variables(expr, vars_in_scope)?;
5813                        validate_expression(expr, vars_in_scope)?;
5814                        // Pattern predicates are not allowed in WITH
5815                        if contains_pattern_predicate(expr) {
5816                            return Err(anyhow!(
5817                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
5818                            ));
5819                        }
5820
5821                        projections.push((expr.clone(), alias.clone()));
5822                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
5823                            // Bare aggregate — push directly
5824                            has_agg = true;
5825                            aggregates.push(expr.clone());
5826                            projected_aggregate_reprs.insert(expr.to_string_repr());
5827                        } else if !is_window_function(expr)
5828                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
5829                        {
5830                            // Compound aggregate or expression containing aggregates
5831                            has_agg = true;
5832                            compound_agg_exprs.push(expr.clone());
5833                            for inner in extract_inner_aggregates(expr) {
5834                                let repr = inner.to_string_repr();
5835                                if !projected_aggregate_reprs.contains(&repr) {
5836                                    aggregates.push(inner);
5837                                    projected_aggregate_reprs.insert(repr);
5838                                }
5839                            }
5840                        } else if !group_by.contains(expr) {
5841                            group_by.push(expr.clone());
5842                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
5843                                projected_simple_reprs.insert(expr.to_string_repr());
5844                            }
5845                        }
5846
5847                        // Preserve non-scalar type information when WITH aliases
5848                        // entity/path-capable expressions.
5849                        if let Some(a) = alias {
5850                            if projected_aliases.contains(a) {
5851                                return Err(anyhow!(
5852                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5853                                    a
5854                                ));
5855                            }
5856                            let inferred = infer_with_output_type(expr, vars_in_scope);
5857                            new_vars.push(VariableInfo::new(a.clone(), inferred));
5858                            projected_aliases.insert(a.clone());
5859                        } else if let Expr::Variable(v) = expr {
5860                            if projected_aliases.contains(v) {
5861                                return Err(anyhow!(
5862                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
5863                                    v
5864                                ));
5865                            }
5866                            // Preserve the original type if the variable is just passed through
5867                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
5868                                new_vars.push(existing.clone());
5869                            } else {
5870                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
5871                            }
5872                            projected_aliases.insert(v.clone());
5873                        } else {
5874                            has_unaliased_non_variable_expr = true;
5875                        }
5876                    }
5877                }
5878            }
5879        }
5880
5881        // Collect extra variables that need to survive the projection stage
5882        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
5883        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
5884        let mut passthrough_extras: Vec<String> = Vec::new();
5885        let mut seen_passthrough: HashSet<String> = HashSet::new();
5886
5887        if let Some(predicate) = &with_clause.where_clause {
5888            for name in collect_expr_variables(predicate) {
5889                if !projected_names.contains(name.as_str())
5890                    && find_var_in_scope(vars_in_scope, &name).is_some()
5891                    && seen_passthrough.insert(name.clone())
5892                {
5893                    passthrough_extras.push(name);
5894                }
5895            }
5896        }
5897
5898        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
5899        // Carry those variables through the projection so Sort can resolve them.
5900        if !has_agg && let Some(order_by) = &with_clause.order_by {
5901            for item in order_by {
5902                for name in collect_expr_variables(&item.expr) {
5903                    if !projected_names.contains(name.as_str())
5904                        && find_var_in_scope(vars_in_scope, &name).is_some()
5905                        && seen_passthrough.insert(name.clone())
5906                    {
5907                        passthrough_extras.push(name);
5908                    }
5909                }
5910            }
5911        }
5912
5913        let needs_cleanup = !passthrough_extras.is_empty();
5914        for extra in &passthrough_extras {
5915            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
5916        }
5917
5918        // Validate compound aggregate expressions: non-aggregate refs must be
5919        // individually present in the group_by as simple variables or properties.
5920        if has_agg {
5921            let group_by_reprs: HashSet<String> =
5922                group_by.iter().map(|e| e.to_string_repr()).collect();
5923            for expr in &compound_agg_exprs {
5924                let mut refs = Vec::new();
5925                collect_non_aggregate_refs(expr, false, &mut refs);
5926                for r in &refs {
5927                    let is_covered = match r {
5928                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
5929                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
5930                    };
5931                    if !is_covered {
5932                        return Err(anyhow!(
5933                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
5934                        ));
5935                    }
5936                }
5937            }
5938        }
5939
5940        if has_agg {
5941            plan = LogicalPlan::Aggregate {
5942                input: Box::new(plan),
5943                group_by,
5944                aggregates,
5945            };
5946
5947            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
5948            // can reference the WITH aliases instead of raw column names.
5949            let rename_projections: Vec<(Expr, Option<String>)> = projections
5950                .iter()
5951                .map(|(expr, alias)| {
5952                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
5953                        // Bare aggregate — reference by column name
5954                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
5955                    } else if is_compound_aggregate(expr)
5956                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
5957                    {
5958                        // Compound aggregate — replace inner aggregates with
5959                        // column references, keep outer expression
5960                        (replace_aggregates_with_columns(expr), alias.clone())
5961                    } else {
5962                        (Expr::Variable(expr.to_string_repr()), alias.clone())
5963                    }
5964                })
5965                .collect();
5966            plan = LogicalPlan::Project {
5967                input: Box::new(plan),
5968                projections: rename_projections,
5969            };
5970        } else if !projections.is_empty() {
5971            plan = LogicalPlan::Project {
5972                input: Box::new(plan),
5973                projections: projections.clone(),
5974            };
5975        }
5976
5977        // Apply the WHERE filter (post-projection, with extras still visible).
5978        if let Some(predicate) = &with_clause.where_clause {
5979            plan = LogicalPlan::Filter {
5980                input: Box::new(plan),
5981                predicate: predicate.clone(),
5982                optional_variables: HashSet::new(),
5983            };
5984        }
5985
5986        // Validate and apply ORDER BY for WITH clause.
5987        // Keep pre-WITH vars in scope for parser compatibility, then apply
5988        // stricter checks for aggregate-containing ORDER BY items.
5989        if let Some(order_by) = &with_clause.order_by {
5990            // Build a mapping from aliases and projected expression reprs to
5991            // output columns of the preceding Project/Aggregate pipeline.
5992            let with_order_aliases: HashMap<String, Expr> = projections
5993                .iter()
5994                .flat_map(|(expr, alias)| {
5995                    let output_col = if let Some(a) = alias {
5996                        a.clone()
5997                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
5998                        aggregate_column_name(expr)
5999                    } else {
6000                        expr.to_string_repr()
6001                    };
6002
6003                    let mut entries = Vec::new();
6004                    // ORDER BY alias
6005                    if let Some(a) = alias {
6006                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
6007                    }
6008                    // ORDER BY projected expression (e.g. me.age)
6009                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
6010                    entries
6011                })
6012                .collect();
6013
6014            let order_by_scope: Vec<VariableInfo> = {
6015                let mut scope = new_vars.clone();
6016                for v in vars_in_scope {
6017                    if !is_var_in_scope(&scope, &v.name) {
6018                        scope.push(v.clone());
6019                    }
6020                }
6021                scope
6022            };
6023            for item in order_by {
6024                validate_expression_variables(&item.expr, &order_by_scope)?;
6025                validate_expression(&item.expr, &order_by_scope)?;
6026                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
6027                if has_aggregate_in_item && !has_agg {
6028                    return Err(anyhow!(
6029                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
6030                    ));
6031                }
6032                if has_agg && has_aggregate_in_item {
6033                    validate_with_order_by_aggregate_item(
6034                        &item.expr,
6035                        &projected_aggregate_reprs,
6036                        &projected_simple_reprs,
6037                        &projected_aliases,
6038                    )?;
6039                }
6040            }
6041            let rewritten_order_by: Vec<SortItem> = order_by
6042                .iter()
6043                .map(|item| {
6044                    let mut expr =
6045                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
6046                    if has_agg {
6047                        // Rewrite any aggregate calls to the aggregate output
6048                        // columns produced by Aggregate.
6049                        expr = replace_aggregates_with_columns(&expr);
6050                        // Then re-map projected property expressions to aliases
6051                        // from the WITH projection.
6052                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
6053                    }
6054                    SortItem {
6055                        expr,
6056                        ascending: item.ascending,
6057                    }
6058                })
6059                .collect();
6060            plan = LogicalPlan::Sort {
6061                input: Box::new(plan),
6062                order_by: rewritten_order_by,
6063            };
6064        }
6065
6066        // Non-variable expressions in WITH must be aliased.
6067        // This check is intentionally placed after ORDER BY validation so
6068        // higher-priority semantic errors (e.g., ambiguous aggregation in
6069        // ORDER BY) can surface first.
6070        if has_unaliased_non_variable_expr {
6071            return Err(anyhow!(
6072                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
6073            ));
6074        }
6075
6076        // Validate and apply SKIP/LIMIT for WITH clause
6077        let skip = with_clause
6078            .skip
6079            .as_ref()
6080            .map(|e| parse_non_negative_integer(e, "SKIP", &self.params))
6081            .transpose()?
6082            .flatten();
6083        let fetch = with_clause
6084            .limit
6085            .as_ref()
6086            .map(|e| parse_non_negative_integer(e, "LIMIT", &self.params))
6087            .transpose()?
6088            .flatten();
6089
6090        if skip.is_some() || fetch.is_some() {
6091            plan = LogicalPlan::Limit {
6092                input: Box::new(plan),
6093                skip,
6094                fetch,
6095            };
6096        }
6097
6098        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
6099        if needs_cleanup {
6100            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
6101                .iter()
6102                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
6103                .collect();
6104            plan = LogicalPlan::Project {
6105                input: Box::new(plan),
6106                projections: cleanup_projections,
6107            };
6108        }
6109
6110        if with_clause.distinct {
6111            plan = LogicalPlan::Distinct {
6112                input: Box::new(plan),
6113            };
6114        }
6115
6116        Ok((plan, new_vars))
6117    }
6118
6119    fn plan_with_recursive(
6120        &self,
6121        with_recursive: &WithRecursiveClause,
6122        _prev_plan: LogicalPlan,
6123        vars_in_scope: &[VariableInfo],
6124    ) -> Result<LogicalPlan> {
6125        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
6126        match &*with_recursive.query {
6127            Query::Union { left, right, .. } => {
6128                // Plan the anchor (initial) query with current scope
6129                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
6130
6131                // Plan the recursive query with the CTE name added to scope
6132                // so it can reference itself
6133                let mut recursive_scope = vars_in_scope.to_vec();
6134                recursive_scope.push(VariableInfo::new(
6135                    with_recursive.name.clone(),
6136                    VariableType::Scalar,
6137                ));
6138                let recursive_plan =
6139                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
6140
6141                Ok(LogicalPlan::RecursiveCTE {
6142                    cte_name: with_recursive.name.clone(),
6143                    initial: Box::new(initial_plan),
6144                    recursive: Box::new(recursive_plan),
6145                })
6146            }
6147            _ => Err(anyhow::anyhow!(
6148                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
6149            )),
6150        }
6151    }
6152
6153    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
6154        let entries = match properties {
6155            Some(Expr::Map(entries)) => entries,
6156            _ => return None,
6157        };
6158
6159        if entries.is_empty() {
6160            return None;
6161        }
6162        let mut final_expr = None;
6163        for (prop, val_expr) in entries {
6164            let eq_expr = Expr::BinaryOp {
6165                left: Box::new(Expr::Property(
6166                    Box::new(Expr::Variable(variable.to_string())),
6167                    prop.clone(),
6168                )),
6169                op: BinaryOp::Eq,
6170                right: Box::new(val_expr.clone()),
6171            };
6172
6173            if let Some(e) = final_expr {
6174                final_expr = Some(Expr::BinaryOp {
6175                    left: Box::new(e),
6176                    op: BinaryOp::And,
6177                    right: Box::new(eq_expr),
6178                });
6179            } else {
6180                final_expr = Some(eq_expr);
6181            }
6182        }
6183        final_expr
6184    }
6185
6186    /// Build a filter expression from node properties and labels.
6187    ///
6188    /// This is used for TraverseMainByType where we need to filter target nodes
6189    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
6190    pub fn node_filter_expr(
6191        &self,
6192        variable: &str,
6193        labels: &[String],
6194        properties: &Option<Expr>,
6195    ) -> Option<Expr> {
6196        let mut final_expr = None;
6197
6198        // Add label checks using hasLabel(variable, 'label')
6199        for label in labels {
6200            let label_check = Expr::FunctionCall {
6201                name: "hasLabel".to_string(),
6202                args: vec![
6203                    Expr::Variable(variable.to_string()),
6204                    Expr::Literal(CypherLiteral::String(label.clone())),
6205                ],
6206                distinct: false,
6207                window_spec: None,
6208            };
6209
6210            final_expr = match final_expr {
6211                Some(e) => Some(Expr::BinaryOp {
6212                    left: Box::new(e),
6213                    op: BinaryOp::And,
6214                    right: Box::new(label_check),
6215                }),
6216                None => Some(label_check),
6217            };
6218        }
6219
6220        // Add property checks
6221        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
6222            final_expr = match final_expr {
6223                Some(e) => Some(Expr::BinaryOp {
6224                    left: Box::new(e),
6225                    op: BinaryOp::And,
6226                    right: Box::new(prop_expr),
6227                }),
6228                None => Some(prop_expr),
6229            };
6230        }
6231
6232        final_expr
6233    }
6234
6235    /// Create a filter plan that ensures traversed target matches a bound variable.
6236    ///
6237    /// Used in EXISTS subquery patterns where the target is already bound.
6238    /// Compares the target's VID against the bound variable's VID.
6239    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
6240        // Compare the traverse-discovered target's VID against the bound variable's VID.
6241        // Left side: Property access on the variable from current scope.
6242        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
6243        // We use Variable("{var}._vid") to access the VID column from the traverse output,
6244        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
6245        let bound_check = Expr::BinaryOp {
6246            left: Box::new(Expr::Property(
6247                Box::new(Expr::Variable(target_variable.to_string())),
6248                "_vid".to_string(),
6249            )),
6250            op: BinaryOp::Eq,
6251            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
6252        };
6253        LogicalPlan::Filter {
6254            input: Box::new(plan),
6255            predicate: bound_check,
6256            optional_variables: HashSet::new(),
6257        }
6258    }
6259
6260    /// Replace a Scan node matching the variable with a VectorKnn node
6261    fn replace_scan_with_knn(
6262        plan: LogicalPlan,
6263        variable: &str,
6264        property: &str,
6265        query: Expr,
6266        threshold: Option<f32>,
6267    ) -> LogicalPlan {
6268        match plan {
6269            LogicalPlan::Scan {
6270                label_id,
6271                labels,
6272                variable: scan_var,
6273                filter,
6274                optional,
6275            } => {
6276                if scan_var == variable {
6277                    // Inject any existing scan filter into VectorKnn?
6278                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
6279                    // Typically filter is applied post-Knn or during Knn if supported.
6280                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
6281                    // If `filter` is present on Scan, it must be preserved.
6282                    // We can wrap VectorKnn in Filter if Scan had filter.
6283
6284                    let knn = LogicalPlan::VectorKnn {
6285                        label_id,
6286                        variable: variable.to_string(),
6287                        property: property.to_string(),
6288                        query,
6289                        k: 100, // Default K, should push down LIMIT
6290                        threshold,
6291                    };
6292
6293                    if let Some(f) = filter {
6294                        LogicalPlan::Filter {
6295                            input: Box::new(knn),
6296                            predicate: f,
6297                            optional_variables: HashSet::new(),
6298                        }
6299                    } else {
6300                        knn
6301                    }
6302                } else {
6303                    LogicalPlan::Scan {
6304                        label_id,
6305                        labels,
6306                        variable: scan_var,
6307                        filter,
6308                        optional,
6309                    }
6310                }
6311            }
6312            LogicalPlan::Filter {
6313                input,
6314                predicate,
6315                optional_variables,
6316            } => LogicalPlan::Filter {
6317                input: Box::new(Self::replace_scan_with_knn(
6318                    *input, variable, property, query, threshold,
6319                )),
6320                predicate,
6321                optional_variables,
6322            },
6323            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6324                input: Box::new(Self::replace_scan_with_knn(
6325                    *input, variable, property, query, threshold,
6326                )),
6327                projections,
6328            },
6329            LogicalPlan::Limit { input, skip, fetch } => {
6330                // If we encounter Limit, we should ideally push K down to VectorKnn
6331                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
6332                // Limit comes later.
6333                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
6334                LogicalPlan::Limit {
6335                    input: Box::new(Self::replace_scan_with_knn(
6336                        *input, variable, property, query, threshold,
6337                    )),
6338                    skip,
6339                    fetch,
6340                }
6341            }
6342            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6343                left: Box::new(Self::replace_scan_with_knn(
6344                    *left,
6345                    variable,
6346                    property,
6347                    query.clone(),
6348                    threshold,
6349                )),
6350                right: Box::new(Self::replace_scan_with_knn(
6351                    *right, variable, property, query, threshold,
6352                )),
6353            },
6354            other => other,
6355        }
6356    }
6357
6358    /// Find the label_id for a Scan node matching the given variable
6359    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
6360        match plan {
6361            LogicalPlan::Scan {
6362                label_id,
6363                variable: var,
6364                ..
6365            } if var == variable => Some(*label_id),
6366            LogicalPlan::Filter { input, .. }
6367            | LogicalPlan::Project { input, .. }
6368            | LogicalPlan::Sort { input, .. }
6369            | LogicalPlan::Limit { input, .. }
6370            | LogicalPlan::Aggregate { input, .. }
6371            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
6372            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
6373                .or_else(|| Self::find_scan_label_id(right, variable)),
6374            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
6375            _ => None,
6376        }
6377    }
6378
6379    /// Push a predicate into a Scan's filter for the specified variable
6380    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
6381        match plan {
6382            LogicalPlan::Scan {
6383                label_id,
6384                labels,
6385                variable: var,
6386                filter,
6387                optional,
6388            } if var == variable => {
6389                // Merge the predicate with existing filter
6390                let new_filter = match filter {
6391                    Some(existing) => Some(Expr::BinaryOp {
6392                        left: Box::new(existing),
6393                        op: BinaryOp::And,
6394                        right: Box::new(predicate),
6395                    }),
6396                    None => Some(predicate),
6397                };
6398                LogicalPlan::Scan {
6399                    label_id,
6400                    labels,
6401                    variable: var,
6402                    filter: new_filter,
6403                    optional,
6404                }
6405            }
6406            LogicalPlan::Filter {
6407                input,
6408                predicate: p,
6409                optional_variables: opt_vars,
6410            } => LogicalPlan::Filter {
6411                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6412                predicate: p,
6413                optional_variables: opt_vars,
6414            },
6415            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6416                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6417                projections,
6418            },
6419            LogicalPlan::CrossJoin { left, right } => {
6420                // Check which side has the variable
6421                if Self::find_scan_label_id(&left, variable).is_some() {
6422                    LogicalPlan::CrossJoin {
6423                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
6424                        right,
6425                    }
6426                } else {
6427                    LogicalPlan::CrossJoin {
6428                        left,
6429                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
6430                    }
6431                }
6432            }
6433            LogicalPlan::Traverse {
6434                input,
6435                edge_type_ids,
6436                direction,
6437                source_variable,
6438                target_variable,
6439                target_label_id,
6440                step_variable,
6441                min_hops,
6442                max_hops,
6443                optional,
6444                target_filter,
6445                path_variable,
6446                edge_properties,
6447                is_variable_length,
6448                optional_pattern_vars,
6449                scope_match_variables,
6450                edge_filter_expr,
6451                path_mode,
6452                qpp_steps,
6453            } => LogicalPlan::Traverse {
6454                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
6455                edge_type_ids,
6456                direction,
6457                source_variable,
6458                target_variable,
6459                target_label_id,
6460                step_variable,
6461                min_hops,
6462                max_hops,
6463                optional,
6464                target_filter,
6465                path_variable,
6466                edge_properties,
6467                is_variable_length,
6468                optional_pattern_vars,
6469                scope_match_variables,
6470                edge_filter_expr,
6471                path_mode,
6472                qpp_steps,
6473            },
6474            other => other,
6475        }
6476    }
6477
6478    /// Extract predicates that reference only the specified variable
6479    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
6480        let analyzer = PredicateAnalyzer::new();
6481        let analysis = analyzer.analyze(predicate, variable);
6482
6483        // Return pushable predicates and combined residual
6484        let residual = if analysis.residual.is_empty() {
6485            None
6486        } else {
6487            let mut iter = analysis.residual.into_iter();
6488            let first = iter.next().unwrap();
6489            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
6490                left: Box::new(acc),
6491                op: BinaryOp::And,
6492                right: Box::new(e),
6493            }))
6494        };
6495
6496        (analysis.pushable, residual)
6497    }
6498
6499    // =====================================================================
6500    // Apply Predicate Pushdown - Helper Functions
6501    // =====================================================================
6502
6503    /// Split AND-connected predicates into a list.
6504    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
6505        match expr {
6506            Expr::BinaryOp {
6507                left,
6508                op: BinaryOp::And,
6509                right,
6510            } => {
6511                let mut result = Self::split_and_conjuncts(left);
6512                result.extend(Self::split_and_conjuncts(right));
6513                result
6514            }
6515            _ => vec![expr.clone()],
6516        }
6517    }
6518
6519    /// Combine predicates with AND.
6520    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
6521        if predicates.is_empty() {
6522            return None;
6523        }
6524        let mut result = predicates[0].clone();
6525        for pred in predicates.iter().skip(1) {
6526            result = Expr::BinaryOp {
6527                left: Box::new(result),
6528                op: BinaryOp::And,
6529                right: Box::new(pred.clone()),
6530            };
6531        }
6532        Some(result)
6533    }
6534
6535    /// Collect all variable names referenced in an expression.
6536    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
6537        let mut vars = HashSet::new();
6538        Self::collect_expr_variables_impl(expr, &mut vars);
6539        vars
6540    }
6541
6542    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
6543        match expr {
6544            Expr::Variable(name) => {
6545                vars.insert(name.clone());
6546            }
6547            Expr::Property(inner, _) => {
6548                if let Expr::Variable(name) = inner.as_ref() {
6549                    vars.insert(name.clone());
6550                } else {
6551                    Self::collect_expr_variables_impl(inner, vars);
6552                }
6553            }
6554            Expr::BinaryOp { left, right, .. } => {
6555                Self::collect_expr_variables_impl(left, vars);
6556                Self::collect_expr_variables_impl(right, vars);
6557            }
6558            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6559            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
6560            Expr::FunctionCall { args, .. } => {
6561                for arg in args {
6562                    Self::collect_expr_variables_impl(arg, vars);
6563                }
6564            }
6565            Expr::List(items) => {
6566                for item in items {
6567                    Self::collect_expr_variables_impl(item, vars);
6568                }
6569            }
6570            Expr::Case {
6571                expr,
6572                when_then,
6573                else_expr,
6574            } => {
6575                if let Some(e) = expr {
6576                    Self::collect_expr_variables_impl(e, vars);
6577                }
6578                for (w, t) in when_then {
6579                    Self::collect_expr_variables_impl(w, vars);
6580                    Self::collect_expr_variables_impl(t, vars);
6581                }
6582                if let Some(e) = else_expr {
6583                    Self::collect_expr_variables_impl(e, vars);
6584                }
6585            }
6586            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
6587            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
6588            // they introduce local variable bindings not in outer scope.
6589            _ => {}
6590        }
6591    }
6592
6593    /// Collect all variables produced by a logical plan.
6594    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
6595        let mut vars = HashSet::new();
6596        Self::collect_plan_variables_impl(plan, &mut vars);
6597        vars
6598    }
6599
6600    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
6601        match plan {
6602            LogicalPlan::Scan { variable, .. } => {
6603                vars.insert(variable.clone());
6604            }
6605            LogicalPlan::Traverse {
6606                target_variable,
6607                step_variable,
6608                input,
6609                path_variable,
6610                ..
6611            } => {
6612                vars.insert(target_variable.clone());
6613                if let Some(sv) = step_variable {
6614                    vars.insert(sv.clone());
6615                }
6616                if let Some(pv) = path_variable {
6617                    vars.insert(pv.clone());
6618                }
6619                Self::collect_plan_variables_impl(input, vars);
6620            }
6621            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
6622            LogicalPlan::Project { input, projections } => {
6623                for (expr, alias) in projections {
6624                    if let Some(a) = alias {
6625                        vars.insert(a.clone());
6626                    } else if let Expr::Variable(v) = expr {
6627                        vars.insert(v.clone());
6628                    }
6629                }
6630                Self::collect_plan_variables_impl(input, vars);
6631            }
6632            LogicalPlan::Apply {
6633                input, subquery, ..
6634            } => {
6635                Self::collect_plan_variables_impl(input, vars);
6636                Self::collect_plan_variables_impl(subquery, vars);
6637            }
6638            LogicalPlan::CrossJoin { left, right } => {
6639                Self::collect_plan_variables_impl(left, vars);
6640                Self::collect_plan_variables_impl(right, vars);
6641            }
6642            LogicalPlan::Unwind {
6643                input, variable, ..
6644            } => {
6645                vars.insert(variable.clone());
6646                Self::collect_plan_variables_impl(input, vars);
6647            }
6648            LogicalPlan::Aggregate { input, .. } => {
6649                Self::collect_plan_variables_impl(input, vars);
6650            }
6651            LogicalPlan::Distinct { input } => {
6652                Self::collect_plan_variables_impl(input, vars);
6653            }
6654            LogicalPlan::Sort { input, .. } => {
6655                Self::collect_plan_variables_impl(input, vars);
6656            }
6657            LogicalPlan::Limit { input, .. } => {
6658                Self::collect_plan_variables_impl(input, vars);
6659            }
6660            LogicalPlan::VectorKnn { variable, .. } => {
6661                vars.insert(variable.clone());
6662            }
6663            LogicalPlan::ProcedureCall { yield_items, .. } => {
6664                for (name, alias) in yield_items {
6665                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
6666                }
6667            }
6668            LogicalPlan::ShortestPath {
6669                input,
6670                path_variable,
6671                ..
6672            } => {
6673                vars.insert(path_variable.clone());
6674                Self::collect_plan_variables_impl(input, vars);
6675            }
6676            LogicalPlan::AllShortestPaths {
6677                input,
6678                path_variable,
6679                ..
6680            } => {
6681                vars.insert(path_variable.clone());
6682                Self::collect_plan_variables_impl(input, vars);
6683            }
6684            LogicalPlan::RecursiveCTE {
6685                initial, recursive, ..
6686            } => {
6687                Self::collect_plan_variables_impl(initial, vars);
6688                Self::collect_plan_variables_impl(recursive, vars);
6689            }
6690            LogicalPlan::SubqueryCall {
6691                input, subquery, ..
6692            } => {
6693                Self::collect_plan_variables_impl(input, vars);
6694                Self::collect_plan_variables_impl(subquery, vars);
6695            }
6696            _ => {}
6697        }
6698    }
6699
6700    /// Extract predicates that only reference variables from Apply's input.
6701    /// Returns (input_only_predicates, remaining_predicates).
6702    fn extract_apply_input_predicates(
6703        predicate: &Expr,
6704        input_variables: &HashSet<String>,
6705        subquery_new_variables: &HashSet<String>,
6706    ) -> (Vec<Expr>, Vec<Expr>) {
6707        let conjuncts = Self::split_and_conjuncts(predicate);
6708        let mut input_preds = Vec::new();
6709        let mut remaining = Vec::new();
6710
6711        for conj in conjuncts {
6712            let vars = Self::collect_expr_variables(&conj);
6713
6714            // Predicate only references input variables (none from subquery)
6715            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
6716            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
6717
6718            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
6719                input_preds.push(conj);
6720            } else {
6721                remaining.push(conj);
6722            }
6723        }
6724
6725        (input_preds, remaining)
6726    }
6727
6728    /// Push eligible predicates into Apply.input_filter.
6729    /// This filters input rows BEFORE executing the correlated subquery.
6730    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
6731        match plan {
6732            LogicalPlan::Apply {
6733                input,
6734                subquery,
6735                input_filter,
6736            } => {
6737                // Collect variables from input plan
6738                let input_vars = Self::collect_plan_variables(&input);
6739
6740                // Collect NEW variables introduced by subquery (not in input)
6741                let subquery_vars = Self::collect_plan_variables(&subquery);
6742                let new_subquery_vars: HashSet<String> =
6743                    subquery_vars.difference(&input_vars).cloned().collect();
6744
6745                // Extract predicates that only reference input variables
6746                let (input_preds, remaining) = Self::extract_apply_input_predicates(
6747                    current_predicate,
6748                    &input_vars,
6749                    &new_subquery_vars,
6750                );
6751
6752                // Update current_predicate to only remaining predicates
6753                *current_predicate = if remaining.is_empty() {
6754                    Expr::TRUE
6755                } else {
6756                    Self::combine_predicates(remaining).unwrap()
6757                };
6758
6759                // Combine extracted predicates with existing input_filter
6760                let new_input_filter = if input_preds.is_empty() {
6761                    input_filter
6762                } else {
6763                    let extracted = Self::combine_predicates(input_preds).unwrap();
6764                    match input_filter {
6765                        Some(existing) => Some(Expr::BinaryOp {
6766                            left: Box::new(existing),
6767                            op: BinaryOp::And,
6768                            right: Box::new(extracted),
6769                        }),
6770                        None => Some(extracted),
6771                    }
6772                };
6773
6774                // Recurse into input plan
6775                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
6776
6777                LogicalPlan::Apply {
6778                    input: Box::new(new_input),
6779                    subquery,
6780                    input_filter: new_input_filter,
6781                }
6782            }
6783            // Recurse into other plan nodes
6784            LogicalPlan::Filter {
6785                input,
6786                predicate,
6787                optional_variables,
6788            } => LogicalPlan::Filter {
6789                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6790                predicate,
6791                optional_variables,
6792            },
6793            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6794                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6795                projections,
6796            },
6797            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
6798                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6799                order_by,
6800            },
6801            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
6802                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6803                skip,
6804                fetch,
6805            },
6806            LogicalPlan::Aggregate {
6807                input,
6808                group_by,
6809                aggregates,
6810            } => LogicalPlan::Aggregate {
6811                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6812                group_by,
6813                aggregates,
6814            },
6815            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6816                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
6817                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
6818            },
6819            LogicalPlan::Traverse {
6820                input,
6821                edge_type_ids,
6822                direction,
6823                source_variable,
6824                target_variable,
6825                target_label_id,
6826                step_variable,
6827                min_hops,
6828                max_hops,
6829                optional,
6830                target_filter,
6831                path_variable,
6832                edge_properties,
6833                is_variable_length,
6834                optional_pattern_vars,
6835                scope_match_variables,
6836                edge_filter_expr,
6837                path_mode,
6838                qpp_steps,
6839            } => LogicalPlan::Traverse {
6840                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
6841                edge_type_ids,
6842                direction,
6843                source_variable,
6844                target_variable,
6845                target_label_id,
6846                step_variable,
6847                min_hops,
6848                max_hops,
6849                optional,
6850                target_filter,
6851                path_variable,
6852                edge_properties,
6853                is_variable_length,
6854                optional_pattern_vars,
6855                scope_match_variables,
6856                edge_filter_expr,
6857                path_mode,
6858                qpp_steps,
6859            },
6860            other => other,
6861        }
6862    }
6863}
6864
/// Get the expected column name for an aggregate expression.
///
/// This is the single source of truth for aggregate column naming, used by:
/// - Logical planner (to create column references)
/// - Physical planner (to rename DataFusion's auto-generated column names)
/// - Fallback executor (to name result columns)
///
/// The name is exactly what `expr.to_string_repr()` returns; no further
/// normalization is applied here.
pub fn aggregate_column_name(expr: &Expr) -> String {
    // Delegate to the expression's own string rendering so every caller agrees on the name.
    expr.to_string_repr()
}
6874
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Assembled by `QueryPlanner::explain_logical_plan`. Derives serde's
/// `Serialize`/`Deserialize`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) `Debug` rendering of the logical plan tree.
    pub plan_text: String,
    /// Index availability report for each scan in the plan.
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    ///
    /// `explain_logical_plan` currently always leaves this list empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
6889
/// Suggestion for creating an index to improve query performance.
///
/// Reported to the user through `ExplainOutput::suggestions`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    pub label_or_type: String,
    /// Name of the property to index.
    pub property: String,
    /// Recommended index type (e.g., `"SCALAR"`, `"VECTOR"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Ready-to-execute Cypher statement to create the index.
    pub create_statement: String,
}
6904
/// Index availability report for a single scan operator.
///
/// Reported to the user through `ExplainOutput::index_usage`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label or edge type the index belongs to. The planner
    /// writes `"?"` when a label id cannot be resolved to a name.
    pub label_or_type: String,
    /// Name of the indexed property.
    pub property: String,
    /// Kind of index, as an upper-case tag (e.g., `"VECTOR"`).
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
6916
/// Rough cost and row count estimates for a complete logical plan.
///
/// NOTE: `QueryPlanner::estimate_costs` currently fills both fields with
/// fixed placeholder values that do not depend on the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
6925
6926impl QueryPlanner {
6927    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
6928    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
6929        let plan = self.plan(ast)?;
6930        self.explain_logical_plan(&plan)
6931    }
6932
6933    /// Produce an EXPLAIN report for an already-planned logical plan.
6934    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
6935        let index_usage = self.analyze_index_usage(plan)?;
6936        let cost_estimates = self.estimate_costs(plan)?;
6937        let suggestions = self.collect_index_suggestions(plan);
6938        let warnings = Vec::new();
6939        let plan_text = format!("{:#?}", plan);
6940
6941        Ok(ExplainOutput {
6942            plan_text,
6943            index_usage,
6944            cost_estimates,
6945            warnings,
6946            suggestions,
6947        })
6948    }
6949
6950    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
6951        let mut usage = Vec::new();
6952        self.collect_index_usage(plan, &mut usage);
6953        Ok(usage)
6954    }
6955
6956    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
6957        match plan {
6958            LogicalPlan::Scan { .. } => {
6959                // Placeholder: Scan might use index if it was optimized
6960                // Ideally LogicalPlan::Scan should store if it uses index.
6961                // But typically Planner converts Scan to specific index scan or we infer it here.
6962            }
6963            LogicalPlan::VectorKnn {
6964                label_id, property, ..
6965            } => {
6966                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
6967                usage.push(IndexUsage {
6968                    label_or_type: label_name.to_string(),
6969                    property: property.clone(),
6970                    index_type: "VECTOR".to_string(),
6971                    used: true,
6972                    reason: None,
6973                });
6974            }
6975            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
6976            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
6977            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
6978            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
6979            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
6980            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
6981            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
6982            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
6983                self.collect_index_usage(left, usage);
6984                self.collect_index_usage(right, usage);
6985            }
6986            _ => {}
6987        }
6988    }
6989
6990    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
6991        Ok(CostEstimates {
6992            estimated_rows: 100.0,
6993            estimated_cost: 10.0,
6994        })
6995    }
6996
6997    /// Collect index suggestions based on query patterns.
6998    ///
6999    /// Currently detects:
7000    /// - Temporal predicates from `uni.validAt()` function calls
7001    /// - Temporal predicates from `VALID_AT` macro expansion
7002    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
7003        let mut suggestions = Vec::new();
7004        self.collect_temporal_suggestions(plan, &mut suggestions);
7005        suggestions
7006    }
7007
7008    /// Recursively collect temporal index suggestions from the plan.
7009    fn collect_temporal_suggestions(
7010        &self,
7011        plan: &LogicalPlan,
7012        suggestions: &mut Vec<IndexSuggestion>,
7013    ) {
7014        match plan {
7015            LogicalPlan::Filter {
7016                input, predicate, ..
7017            } => {
7018                // Check for temporal patterns in the predicate
7019                self.detect_temporal_pattern(predicate, suggestions);
7020                // Recurse into input
7021                self.collect_temporal_suggestions(input, suggestions);
7022            }
7023            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
7024            LogicalPlan::Project { input, .. } => {
7025                self.collect_temporal_suggestions(input, suggestions)
7026            }
7027            LogicalPlan::Limit { input, .. } => {
7028                self.collect_temporal_suggestions(input, suggestions)
7029            }
7030            LogicalPlan::Sort { input, .. } => {
7031                self.collect_temporal_suggestions(input, suggestions)
7032            }
7033            LogicalPlan::Aggregate { input, .. } => {
7034                self.collect_temporal_suggestions(input, suggestions)
7035            }
7036            LogicalPlan::Traverse { input, .. } => {
7037                self.collect_temporal_suggestions(input, suggestions)
7038            }
7039            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
7040                self.collect_temporal_suggestions(left, suggestions);
7041                self.collect_temporal_suggestions(right, suggestions);
7042            }
7043            _ => {}
7044        }
7045    }
7046
7047    /// Detect temporal predicate patterns and suggest indexes.
7048    ///
7049    /// Detects two patterns:
7050    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
7051    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
7052    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
7053        match expr {
7054            // Pattern 1: uni.temporal.validAt() function call
7055            Expr::FunctionCall { name, args, .. }
7056                if name.eq_ignore_ascii_case("uni.temporal.validAt")
7057                    || name.eq_ignore_ascii_case("validAt") =>
7058            {
7059                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
7060                if args.len() >= 2 {
7061                    let start_prop =
7062                        if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1) {
7063                            s.clone()
7064                        } else {
7065                            "valid_from".to_string()
7066                        };
7067
7068                    // Try to extract label from the node expression
7069                    if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
7070                        self.suggest_temporal_index(&var, &start_prop, suggestions);
7071                    }
7072                }
7073            }
7074
7075            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
7076            Expr::BinaryOp {
7077                left,
7078                op: BinaryOp::And,
7079                right,
7080            } => {
7081                // Check left side for `prop <= time` pattern (temporal start condition)
7082                if let Expr::BinaryOp {
7083                    left: prop_expr,
7084                    op: BinaryOp::LtEq,
7085                    ..
7086                } = left.as_ref()
7087                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
7088                    && (prop_name == "valid_from"
7089                        || prop_name.contains("start")
7090                        || prop_name.contains("from")
7091                        || prop_name.contains("begin"))
7092                    && let Some(var) = base.extract_variable()
7093                {
7094                    self.suggest_temporal_index(&var, prop_name, suggestions);
7095                }
7096
7097                // Recurse into both sides of AND
7098                self.detect_temporal_pattern(left.as_ref(), suggestions);
7099                self.detect_temporal_pattern(right.as_ref(), suggestions);
7100            }
7101
7102            // Recurse into other binary ops
7103            Expr::BinaryOp { left, right, .. } => {
7104                self.detect_temporal_pattern(left.as_ref(), suggestions);
7105                self.detect_temporal_pattern(right.as_ref(), suggestions);
7106            }
7107
7108            _ => {}
7109        }
7110    }
7111
7112    /// Suggest a scalar index for a temporal property if one doesn't already exist.
7113    fn suggest_temporal_index(
7114        &self,
7115        _variable: &str,
7116        property: &str,
7117        suggestions: &mut Vec<IndexSuggestion>,
7118    ) {
7119        // Check if a scalar index already exists for this property
7120        // We need to check all labels since we may not know the exact label from the variable
7121        let mut has_index = false;
7122
7123        for index in &self.schema.indexes {
7124            if let IndexDefinition::Scalar(config) = index
7125                && config.properties.contains(&property.to_string())
7126            {
7127                has_index = true;
7128                break;
7129            }
7130        }
7131
7132        if !has_index {
7133            // Avoid duplicate suggestions
7134            let already_suggested = suggestions.iter().any(|s| s.property == property);
7135            if !already_suggested {
7136                suggestions.push(IndexSuggestion {
7137                    label_or_type: "(detected from temporal query)".to_string(),
7138                    property: property.to_string(),
7139                    index_type: "SCALAR (BTree)".to_string(),
7140                    reason: format!(
7141                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
7142                        property
7143                    ),
7144                    create_statement: format!(
7145                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
7146                        property, property
7147                    ),
7148                });
7149            }
7150        }
7151    }
7152
7153    /// Helper functions for expression normalization
7154    /// Normalize an expression for storage: strip variable prefixes
7155    /// For simple property: u.email -> "email"
7156    /// For expressions: lower(u.email) -> "lower(email)"
7157    fn normalize_expression_for_storage(expr: &Expr) -> String {
7158        match expr {
7159            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
7160            _ => {
7161                // Serialize expression and strip variable prefix
7162                let expr_str = expr.to_string_repr();
7163                Self::strip_variable_prefix(&expr_str)
7164            }
7165        }
7166    }
7167
7168    /// Strip variable references like "u.prop" from expression strings
7169    /// Converts "lower(u.email)" to "lower(email)"
7170    fn strip_variable_prefix(expr_str: &str) -> String {
7171        use regex::Regex;
7172        // Match patterns like "word.property" and replace with just "property"
7173        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
7174        re.replace_all(expr_str, "$1").to_string()
7175    }
7176
7177    /// Plan a schema command from the new AST
7178    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
7179        match cmd {
7180            SchemaCommand::CreateVectorIndex(c) => {
7181                // Parse index type from options (default: IvfPq)
7182                let opt = |key: &str| {
7183                    c.options
7184                        .get(key)
7185                        .and_then(|v| v.as_str())
7186                        .and_then(|s| s.parse::<u32>().ok())
7187                };
7188                let opt_u8 = |key: &str| -> Option<u8> {
7189                    c.options
7190                        .get(key)
7191                        .and_then(|v| v.as_str())
7192                        .and_then(|s| s.parse::<u8>().ok())
7193                };
7194                let index_type = match c.options.get("type").and_then(|v| v.as_str()) {
7195                    Some("flat") => VectorIndexType::Flat,
7196                    Some("ivf_flat") => VectorIndexType::IvfFlat {
7197                        num_partitions: opt("partitions").unwrap_or(256),
7198                    },
7199                    Some("ivf_sq") => VectorIndexType::IvfSq {
7200                        num_partitions: opt("partitions").unwrap_or(256),
7201                    },
7202                    Some("ivf_rq") => VectorIndexType::IvfRq {
7203                        num_partitions: opt("partitions").unwrap_or(256),
7204                        num_bits: opt_u8("num_bits"),
7205                    },
7206                    Some("hnsw_flat") => VectorIndexType::HnswFlat {
7207                        m: opt("m").unwrap_or(16),
7208                        ef_construction: opt("ef_construction").unwrap_or(200),
7209                        num_partitions: opt("partitions"),
7210                    },
7211                    Some("hnsw") | Some("hnsw_sq") => VectorIndexType::HnswSq {
7212                        m: opt("m").unwrap_or(16),
7213                        ef_construction: opt("ef_construction").unwrap_or(200),
7214                        num_partitions: opt("partitions"),
7215                    },
7216                    Some("hnsw_pq") => VectorIndexType::HnswPq {
7217                        m: opt("m").unwrap_or(16),
7218                        ef_construction: opt("ef_construction").unwrap_or(200),
7219                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
7220                        num_partitions: opt("partitions"),
7221                    },
7222                    _ => VectorIndexType::IvfPq {
7223                        num_partitions: opt("partitions").unwrap_or(256),
7224                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
7225                        bits_per_subvector: opt_u8("num_bits").unwrap_or(8),
7226                    },
7227                };
7228
7229                // Parse embedding config from options
7230                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
7231                    Self::parse_embedding_config(emb_val)?
7232                } else {
7233                    None
7234                };
7235
7236                let config = VectorIndexConfig {
7237                    name: c.name,
7238                    label: c.label,
7239                    property: c.property,
7240                    metric: DistanceMetric::Cosine,
7241                    index_type,
7242                    embedding_config,
7243                    metadata: Default::default(),
7244                };
7245                Ok(LogicalPlan::CreateVectorIndex {
7246                    config,
7247                    if_not_exists: c.if_not_exists,
7248                })
7249            }
7250            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
7251                config: FullTextIndexConfig {
7252                    name: cfg.name,
7253                    label: cfg.label,
7254                    properties: cfg.properties,
7255                    tokenizer: TokenizerConfig::Standard,
7256                    with_positions: true,
7257                    metadata: Default::default(),
7258                },
7259                if_not_exists: cfg.if_not_exists,
7260            }),
7261            SchemaCommand::CreateScalarIndex(cfg) => {
7262                // Convert expressions to storage strings (strip variable prefix)
7263                let properties: Vec<String> = cfg
7264                    .expressions
7265                    .iter()
7266                    .map(Self::normalize_expression_for_storage)
7267                    .collect();
7268
7269                Ok(LogicalPlan::CreateScalarIndex {
7270                    config: ScalarIndexConfig {
7271                        name: cfg.name,
7272                        label: cfg.label,
7273                        properties,
7274                        index_type: ScalarIndexType::BTree,
7275                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
7276                        metadata: Default::default(),
7277                    },
7278                    if_not_exists: cfg.if_not_exists,
7279                })
7280            }
7281            SchemaCommand::CreateJsonFtsIndex(cfg) => {
7282                let with_positions = cfg
7283                    .options
7284                    .get("with_positions")
7285                    .and_then(|v| v.as_bool())
7286                    .unwrap_or(false);
7287                Ok(LogicalPlan::CreateJsonFtsIndex {
7288                    config: JsonFtsIndexConfig {
7289                        name: cfg.name,
7290                        label: cfg.label,
7291                        column: cfg.column,
7292                        paths: Vec::new(),
7293                        with_positions,
7294                        metadata: Default::default(),
7295                    },
7296                    if_not_exists: cfg.if_not_exists,
7297                })
7298            }
7299            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
7300                name: drop.name,
7301                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
7302            }),
7303            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
7304            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
7305            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
7306            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
7307            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
7308            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
7309            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
7310            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
7311            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
7312            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
7313            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
7314            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
7315            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
7316            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
7317            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
7318            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
7319                destination: path,
7320                options: HashMap::new(),
7321            }),
7322            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
7323                label: cmd.label,
7324                path: cmd.path,
7325                format: cmd.format,
7326                options: cmd.options,
7327            }),
7328            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
7329                label: cmd.label,
7330                path: cmd.path,
7331                format: cmd.format,
7332                options: cmd.options,
7333            }),
7334        }
7335    }
7336
7337    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
7338        let obj = emb_val
7339            .as_object()
7340            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
7341
7342        // Parse alias (required)
7343        let alias = obj
7344            .get("alias")
7345            .and_then(|v| v.as_str())
7346            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
7347
7348        // Parse source properties (required)
7349        let source_properties = obj
7350            .get("source")
7351            .and_then(|v| v.as_array())
7352            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
7353            .iter()
7354            .filter_map(|v| v.as_str().map(|s| s.to_string()))
7355            .collect::<Vec<_>>();
7356
7357        if source_properties.is_empty() {
7358            return Err(anyhow!(
7359                "embedding.source must contain at least one property"
7360            ));
7361        }
7362
7363        let batch_size = obj
7364            .get("batch_size")
7365            .and_then(|v| v.as_u64())
7366            .map(|v| v as usize)
7367            .unwrap_or(32);
7368
7369        Ok(Some(EmbeddingConfig {
7370            alias: alias.to_string(),
7371            source_properties,
7372            batch_size,
7373        }))
7374    }
7375}
7376
7377/// Collect all properties referenced anywhere in the LogicalPlan tree.
7378///
7379/// This is critical for window functions: properties must be materialized
7380/// at the Scan node so they're available for window operations later.
7381///
7382/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
7383pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
7384    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
7385    collect_properties_recursive(plan, &mut properties);
7386    properties
7387}
7388
7389/// Recursively walk the LogicalPlan tree and collect all property references.
7390fn collect_properties_recursive(
7391    plan: &LogicalPlan,
7392    properties: &mut HashMap<String, HashSet<String>>,
7393) {
7394    match plan {
7395        LogicalPlan::Window {
7396            input,
7397            window_exprs,
7398        } => {
7399            // Collect from window expressions
7400            for expr in window_exprs {
7401                collect_properties_from_expr_into(expr, properties);
7402            }
7403            collect_properties_recursive(input, properties);
7404        }
7405        LogicalPlan::Project { input, projections } => {
7406            for (expr, _alias) in projections {
7407                collect_properties_from_expr_into(expr, properties);
7408            }
7409            collect_properties_recursive(input, properties);
7410        }
7411        LogicalPlan::Sort { input, order_by } => {
7412            for sort_item in order_by {
7413                collect_properties_from_expr_into(&sort_item.expr, properties);
7414            }
7415            collect_properties_recursive(input, properties);
7416        }
7417        LogicalPlan::Filter {
7418            input, predicate, ..
7419        } => {
7420            collect_properties_from_expr_into(predicate, properties);
7421            collect_properties_recursive(input, properties);
7422        }
7423        LogicalPlan::Aggregate {
7424            input,
7425            group_by,
7426            aggregates,
7427        } => {
7428            for expr in group_by {
7429                collect_properties_from_expr_into(expr, properties);
7430            }
7431            for expr in aggregates {
7432                collect_properties_from_expr_into(expr, properties);
7433            }
7434            collect_properties_recursive(input, properties);
7435        }
7436        LogicalPlan::Scan {
7437            filter: Some(expr), ..
7438        } => {
7439            collect_properties_from_expr_into(expr, properties);
7440        }
7441        LogicalPlan::Scan { filter: None, .. } => {}
7442        LogicalPlan::ExtIdLookup {
7443            filter: Some(expr), ..
7444        } => {
7445            collect_properties_from_expr_into(expr, properties);
7446        }
7447        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
7448        LogicalPlan::ScanAll {
7449            filter: Some(expr), ..
7450        } => {
7451            collect_properties_from_expr_into(expr, properties);
7452        }
7453        LogicalPlan::ScanAll { filter: None, .. } => {}
7454        LogicalPlan::ScanMainByLabels {
7455            filter: Some(expr), ..
7456        } => {
7457            collect_properties_from_expr_into(expr, properties);
7458        }
7459        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
7460        LogicalPlan::TraverseMainByType {
7461            input,
7462            target_filter,
7463            ..
7464        } => {
7465            if let Some(expr) = target_filter {
7466                collect_properties_from_expr_into(expr, properties);
7467            }
7468            collect_properties_recursive(input, properties);
7469        }
7470        LogicalPlan::Traverse {
7471            input,
7472            target_filter,
7473            step_variable: _,
7474            ..
7475        } => {
7476            if let Some(expr) = target_filter {
7477                collect_properties_from_expr_into(expr, properties);
7478            }
7479            // Note: Edge properties (step_variable) will be collected from expressions
7480            // that reference them. The edge_properties field in LogicalPlan is populated
7481            // later during physical planning based on this collected map.
7482            collect_properties_recursive(input, properties);
7483        }
7484        LogicalPlan::Unwind { input, expr, .. } => {
7485            collect_properties_from_expr_into(expr, properties);
7486            collect_properties_recursive(input, properties);
7487        }
7488        LogicalPlan::Create { input, pattern } => {
7489            // Mark variables referenced in CREATE patterns with "*" so plan_scan
7490            // adds structural projections (bare entity columns). Without this,
7491            // execute_create_pattern() can't find bound variables and creates
7492            // spurious new nodes instead of using existing MATCH'd ones.
7493            mark_pattern_variables(pattern, properties);
7494            collect_properties_recursive(input, properties);
7495        }
7496        LogicalPlan::CreateBatch { input, patterns } => {
7497            for pattern in patterns {
7498                mark_pattern_variables(pattern, properties);
7499            }
7500            collect_properties_recursive(input, properties);
7501        }
7502        LogicalPlan::Merge {
7503            input,
7504            pattern,
7505            on_match,
7506            on_create,
7507        } => {
7508            mark_pattern_variables(pattern, properties);
7509            if let Some(set_clause) = on_match {
7510                mark_set_item_variables(&set_clause.items, properties);
7511            }
7512            if let Some(set_clause) = on_create {
7513                mark_set_item_variables(&set_clause.items, properties);
7514            }
7515            collect_properties_recursive(input, properties);
7516        }
7517        LogicalPlan::Set { input, items } => {
7518            mark_set_item_variables(items, properties);
7519            collect_properties_recursive(input, properties);
7520        }
7521        LogicalPlan::Remove { input, items } => {
7522            for item in items {
7523                match item {
7524                    RemoveItem::Property(expr) => {
7525                        // REMOVE n.prop — collect the property and mark the variable
7526                        // with "*" so full structural projection is applied.
7527                        collect_properties_from_expr_into(expr, properties);
7528                        if let Expr::Property(base, _) = expr
7529                            && let Expr::Variable(var) = base.as_ref()
7530                        {
7531                            properties
7532                                .entry(var.clone())
7533                                .or_default()
7534                                .insert("*".to_string());
7535                        }
7536                    }
7537                    RemoveItem::Labels { variable, .. } => {
7538                        // REMOVE n:Label — mark n with "*"
7539                        properties
7540                            .entry(variable.clone())
7541                            .or_default()
7542                            .insert("*".to_string());
7543                    }
7544                }
7545            }
7546            collect_properties_recursive(input, properties);
7547        }
7548        LogicalPlan::Delete { input, items, .. } => {
7549            for expr in items {
7550                collect_properties_from_expr_into(expr, properties);
7551            }
7552            collect_properties_recursive(input, properties);
7553        }
7554        LogicalPlan::Foreach {
7555            input, list, body, ..
7556        } => {
7557            collect_properties_from_expr_into(list, properties);
7558            for plan in body {
7559                collect_properties_recursive(plan, properties);
7560            }
7561            collect_properties_recursive(input, properties);
7562        }
7563        LogicalPlan::Limit { input, .. } => {
7564            collect_properties_recursive(input, properties);
7565        }
7566        LogicalPlan::CrossJoin { left, right } => {
7567            collect_properties_recursive(left, properties);
7568            collect_properties_recursive(right, properties);
7569        }
7570        LogicalPlan::Apply {
7571            input,
7572            subquery,
7573            input_filter,
7574        } => {
7575            if let Some(expr) = input_filter {
7576                collect_properties_from_expr_into(expr, properties);
7577            }
7578            collect_properties_recursive(input, properties);
7579            collect_properties_recursive(subquery, properties);
7580        }
7581        LogicalPlan::Union { left, right, .. } => {
7582            collect_properties_recursive(left, properties);
7583            collect_properties_recursive(right, properties);
7584        }
7585        LogicalPlan::RecursiveCTE {
7586            initial, recursive, ..
7587        } => {
7588            collect_properties_recursive(initial, properties);
7589            collect_properties_recursive(recursive, properties);
7590        }
7591        LogicalPlan::ProcedureCall { arguments, .. } => {
7592            for arg in arguments {
7593                collect_properties_from_expr_into(arg, properties);
7594            }
7595        }
7596        LogicalPlan::VectorKnn { query, .. } => {
7597            collect_properties_from_expr_into(query, properties);
7598        }
7599        LogicalPlan::InvertedIndexLookup { terms, .. } => {
7600            collect_properties_from_expr_into(terms, properties);
7601        }
7602        LogicalPlan::ShortestPath { input, .. } => {
7603            collect_properties_recursive(input, properties);
7604        }
7605        LogicalPlan::AllShortestPaths { input, .. } => {
7606            collect_properties_recursive(input, properties);
7607        }
7608        LogicalPlan::Distinct { input } => {
7609            collect_properties_recursive(input, properties);
7610        }
7611        LogicalPlan::QuantifiedPattern {
7612            input,
7613            pattern_plan,
7614            ..
7615        } => {
7616            collect_properties_recursive(input, properties);
7617            collect_properties_recursive(pattern_plan, properties);
7618        }
7619        LogicalPlan::BindZeroLengthPath { input, .. } => {
7620            collect_properties_recursive(input, properties);
7621        }
7622        LogicalPlan::BindPath { input, .. } => {
7623            collect_properties_recursive(input, properties);
7624        }
7625        LogicalPlan::SubqueryCall { input, subquery } => {
7626            collect_properties_recursive(input, properties);
7627            collect_properties_recursive(subquery, properties);
7628        }
7629        LogicalPlan::LocyProject {
7630            input, projections, ..
7631        } => {
7632            for (expr, _alias) in projections {
7633                match expr {
7634                    // Bare variable in LocyProject: only need _vid for node variables
7635                    // (plan_locy_project extracts VID directly). Adding "*" would create
7636                    // a structural Struct column that conflicts with derived scan columns.
7637                    Expr::Variable(name) if !name.contains('.') => {
7638                        properties
7639                            .entry(name.clone())
7640                            .or_default()
7641                            .insert("_vid".to_string());
7642                    }
7643                    _ => collect_properties_from_expr_into(expr, properties),
7644                }
7645            }
7646            collect_properties_recursive(input, properties);
7647        }
7648        LogicalPlan::LocyFold {
7649            input,
7650            fold_bindings,
7651            ..
7652        } => {
7653            for (_name, expr) in fold_bindings {
7654                collect_properties_from_expr_into(expr, properties);
7655            }
7656            collect_properties_recursive(input, properties);
7657        }
7658        LogicalPlan::LocyBestBy {
7659            input, criteria, ..
7660        } => {
7661            for (expr, _asc) in criteria {
7662                collect_properties_from_expr_into(expr, properties);
7663            }
7664            collect_properties_recursive(input, properties);
7665        }
7666        LogicalPlan::LocyPriority { input, .. } => {
7667            collect_properties_recursive(input, properties);
7668        }
7669        // DDL and other plans don't reference properties
7670        _ => {}
7671    }
7672}
7673
7674/// Mark target variables from SET items with "*" and collect value expressions.
7675fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
7676    for item in items {
7677        match item {
7678            SetItem::Property { expr, value } => {
7679                // SET n.prop = val — mark n via the property expr, collect from value.
7680                // Also mark the variable with "*" for full structural projection so
7681                // edge identity fields (_src/_dst) are available for write operations.
7682                collect_properties_from_expr_into(expr, properties);
7683                collect_properties_from_expr_into(value, properties);
7684                if let Expr::Property(base, _) = expr
7685                    && let Expr::Variable(var) = base.as_ref()
7686                {
7687                    properties
7688                        .entry(var.clone())
7689                        .or_default()
7690                        .insert("*".to_string());
7691                }
7692            }
7693            SetItem::Labels { variable, .. } => {
7694                // SET n:Label — need full access to n
7695                properties
7696                    .entry(variable.clone())
7697                    .or_default()
7698                    .insert("*".to_string());
7699            }
7700            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
7701                // SET n = {props} or SET n += {props}
7702                properties
7703                    .entry(variable.clone())
7704                    .or_default()
7705                    .insert("*".to_string());
7706                collect_properties_from_expr_into(value, properties);
7707            }
7708        }
7709    }
7710}
7711
7712/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
7713/// adds structural projections (bare entity Struct columns) for them.
7714/// This is needed so that execute_create_pattern() can find bound variables
7715/// in the row HashMap and reuse existing nodes instead of creating new ones.
7716fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
7717    for path in &pattern.paths {
7718        if let Some(ref v) = path.variable {
7719            properties
7720                .entry(v.clone())
7721                .or_default()
7722                .insert("*".to_string());
7723        }
7724        for element in &path.elements {
7725            match element {
7726                PatternElement::Node(n) => {
7727                    if let Some(ref v) = n.variable {
7728                        properties
7729                            .entry(v.clone())
7730                            .or_default()
7731                            .insert("*".to_string());
7732                    }
7733                    // Also collect properties from inline property expressions
7734                    if let Some(ref props) = n.properties {
7735                        collect_properties_from_expr_into(props, properties);
7736                    }
7737                }
7738                PatternElement::Relationship(r) => {
7739                    if let Some(ref v) = r.variable {
7740                        properties
7741                            .entry(v.clone())
7742                            .or_default()
7743                            .insert("*".to_string());
7744                    }
7745                    if let Some(ref props) = r.properties {
7746                        collect_properties_from_expr_into(props, properties);
7747                    }
7748                }
7749                PatternElement::Parenthesized { pattern, .. } => {
7750                    let sub = Pattern {
7751                        paths: vec![pattern.as_ref().clone()],
7752                    };
7753                    mark_pattern_variables(&sub, properties);
7754                }
7755            }
7756        }
7757    }
7758}
7759
7760/// Collect properties from an expression into a HashMap.
7761fn collect_properties_from_expr_into(
7762    expr: &Expr,
7763    properties: &mut HashMap<String, HashSet<String>>,
7764) {
7765    match expr {
7766        Expr::PatternComprehension {
7767            where_clause,
7768            map_expr,
7769            ..
7770        } => {
7771            // Collect properties from the WHERE clause and map expression.
7772            // The pattern itself creates local bindings that don't need
7773            // property collection from the outer scope.
7774            if let Some(where_expr) = where_clause {
7775                collect_properties_from_expr_into(where_expr, properties);
7776            }
7777            collect_properties_from_expr_into(map_expr, properties);
7778        }
7779        Expr::Variable(name) => {
7780            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
7781            if let Some((var, prop)) = name.split_once('.') {
7782                properties
7783                    .entry(var.to_string())
7784                    .or_default()
7785                    .insert(prop.to_string());
7786            } else {
7787                // Bare variable (e.g., RETURN n) — needs all properties materialized
7788                properties
7789                    .entry(name.clone())
7790                    .or_default()
7791                    .insert("*".to_string());
7792            }
7793        }
7794        Expr::Property(base, name) => {
7795            // Extract variable name from the base expression
7796            if let Expr::Variable(var) = base.as_ref() {
7797                properties
7798                    .entry(var.clone())
7799                    .or_default()
7800                    .insert(name.clone());
7801                // Don't recurse into Variable — that would mark it as a bare
7802                // variable reference (adding "*") when it's just a property base.
7803            } else {
7804                // Recurse for complex base expressions (nested property, function call, etc.)
7805                collect_properties_from_expr_into(base, properties);
7806            }
7807        }
7808        Expr::BinaryOp { left, right, .. } => {
7809            collect_properties_from_expr_into(left, properties);
7810            collect_properties_from_expr_into(right, properties);
7811        }
7812        Expr::FunctionCall {
7813            name,
7814            args,
7815            window_spec,
7816            ..
7817        } => {
7818            // Analyze function for property requirements (pushdown hydration)
7819            analyze_function_property_requirements(name, args, properties);
7820
7821            // Collect from arguments
7822            for arg in args {
7823                collect_properties_from_expr_into(arg, properties);
7824            }
7825
7826            // Collect from window spec (PARTITION BY, ORDER BY)
7827            if let Some(spec) = window_spec {
7828                for part_expr in &spec.partition_by {
7829                    collect_properties_from_expr_into(part_expr, properties);
7830                }
7831                for sort_item in &spec.order_by {
7832                    collect_properties_from_expr_into(&sort_item.expr, properties);
7833                }
7834            }
7835        }
7836        Expr::UnaryOp { expr, .. } => {
7837            collect_properties_from_expr_into(expr, properties);
7838        }
7839        Expr::List(items) => {
7840            for item in items {
7841                collect_properties_from_expr_into(item, properties);
7842            }
7843        }
7844        Expr::Map(entries) => {
7845            for (_key, value) in entries {
7846                collect_properties_from_expr_into(value, properties);
7847            }
7848        }
7849        Expr::ListComprehension {
7850            list,
7851            where_clause,
7852            map_expr,
7853            ..
7854        } => {
7855            collect_properties_from_expr_into(list, properties);
7856            if let Some(where_expr) = where_clause {
7857                collect_properties_from_expr_into(where_expr, properties);
7858            }
7859            collect_properties_from_expr_into(map_expr, properties);
7860        }
7861        Expr::Case {
7862            expr,
7863            when_then,
7864            else_expr,
7865        } => {
7866            if let Some(scrutinee_expr) = expr {
7867                collect_properties_from_expr_into(scrutinee_expr, properties);
7868            }
7869            for (when, then) in when_then {
7870                collect_properties_from_expr_into(when, properties);
7871                collect_properties_from_expr_into(then, properties);
7872            }
7873            if let Some(default_expr) = else_expr {
7874                collect_properties_from_expr_into(default_expr, properties);
7875            }
7876        }
7877        Expr::Quantifier {
7878            list, predicate, ..
7879        } => {
7880            collect_properties_from_expr_into(list, properties);
7881            collect_properties_from_expr_into(predicate, properties);
7882        }
7883        Expr::Reduce {
7884            init, list, expr, ..
7885        } => {
7886            collect_properties_from_expr_into(init, properties);
7887            collect_properties_from_expr_into(list, properties);
7888            collect_properties_from_expr_into(expr, properties);
7889        }
7890        Expr::Exists { query, .. } => {
7891            // Walk into EXISTS body to collect property references for outer-scope variables.
7892            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
7893            // are included in the outer scan's property list. Extra properties collected for
7894            // inner-only variables are harmless — the outer scan ignores unknown variable names.
7895            collect_properties_from_subquery(query, properties);
7896        }
7897        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
7898            collect_properties_from_subquery(query, properties);
7899        }
7900        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
7901            collect_properties_from_expr_into(expr, properties);
7902        }
7903        Expr::In { expr, list } => {
7904            collect_properties_from_expr_into(expr, properties);
7905            collect_properties_from_expr_into(list, properties);
7906        }
7907        Expr::ArrayIndex { array, index } => {
7908            if let Expr::Variable(var) = array.as_ref() {
7909                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
7910                    // Static string key: e['name'] → only need that specific property
7911                    properties
7912                        .entry(var.clone())
7913                        .or_default()
7914                        .insert(prop_name.clone());
7915                } else {
7916                    // Dynamic property access: e[prop] → need all properties
7917                    properties
7918                        .entry(var.clone())
7919                        .or_default()
7920                        .insert("*".to_string());
7921                }
7922            }
7923            collect_properties_from_expr_into(array, properties);
7924            collect_properties_from_expr_into(index, properties);
7925        }
7926        Expr::ArraySlice { array, start, end } => {
7927            collect_properties_from_expr_into(array, properties);
7928            if let Some(start_expr) = start {
7929                collect_properties_from_expr_into(start_expr, properties);
7930            }
7931            if let Some(end_expr) = end {
7932                collect_properties_from_expr_into(end_expr, properties);
7933            }
7934        }
7935        Expr::ValidAt {
7936            entity,
7937            timestamp,
7938            start_prop,
7939            end_prop,
7940        } => {
7941            // Extract property requirements from ValidAt expression
7942            if let Expr::Variable(var) = entity.as_ref() {
7943                if let Some(prop) = start_prop {
7944                    properties
7945                        .entry(var.clone())
7946                        .or_default()
7947                        .insert(prop.clone());
7948                }
7949                if let Some(prop) = end_prop {
7950                    properties
7951                        .entry(var.clone())
7952                        .or_default()
7953                        .insert(prop.clone());
7954                }
7955            }
7956            collect_properties_from_expr_into(entity, properties);
7957            collect_properties_from_expr_into(timestamp, properties);
7958        }
7959        Expr::MapProjection { base, items } => {
7960            collect_properties_from_expr_into(base, properties);
7961            for item in items {
7962                match item {
7963                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
7964                        if let Expr::Variable(var) = base.as_ref() {
7965                            properties
7966                                .entry(var.clone())
7967                                .or_default()
7968                                .insert(prop.clone());
7969                        }
7970                    }
7971                    uni_cypher::ast::MapProjectionItem::AllProperties => {
7972                        if let Expr::Variable(var) = base.as_ref() {
7973                            properties
7974                                .entry(var.clone())
7975                                .or_default()
7976                                .insert("*".to_string());
7977                        }
7978                    }
7979                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
7980                        collect_properties_from_expr_into(expr, properties);
7981                    }
7982                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
7983                }
7984            }
7985        }
7986        Expr::LabelCheck { expr, .. } => {
7987            collect_properties_from_expr_into(expr, properties);
7988        }
7989        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
7990        // Mark them with "*" so the outer scan produces structural projections that
7991        // extract_row_params can resolve.
7992        Expr::Parameter(name) => {
7993            properties
7994                .entry(name.clone())
7995                .or_default()
7996                .insert("*".to_string());
7997        }
7998        // Literals and wildcard don't reference properties
7999        Expr::Literal(_) | Expr::Wildcard => {}
8000    }
8001}
8002
8003/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
8004///
8005/// This is needed so that correlated property accesses like `a.city` inside
8006/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
8007/// scan to include `a.city` in its projected columns.
8008fn collect_properties_from_subquery(
8009    query: &Query,
8010    properties: &mut HashMap<String, HashSet<String>>,
8011) {
8012    match query {
8013        Query::Single(stmt) => {
8014            for clause in &stmt.clauses {
8015                match clause {
8016                    Clause::Match(m) => {
8017                        if let Some(ref wc) = m.where_clause {
8018                            collect_properties_from_expr_into(wc, properties);
8019                        }
8020                    }
8021                    Clause::With(w) => {
8022                        for item in &w.items {
8023                            if let ReturnItem::Expr { expr, .. } = item {
8024                                collect_properties_from_expr_into(expr, properties);
8025                            }
8026                        }
8027                        if let Some(ref wc) = w.where_clause {
8028                            collect_properties_from_expr_into(wc, properties);
8029                        }
8030                    }
8031                    Clause::Return(r) => {
8032                        for item in &r.items {
8033                            if let ReturnItem::Expr { expr, .. } = item {
8034                                collect_properties_from_expr_into(expr, properties);
8035                            }
8036                        }
8037                    }
8038                    _ => {}
8039                }
8040            }
8041        }
8042        Query::Union { left, right, .. } => {
8043            collect_properties_from_subquery(left, properties);
8044            collect_properties_from_subquery(right, properties);
8045        }
8046        _ => {}
8047    }
8048}
8049
8050/// Analyze function calls to extract property requirements for pushdown hydration
8051///
8052/// This function examines function calls and their arguments to determine which properties
8053/// need to be loaded for entity arguments. For example:
8054/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
8055/// - keys(n) -> n needs all properties (*)
8056///
8057/// The extracted requirements are added to the properties map for later use during
8058/// scan planning.
8059fn analyze_function_property_requirements(
8060    name: &str,
8061    args: &[Expr],
8062    properties: &mut HashMap<String, HashSet<String>>,
8063) {
8064    use crate::query::function_props::get_function_spec;
8065
8066    /// Helper to mark a variable as needing all properties.
8067    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
8068        properties
8069            .entry(var.to_string())
8070            .or_default()
8071            .insert("*".to_string());
8072    }
8073
8074    let Some(spec) = get_function_spec(name) else {
8075        // Unknown function: conservatively require all properties for variable args
8076        for arg in args {
8077            if let Expr::Variable(var) = arg {
8078                mark_wildcard(var, properties);
8079            }
8080        }
8081        return;
8082    };
8083
8084    // Extract property names from string literal arguments
8085    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
8086        let entity_arg = args.get(entity_arg_idx);
8087        let prop_arg = args.get(prop_arg_idx);
8088
8089        match (entity_arg, prop_arg) {
8090            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
8091                properties
8092                    .entry(var.clone())
8093                    .or_default()
8094                    .insert(prop.clone());
8095            }
8096            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
8097                // Parameter property name: need all properties
8098                mark_wildcard(var, properties);
8099            }
8100            _ => {}
8101        }
8102    }
8103
8104    // Handle full entity requirement (keys(), properties())
8105    if spec.needs_full_entity {
8106        for &idx in spec.entity_args {
8107            if let Some(Expr::Variable(var)) = args.get(idx) {
8108                mark_wildcard(var, properties);
8109            }
8110        }
8111    }
8112}
8113
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Variable name -> required property names, as filled in by the collectors.
    type Requirements = HashMap<String, HashSet<String>>;

    /// Shorthand for a variable-reference expression.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Shorthand for a string-literal expression.
    fn lit(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Build an owned property-name set from string slices.
    fn prop_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// True when `var` has an entry that contains `prop`.
    fn requires(properties: &Requirements, var: &str, prop: &str) -> bool {
        matches!(properties.get(var), Some(set) if set.contains(prop))
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let mut properties = Requirements::new();
        let args = [var("e"), lit("start"), lit("end"), var("ts")];

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["start", "end"])));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let mut properties = Requirements::new();
        let args = [var("n")];

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let mut properties = Requirements::new();
        let args = [var("n")];

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert_eq!(properties.get("n"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let mut properties = Requirements::new();
        let args = [var("e")];

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert_eq!(properties.get("e"), Some(&prop_set(&["*"])));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let mut properties = Requirements::new();
        let args = [
            var("e"),
            Expr::Parameter("start".to_string()),
            Expr::Parameter("end".to_string()),
            var("ts"),
        ];

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(requires(&properties, "e", "*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // The dedicated Expr::ValidAt variant records both bound properties.
        let mut properties = Requirements::new();
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        assert!(requires(&properties, "e", "valid_from"));
        assert!(requires(&properties, "e", "valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let mut properties = Requirements::new();
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(requires(&properties, "e", "*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let mut properties = Requirements::new();
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(requires(&properties, "e", "name"));
    }
}