Skip to main content

uni_query/query/
planner.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use crate::query::pushdown::{PredicateAnalyzer, try_label_or_to_union, try_type_or_to_union};
5use anyhow::{Result, anyhow};
6use arrow_array::RecordBatch;
7use arrow_schema::{DataType, SchemaRef};
8use parking_lot::RwLock;
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11use uni_common::Value;
12use uni_common::core::schema::{
13    DistanceMetric, EmbeddingConfig, FullTextIndexConfig, IndexDefinition, JsonFtsIndexConfig,
14    ScalarIndexConfig, ScalarIndexType, Schema, TokenizerConfig, VectorIndexConfig,
15    VectorIndexType,
16};
17use uni_cypher::ast::{
18    AlterEdgeType, AlterLabel, BinaryOp, CallKind, Clause, CreateConstraint, CreateEdgeType,
19    CreateLabel, CypherLiteral, Direction, DropConstraint, DropEdgeType, DropLabel, Expr,
20    MatchClause, MergeClause, NodePattern, PathPattern, Pattern, PatternElement, Query,
21    RelationshipPattern, RemoveItem, ReturnClause, ReturnItem, SchemaCommand, SetClause, SetItem,
22    ShortestPathMode, ShowConstraints, SortItem, Statement, WindowSpec, WithClause,
23    WithRecursiveClause,
24};
25
/// Sentinel column name inserted into a variable's property set to request
/// that the planner build only the bare struct column
/// (`add_structural_projection`) WITHOUT pulling the full schema.
///
/// Emitted by `mark_set_item_variables` for `SetItem::Property` targets only.
/// Other SET variants (`Labels`, `Variable`, `VariablePlus`) and REMOVE still
/// emit `"*"` because they replace/merge the whole node.
///
/// **Union semantics:** when both `"*"` and the sentinel appear in the same
/// variable's `HashSet` (e.g. `SET n.x = 1 RETURN n` collects both), `"*"`
/// dominates and schema expansion still happens. The sentinel only changes
/// behavior when it is the sole structural marker present.
///
/// **Reserved-name convention:** the double-underscore prefix marks this name
/// as internal.
///
/// TODO(follow-up): schema validation should reject user-declared properties
/// with this name; that check is noted as deferred and is not enforced here.
pub(crate) const STRUCT_ONLY_SENTINEL: &str = "__set_struct__";
43
/// Semantic category of an in-scope variable, used for semantic validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariableType {
    /// A node binding, e.g. from `MATCH (n)` or `CREATE (n)`.
    Node,
    /// An edge/relationship binding, e.g. from `MATCH ()-[r]->()`.
    Edge,
    /// A path binding, e.g. from `MATCH p = (a)-[*]->(b)`.
    Path,
    /// A scalar binding, e.g. from `WITH expr AS x` or `UNWIND list AS item`.
    /// It may hold a map or a dynamic value, so property access is allowed.
    Scalar,
    /// A scalar known to come from a non-graph literal (int, float, bool,
    /// string, list). Property access is NOT allowed on these at compile time.
    ScalarLiteral,
    /// A variable imported from an outer scope whose type is unknown (the
    /// string variables handed to `plan_with_scope`). It is compatible with
    /// every concrete type, which lets subqueries re-bind the variable.
    Imported,
}

impl VariableType {
    /// Reports whether a variable of this type may be used where `expected`
    /// is required.
    ///
    /// * `Imported` is accepted everywhere, since its real type is not known
    ///   at plan time.
    /// * `ScalarLiteral` is accepted wherever `Scalar` is.
    /// * Every other type is accepted only for an identical `expected`.
    fn is_compatible_with(self, expected: VariableType) -> bool {
        match self {
            VariableType::Imported => true,
            VariableType::ScalarLiteral => matches!(
                expected,
                VariableType::Scalar | VariableType::ScalarLiteral
            ),
            concrete => concrete == expected,
        }
    }
}
75
76/// Information about a variable in scope during planning.
77#[derive(Debug, Clone)]
78pub struct VariableInfo {
79    /// Variable name as written in the query.
80    pub name: String,
81    /// Semantic type of the variable.
82    pub var_type: VariableType,
83    /// True if this is a variable-length path (VLP) step variable.
84    ///
85    /// VLP step variables are typed as Edge but semantically hold edge lists.
86    pub is_vlp: bool,
87}
88
89impl VariableInfo {
90    pub fn new(name: String, var_type: VariableType) -> Self {
91        Self {
92            name,
93            var_type,
94            is_vlp: false,
95        }
96    }
97}
98
99/// Find a variable in scope by name.
100fn find_var_in_scope<'a>(vars: &'a [VariableInfo], name: &str) -> Option<&'a VariableInfo> {
101    vars.iter().find(|v| v.name == name)
102}
103
104/// Check if a variable is in scope.
105fn is_var_in_scope(vars: &[VariableInfo], name: &str) -> bool {
106    find_var_in_scope(vars, name).is_some()
107}
108
109/// Check if an expression contains a pattern predicate.
110fn contains_pattern_predicate(expr: &Expr) -> bool {
111    if matches!(
112        expr,
113        Expr::Exists {
114            from_pattern_predicate: true,
115            ..
116        }
117    ) {
118        return true;
119    }
120    let mut found = false;
121    expr.for_each_child(&mut |child| {
122        if !found {
123            found = contains_pattern_predicate(child);
124        }
125    });
126    found
127}
128
129/// Add a variable to scope with type conflict validation.
130/// Returns an error if the variable already exists with a different type.
131fn add_var_to_scope(
132    vars: &mut Vec<VariableInfo>,
133    name: &str,
134    var_type: VariableType,
135) -> Result<()> {
136    if name.is_empty() {
137        return Ok(());
138    }
139
140    if let Some(existing) = vars.iter_mut().find(|v| v.name == name) {
141        if existing.var_type == VariableType::Imported {
142            // Imported vars upgrade to the concrete type
143            existing.var_type = var_type;
144        } else if var_type == VariableType::Imported || existing.var_type == var_type {
145            // New type is Imported (keep existing) or same type — no conflict
146        } else if matches!(
147            existing.var_type,
148            VariableType::Scalar | VariableType::ScalarLiteral
149        ) && matches!(var_type, VariableType::Node | VariableType::Edge)
150        {
151            // Scalar can be used as Node/Edge in CREATE context — a scalar
152            // holding a node/edge reference is valid for pattern use
153            existing.var_type = var_type;
154        } else {
155            return Err(anyhow!(
156                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as {:?}",
157                name,
158                existing.var_type,
159                var_type
160            ));
161        }
162    } else {
163        vars.push(VariableInfo::new(name.to_string(), var_type));
164    }
165    Ok(())
166}
167
168/// Convert VariableInfo vec to String vec for backward compatibility
169fn vars_to_strings(vars: &[VariableInfo]) -> Vec<String> {
170    vars.iter().map(|v| v.name.clone()).collect()
171}
172
173fn infer_with_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
174    match expr {
175        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
176            .map(|info| info.var_type)
177            .unwrap_or(VariableType::Scalar),
178        Expr::Literal(CypherLiteral::Null) => VariableType::Imported,
179        // Known non-graph literals: property access is NOT valid on these.
180        Expr::Literal(CypherLiteral::Integer(_))
181        | Expr::Literal(CypherLiteral::Float(_))
182        | Expr::Literal(CypherLiteral::String(_))
183        | Expr::Literal(CypherLiteral::Bool(_))
184        | Expr::Literal(CypherLiteral::Bytes(_)) => VariableType::ScalarLiteral,
185        Expr::FunctionCall { name, args, .. } => {
186            let lower = name.to_lowercase();
187            if lower == "coalesce" {
188                infer_coalesce_type(args, vars_in_scope)
189            } else if lower == "collect" && !args.is_empty() {
190                let collected = infer_with_output_type(&args[0], vars_in_scope);
191                if matches!(
192                    collected,
193                    VariableType::Node
194                        | VariableType::Edge
195                        | VariableType::Path
196                        | VariableType::Imported
197                ) {
198                    collected
199                } else {
200                    VariableType::Scalar
201                }
202            } else {
203                VariableType::Scalar
204            }
205        }
206        // WITH list literals/expressions produce scalar list values. Preserving
207        // entity typing here causes invalid node/edge reuse in later MATCH clauses
208        // (e.g. WITH [n] AS users; MATCH (users)-->() should fail at compile time).
209        // Lists are ScalarLiteral since property access is not valid on them.
210        Expr::List(_) => VariableType::ScalarLiteral,
211        _ => VariableType::Scalar,
212    }
213}
214
215fn infer_coalesce_type(args: &[Expr], vars_in_scope: &[VariableInfo]) -> VariableType {
216    let mut resolved: Option<VariableType> = None;
217    let mut saw_imported = false;
218    for arg in args {
219        let t = infer_with_output_type(arg, vars_in_scope);
220        match t {
221            VariableType::Node | VariableType::Edge | VariableType::Path => {
222                if let Some(existing) = resolved {
223                    if existing != t {
224                        return VariableType::Scalar;
225                    }
226                } else {
227                    resolved = Some(t);
228                }
229            }
230            VariableType::Imported => saw_imported = true,
231            VariableType::Scalar | VariableType::ScalarLiteral => {}
232        }
233    }
234    if let Some(t) = resolved {
235        t
236    } else if saw_imported {
237        VariableType::Imported
238    } else {
239        VariableType::Scalar
240    }
241}
242
243fn infer_unwind_output_type(expr: &Expr, vars_in_scope: &[VariableInfo]) -> VariableType {
244    match expr {
245        Expr::Variable(v) => find_var_in_scope(vars_in_scope, v)
246            .map(|info| info.var_type)
247            .unwrap_or(VariableType::Scalar),
248        Expr::FunctionCall { name, args, .. }
249            if name.eq_ignore_ascii_case("collect") && !args.is_empty() =>
250        {
251            infer_with_output_type(&args[0], vars_in_scope)
252        }
253        Expr::List(items) => {
254            let mut inferred: Option<VariableType> = None;
255            for item in items {
256                let t = infer_with_output_type(item, vars_in_scope);
257                if !matches!(
258                    t,
259                    VariableType::Node
260                        | VariableType::Edge
261                        | VariableType::Path
262                        | VariableType::Imported
263                ) {
264                    return VariableType::Scalar;
265                }
266                if let Some(existing) = inferred {
267                    if existing != t
268                        && t != VariableType::Imported
269                        && existing != VariableType::Imported
270                    {
271                        return VariableType::Scalar;
272                    }
273                    if existing == VariableType::Imported && t != VariableType::Imported {
274                        inferred = Some(t);
275                    }
276                } else {
277                    inferred = Some(t);
278                }
279            }
280            inferred.unwrap_or(VariableType::Scalar)
281        }
282        _ => VariableType::Scalar,
283    }
284}
285
286/// Collect all variable names referenced in an expression
287fn collect_expr_variables(expr: &Expr) -> Vec<String> {
288    let mut vars = Vec::new();
289    collect_expr_variables_inner(expr, &mut vars);
290    vars
291}
292
293/// Collect the names of `$param` references in a constant-foldable expression.
294///
295/// Walks the variants that `eval_const_numeric_expr` accepts (the only shapes a
296/// successfully-folded `LIMIT`/`SKIP` expression can take): parameters,
297/// literals, unary/binary arithmetic, and the whitelisted numeric functions.
298/// Used to tell the plan cache which parameter values were baked into the plan.
299fn collect_expr_parameters(expr: &Expr, names: &mut Vec<String>) {
300    match expr {
301        Expr::Parameter(name) => {
302            if !names.contains(name) {
303                names.push(name.clone());
304            }
305        }
306        Expr::UnaryOp { expr: e, .. } => collect_expr_parameters(e, names),
307        Expr::BinaryOp { left, right, .. } => {
308            collect_expr_parameters(left, names);
309            collect_expr_parameters(right, names);
310        }
311        Expr::FunctionCall { args, .. } => {
312            for a in args {
313                collect_expr_parameters(a, names);
314            }
315        }
316        _ => {}
317    }
318}
319
320fn collect_expr_variables_inner(expr: &Expr, vars: &mut Vec<String>) {
321    let mut add_var = |name: &String| {
322        if !vars.contains(name) {
323            vars.push(name.clone());
324        }
325    };
326
327    match expr {
328        Expr::Variable(name) => add_var(name),
329        Expr::Property(base, _) => collect_expr_variables_inner(base, vars),
330        Expr::BinaryOp { left, right, .. } => {
331            collect_expr_variables_inner(left, vars);
332            collect_expr_variables_inner(right, vars);
333        }
334        Expr::UnaryOp { expr: e, .. }
335        | Expr::IsNull(e)
336        | Expr::IsNotNull(e)
337        | Expr::IsUnique(e) => collect_expr_variables_inner(e, vars),
338        Expr::FunctionCall { args, .. } => {
339            for a in args {
340                collect_expr_variables_inner(a, vars);
341            }
342        }
343        Expr::List(items) => {
344            for item in items {
345                collect_expr_variables_inner(item, vars);
346            }
347        }
348        Expr::In { expr: e, list } => {
349            collect_expr_variables_inner(e, vars);
350            collect_expr_variables_inner(list, vars);
351        }
352        Expr::Case {
353            expr: case_expr,
354            when_then,
355            else_expr,
356        } => {
357            if let Some(e) = case_expr {
358                collect_expr_variables_inner(e, vars);
359            }
360            for (w, t) in when_then {
361                collect_expr_variables_inner(w, vars);
362                collect_expr_variables_inner(t, vars);
363            }
364            if let Some(e) = else_expr {
365                collect_expr_variables_inner(e, vars);
366            }
367        }
368        Expr::Map(entries) => {
369            for (_, v) in entries {
370                collect_expr_variables_inner(v, vars);
371            }
372        }
373        Expr::LabelCheck { expr, .. } => collect_expr_variables_inner(expr, vars),
374        Expr::ArrayIndex { array, index } => {
375            collect_expr_variables_inner(array, vars);
376            collect_expr_variables_inner(index, vars);
377        }
378        Expr::ArraySlice { array, start, end } => {
379            collect_expr_variables_inner(array, vars);
380            if let Some(s) = start {
381                collect_expr_variables_inner(s, vars);
382            }
383            if let Some(e) = end {
384                collect_expr_variables_inner(e, vars);
385            }
386        }
387        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
388        // they introduce local variable bindings not in outer scope.
389        _ => {}
390    }
391}
392
393/// Rewrite ORDER BY expressions to resolve projection aliases back to their source expressions.
394///
395/// Example: `RETURN r AS rel ORDER BY rel.id` becomes `ORDER BY r.id` so Sort can run
396/// before the final RETURN projection without losing alias semantics.
397fn rewrite_order_by_expr_with_aliases(expr: &Expr, aliases: &HashMap<String, Expr>) -> Expr {
398    let repr = expr.to_string_repr();
399    if let Some(rewritten) = aliases.get(&repr) {
400        return rewritten.clone();
401    }
402
403    match expr {
404        Expr::Variable(name) => aliases.get(name).cloned().unwrap_or_else(|| expr.clone()),
405        Expr::Property(base, prop) => Expr::Property(
406            Box::new(rewrite_order_by_expr_with_aliases(base, aliases)),
407            prop.clone(),
408        ),
409        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
410            left: Box::new(rewrite_order_by_expr_with_aliases(left, aliases)),
411            op: *op,
412            right: Box::new(rewrite_order_by_expr_with_aliases(right, aliases)),
413        },
414        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
415            op: *op,
416            expr: Box::new(rewrite_order_by_expr_with_aliases(inner, aliases)),
417        },
418        Expr::FunctionCall {
419            name,
420            args,
421            distinct,
422            window_spec,
423        } => Expr::FunctionCall {
424            name: name.clone(),
425            args: args
426                .iter()
427                .map(|a| rewrite_order_by_expr_with_aliases(a, aliases))
428                .collect(),
429            distinct: *distinct,
430            window_spec: window_spec.clone(),
431        },
432        Expr::List(items) => Expr::List(
433            items
434                .iter()
435                .map(|item| rewrite_order_by_expr_with_aliases(item, aliases))
436                .collect(),
437        ),
438        Expr::Map(entries) => Expr::Map(
439            entries
440                .iter()
441                .map(|(k, v)| (k.clone(), rewrite_order_by_expr_with_aliases(v, aliases)))
442                .collect(),
443        ),
444        Expr::Case {
445            expr: case_expr,
446            when_then,
447            else_expr,
448        } => Expr::Case {
449            expr: case_expr
450                .as_ref()
451                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
452            when_then: when_then
453                .iter()
454                .map(|(w, t)| {
455                    (
456                        rewrite_order_by_expr_with_aliases(w, aliases),
457                        rewrite_order_by_expr_with_aliases(t, aliases),
458                    )
459                })
460                .collect(),
461            else_expr: else_expr
462                .as_ref()
463                .map(|e| Box::new(rewrite_order_by_expr_with_aliases(e, aliases))),
464        },
465        // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
466        // they introduce local variable bindings that could shadow aliases.
467        _ => expr.clone(),
468    }
469}
470
471/// Validate function call argument types.
472/// Returns error if type constraints are violated.
473fn validate_function_call(name: &str, args: &[Expr], vars_in_scope: &[VariableInfo]) -> Result<()> {
474    let name_lower = name.to_lowercase();
475
476    // labels() requires Node
477    if name_lower == "labels"
478        && let Some(Expr::Variable(var_name)) = args.first()
479        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
480        && !info.var_type.is_compatible_with(VariableType::Node)
481    {
482        return Err(anyhow!(
483            "SyntaxError: InvalidArgumentType - labels() requires a node argument"
484        ));
485    }
486
487    // type() requires Edge
488    if name_lower == "type"
489        && let Some(Expr::Variable(var_name)) = args.first()
490        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
491        && !info.var_type.is_compatible_with(VariableType::Edge)
492    {
493        return Err(anyhow!(
494            "SyntaxError: InvalidArgumentType - type() requires a relationship argument"
495        ));
496    }
497
498    // properties() requires Node/Edge/Map (not scalar literals)
499    if name_lower == "properties"
500        && let Some(arg) = args.first()
501    {
502        match arg {
503            Expr::Literal(CypherLiteral::Integer(_))
504            | Expr::Literal(CypherLiteral::Float(_))
505            | Expr::Literal(CypherLiteral::String(_))
506            | Expr::Literal(CypherLiteral::Bool(_))
507            | Expr::List(_) => {
508                return Err(anyhow!(
509                    "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
510                ));
511            }
512            Expr::Variable(var_name) => {
513                if let Some(info) = find_var_in_scope(vars_in_scope, var_name)
514                    && matches!(
515                        info.var_type,
516                        VariableType::Scalar | VariableType::ScalarLiteral
517                    )
518                {
519                    return Err(anyhow!(
520                        "SyntaxError: InvalidArgumentType - properties() requires a node, relationship, or map"
521                    ));
522                }
523            }
524            _ => {}
525        }
526    }
527
528    // nodes()/relationships() require Path
529    if (name_lower == "nodes" || name_lower == "relationships")
530        && let Some(Expr::Variable(var_name)) = args.first()
531        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
532        && !info.var_type.is_compatible_with(VariableType::Path)
533    {
534        return Err(anyhow!(
535            "SyntaxError: InvalidArgumentType - {}() requires a path argument",
536            name_lower
537        ));
538    }
539
540    // size() does NOT accept Path arguments (length() on paths IS valid — returns relationship count)
541    if name_lower == "size"
542        && let Some(Expr::Variable(var_name)) = args.first()
543        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
544        && info.var_type == VariableType::Path
545    {
546        return Err(anyhow!(
547            "SyntaxError: InvalidArgumentType - size() requires a string, list, or map argument"
548        ));
549    }
550
551    // length()/size() do NOT accept Node or single-Edge arguments.
552    // VLP step variables (e.g. `r` in `-[r*1..2]->`) are typed as Edge
553    // but are actually edge lists — size()/length() is valid on those.
554    if (name_lower == "length" || name_lower == "size")
555        && let Some(Expr::Variable(var_name)) = args.first()
556        && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
557        && (info.var_type == VariableType::Node
558            || (info.var_type == VariableType::Edge && !info.is_vlp))
559    {
560        return Err(anyhow!(
561            "SyntaxError: InvalidArgumentType - {}() requires a string, list, or path argument",
562            name_lower
563        ));
564    }
565
566    Ok(())
567}
568
569/// Check if an expression is a non-boolean literal.
570fn is_non_boolean_literal(expr: &Expr) -> bool {
571    matches!(
572        expr,
573        Expr::Literal(CypherLiteral::Integer(_))
574            | Expr::Literal(CypherLiteral::Float(_))
575            | Expr::Literal(CypherLiteral::String(_))
576            | Expr::List(_)
577            | Expr::Map(_)
578    )
579}
580
581/// Validate boolean expressions (AND/OR/NOT require boolean arguments).
582fn validate_boolean_expression(expr: &Expr) -> Result<()> {
583    // Check AND/OR/XOR operands and NOT operand for non-boolean literals
584    if let Expr::BinaryOp { left, op, right } = expr
585        && matches!(op, BinaryOp::And | BinaryOp::Or | BinaryOp::Xor)
586    {
587        let op_name = format!("{op:?}").to_uppercase();
588        for operand in [left.as_ref(), right.as_ref()] {
589            if is_non_boolean_literal(operand) {
590                return Err(anyhow!(
591                    "SyntaxError: InvalidArgumentType - {} requires boolean arguments",
592                    op_name
593                ));
594            }
595        }
596    }
597    if let Expr::UnaryOp {
598        op: uni_cypher::ast::UnaryOp::Not,
599        expr: inner,
600    } = expr
601        && is_non_boolean_literal(inner)
602    {
603        return Err(anyhow!(
604            "SyntaxError: InvalidArgumentType - NOT requires a boolean argument"
605        ));
606    }
607    let mut result = Ok(());
608    expr.for_each_child(&mut |child| {
609        if result.is_ok() {
610            result = validate_boolean_expression(child);
611        }
612    });
613    result
614}
615
616/// Validate that all variables used in an expression are in scope.
617fn validate_expression_variables(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
618    let used_vars = collect_expr_variables(expr);
619    for var in used_vars {
620        if !is_var_in_scope(vars_in_scope, &var) {
621            return Err(anyhow!(
622                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
623                var
624            ));
625        }
626    }
627    Ok(())
628}
629
630/// Check if a function name (lowercase) is an aggregate function.
631fn is_aggregate_function_name(name: &str) -> bool {
632    matches!(
633        name.to_lowercase().as_str(),
634        "count"
635            | "sum"
636            | "avg"
637            | "min"
638            | "max"
639            | "collect"
640            | "stdev"
641            | "stddev"
642            | "stdevp"
643            | "stddevp"
644            | "variance"
645            | "variancep"
646            | "percentiledisc"
647            | "percentilecont"
648            | "btic_min"
649            | "btic_max"
650            | "btic_span_agg"
651            | "btic_count_at"
652    ) || uni_cypher::is_known_plugin_aggregate(name)
653}
654
655/// Returns true if the expression is a window function (FunctionCall with window_spec).
656fn is_window_function(expr: &Expr) -> bool {
657    matches!(
658        expr,
659        Expr::FunctionCall {
660            window_spec: Some(_),
661            ..
662        }
663    )
664}
665
666/// Returns true when `expr` reports `is_aggregate()` but is NOT itself a bare
667/// aggregate FunctionCall (or CountSubquery/CollectSubquery). In other words,
668/// the aggregate lives *inside* a wrapper expression (e.g. a ListComprehension,
669/// size() call, BinaryOp, etc.).
670fn is_compound_aggregate(expr: &Expr) -> bool {
671    if !expr.is_aggregate() {
672        return false;
673    }
674    match expr {
675        Expr::FunctionCall {
676            name, window_spec, ..
677        } => {
678            // A bare aggregate FunctionCall is NOT compound
679            if window_spec.is_some() {
680                return true; // window wrapping an aggregate — treat as compound
681            }
682            !is_aggregate_function_name(name)
683        }
684        // Subquery aggregates are "bare" (not compound)
685        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => false,
686        // Everything else (ListComprehension, BinaryOp, etc.) is compound
687        _ => true,
688    }
689}
690
691/// Recursively collect all bare aggregate FunctionCall sub-expressions from
692/// `expr`. Stops recursing into the *arguments* of an aggregate (we only want
693/// the outermost aggregate boundaries).
694///
695/// For `ListComprehension`, `Quantifier`, and `Reduce`, only the `list` field
696/// is searched because the body (`map_expr`, `predicate`, `expr`) references
697/// the loop variable, not outer-scope aggregates.
698fn extract_inner_aggregates(expr: &Expr) -> Vec<Expr> {
699    let mut out = Vec::new();
700    extract_inner_aggregates_rec(expr, &mut out);
701    out
702}
703
704fn extract_inner_aggregates_rec(expr: &Expr, out: &mut Vec<Expr>) {
705    match expr {
706        Expr::FunctionCall {
707            name, window_spec, ..
708        } if window_spec.is_none() && is_aggregate_function_name(name) => {
709            // Found a bare aggregate — collect it and stop recursing
710            out.push(expr.clone());
711        }
712        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
713            out.push(expr.clone());
714        }
715        // For list comprehension, only search the `list` source for aggregates
716        Expr::ListComprehension { list, .. } => {
717            extract_inner_aggregates_rec(list, out);
718        }
719        // For quantifier, only search the `list` source
720        Expr::Quantifier { list, .. } => {
721            extract_inner_aggregates_rec(list, out);
722        }
723        // For reduce, search `init` and `list` (not the body `expr`)
724        Expr::Reduce { init, list, .. } => {
725            extract_inner_aggregates_rec(init, out);
726            extract_inner_aggregates_rec(list, out);
727        }
728        // Standard recursive cases
729        Expr::FunctionCall { args, .. } => {
730            for arg in args {
731                extract_inner_aggregates_rec(arg, out);
732            }
733        }
734        Expr::BinaryOp { left, right, .. } => {
735            extract_inner_aggregates_rec(left, out);
736            extract_inner_aggregates_rec(right, out);
737        }
738        Expr::UnaryOp { expr: e, .. }
739        | Expr::IsNull(e)
740        | Expr::IsNotNull(e)
741        | Expr::IsUnique(e) => extract_inner_aggregates_rec(e, out),
742        Expr::Property(base, _) => extract_inner_aggregates_rec(base, out),
743        Expr::List(items) => {
744            for item in items {
745                extract_inner_aggregates_rec(item, out);
746            }
747        }
748        Expr::Case {
749            expr: case_expr,
750            when_then,
751            else_expr,
752        } => {
753            if let Some(e) = case_expr {
754                extract_inner_aggregates_rec(e, out);
755            }
756            for (w, t) in when_then {
757                extract_inner_aggregates_rec(w, out);
758                extract_inner_aggregates_rec(t, out);
759            }
760            if let Some(e) = else_expr {
761                extract_inner_aggregates_rec(e, out);
762            }
763        }
764        Expr::In {
765            expr: in_expr,
766            list,
767        } => {
768            extract_inner_aggregates_rec(in_expr, out);
769            extract_inner_aggregates_rec(list, out);
770        }
771        Expr::ArrayIndex { array, index } => {
772            extract_inner_aggregates_rec(array, out);
773            extract_inner_aggregates_rec(index, out);
774        }
775        Expr::ArraySlice { array, start, end } => {
776            extract_inner_aggregates_rec(array, out);
777            if let Some(s) = start {
778                extract_inner_aggregates_rec(s, out);
779            }
780            if let Some(e) = end {
781                extract_inner_aggregates_rec(e, out);
782            }
783        }
784        Expr::Map(entries) => {
785            for (_, v) in entries {
786                extract_inner_aggregates_rec(v, out);
787            }
788        }
789        _ => {}
790    }
791}
792
793/// Return a copy of `expr` with every inner aggregate FunctionCall replaced by
794/// `Expr::Variable(aggregate_column_name(agg))`.
795///
796/// For `ListComprehension`/`Quantifier`/`Reduce`, only the `list` field is
797/// rewritten (the body references the loop variable, not outer-scope columns).
798fn replace_aggregates_with_columns(expr: &Expr) -> Expr {
799    match expr {
800        Expr::FunctionCall {
801            name, window_spec, ..
802        } if window_spec.is_none() && is_aggregate_function_name(name) => {
803            // Replace bare aggregate with column reference
804            Expr::Variable(aggregate_column_name(expr))
805        }
806        Expr::CountSubquery(_) | Expr::CollectSubquery(_) => {
807            Expr::Variable(aggregate_column_name(expr))
808        }
809        Expr::ListComprehension {
810            variable,
811            list,
812            where_clause,
813            map_expr,
814        } => Expr::ListComprehension {
815            variable: variable.clone(),
816            list: Box::new(replace_aggregates_with_columns(list)),
817            where_clause: where_clause.clone(), // don't touch — references loop var
818            map_expr: map_expr.clone(),         // don't touch — references loop var
819        },
820        Expr::Quantifier {
821            quantifier,
822            variable,
823            list,
824            predicate,
825        } => Expr::Quantifier {
826            quantifier: *quantifier,
827            variable: variable.clone(),
828            list: Box::new(replace_aggregates_with_columns(list)),
829            predicate: predicate.clone(), // don't touch — references loop var
830        },
831        Expr::Reduce {
832            accumulator,
833            init,
834            variable,
835            list,
836            expr: body,
837        } => Expr::Reduce {
838            accumulator: accumulator.clone(),
839            init: Box::new(replace_aggregates_with_columns(init)),
840            variable: variable.clone(),
841            list: Box::new(replace_aggregates_with_columns(list)),
842            expr: body.clone(), // don't touch — references loop var
843        },
844        Expr::FunctionCall {
845            name,
846            args,
847            distinct,
848            window_spec,
849        } => Expr::FunctionCall {
850            name: name.clone(),
851            args: args.iter().map(replace_aggregates_with_columns).collect(),
852            distinct: *distinct,
853            window_spec: window_spec.clone(),
854        },
855        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
856            left: Box::new(replace_aggregates_with_columns(left)),
857            op: *op,
858            right: Box::new(replace_aggregates_with_columns(right)),
859        },
860        Expr::UnaryOp { op, expr: e } => Expr::UnaryOp {
861            op: *op,
862            expr: Box::new(replace_aggregates_with_columns(e)),
863        },
864        Expr::IsNull(e) => Expr::IsNull(Box::new(replace_aggregates_with_columns(e))),
865        Expr::IsNotNull(e) => Expr::IsNotNull(Box::new(replace_aggregates_with_columns(e))),
866        Expr::IsUnique(e) => Expr::IsUnique(Box::new(replace_aggregates_with_columns(e))),
867        Expr::Property(base, prop) => Expr::Property(
868            Box::new(replace_aggregates_with_columns(base)),
869            prop.clone(),
870        ),
871        Expr::List(items) => {
872            Expr::List(items.iter().map(replace_aggregates_with_columns).collect())
873        }
874        Expr::Case {
875            expr: case_expr,
876            when_then,
877            else_expr,
878        } => Expr::Case {
879            expr: case_expr
880                .as_ref()
881                .map(|e| Box::new(replace_aggregates_with_columns(e))),
882            when_then: when_then
883                .iter()
884                .map(|(w, t)| {
885                    (
886                        replace_aggregates_with_columns(w),
887                        replace_aggregates_with_columns(t),
888                    )
889                })
890                .collect(),
891            else_expr: else_expr
892                .as_ref()
893                .map(|e| Box::new(replace_aggregates_with_columns(e))),
894        },
895        Expr::In {
896            expr: in_expr,
897            list,
898        } => Expr::In {
899            expr: Box::new(replace_aggregates_with_columns(in_expr)),
900            list: Box::new(replace_aggregates_with_columns(list)),
901        },
902        Expr::ArrayIndex { array, index } => Expr::ArrayIndex {
903            array: Box::new(replace_aggregates_with_columns(array)),
904            index: Box::new(replace_aggregates_with_columns(index)),
905        },
906        Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
907            array: Box::new(replace_aggregates_with_columns(array)),
908            start: start
909                .as_ref()
910                .map(|e| Box::new(replace_aggregates_with_columns(e))),
911            end: end
912                .as_ref()
913                .map(|e| Box::new(replace_aggregates_with_columns(e))),
914        },
915        Expr::Map(entries) => Expr::Map(
916            entries
917                .iter()
918                .map(|(k, v)| (k.clone(), replace_aggregates_with_columns(v)))
919                .collect(),
920        ),
921        // Leaf expressions — return as-is
922        other => other.clone(),
923    }
924}
925
926/// Check if an expression contains any aggregate function (recursively).
927fn contains_aggregate_recursive(expr: &Expr) -> bool {
928    match expr {
929        Expr::FunctionCall { name, args, .. } => {
930            is_aggregate_function_name(name) || args.iter().any(contains_aggregate_recursive)
931        }
932        Expr::BinaryOp { left, right, .. } => {
933            contains_aggregate_recursive(left) || contains_aggregate_recursive(right)
934        }
935        Expr::UnaryOp { expr: e, .. }
936        | Expr::IsNull(e)
937        | Expr::IsNotNull(e)
938        | Expr::IsUnique(e) => contains_aggregate_recursive(e),
939        Expr::List(items) => items.iter().any(contains_aggregate_recursive),
940        Expr::Case {
941            expr,
942            when_then,
943            else_expr,
944        } => {
945            expr.as_deref().is_some_and(contains_aggregate_recursive)
946                || when_then.iter().any(|(w, t)| {
947                    contains_aggregate_recursive(w) || contains_aggregate_recursive(t)
948                })
949                || else_expr
950                    .as_deref()
951                    .is_some_and(contains_aggregate_recursive)
952        }
953        Expr::In { expr, list } => {
954            contains_aggregate_recursive(expr) || contains_aggregate_recursive(list)
955        }
956        Expr::Property(base, _) => contains_aggregate_recursive(base),
957        Expr::ListComprehension { list, .. } => {
958            // Only check the list source — where_clause/map_expr reference the loop variable
959            contains_aggregate_recursive(list)
960        }
961        Expr::Quantifier { list, .. } => contains_aggregate_recursive(list),
962        Expr::Reduce { init, list, .. } => {
963            contains_aggregate_recursive(init) || contains_aggregate_recursive(list)
964        }
965        Expr::ArrayIndex { array, index } => {
966            contains_aggregate_recursive(array) || contains_aggregate_recursive(index)
967        }
968        Expr::ArraySlice { array, start, end } => {
969            contains_aggregate_recursive(array)
970                || start.as_deref().is_some_and(contains_aggregate_recursive)
971                || end.as_deref().is_some_and(contains_aggregate_recursive)
972        }
973        Expr::Map(entries) => entries.iter().any(|(_, v)| contains_aggregate_recursive(v)),
974        _ => false,
975    }
976}
977
978/// Check if an expression contains a non-deterministic function (e.g. rand()).
979fn contains_non_deterministic(expr: &Expr) -> bool {
980    if matches!(expr, Expr::FunctionCall { name, .. } if name.eq_ignore_ascii_case("rand")) {
981        return true;
982    }
983    let mut found = false;
984    expr.for_each_child(&mut |child| {
985        if !found {
986            found = contains_non_deterministic(child);
987        }
988    });
989    found
990}
991
992fn collect_aggregate_reprs(expr: &Expr, out: &mut HashSet<String>) {
993    match expr {
994        Expr::FunctionCall { name, args, .. } => {
995            if is_aggregate_function_name(name) {
996                out.insert(expr.to_string_repr());
997                return;
998            }
999            for arg in args {
1000                collect_aggregate_reprs(arg, out);
1001            }
1002        }
1003        Expr::BinaryOp { left, right, .. } => {
1004            collect_aggregate_reprs(left, out);
1005            collect_aggregate_reprs(right, out);
1006        }
1007        Expr::UnaryOp { expr, .. }
1008        | Expr::IsNull(expr)
1009        | Expr::IsNotNull(expr)
1010        | Expr::IsUnique(expr) => collect_aggregate_reprs(expr, out),
1011        Expr::List(items) => {
1012            for item in items {
1013                collect_aggregate_reprs(item, out);
1014            }
1015        }
1016        Expr::Case {
1017            expr,
1018            when_then,
1019            else_expr,
1020        } => {
1021            if let Some(e) = expr {
1022                collect_aggregate_reprs(e, out);
1023            }
1024            for (w, t) in when_then {
1025                collect_aggregate_reprs(w, out);
1026                collect_aggregate_reprs(t, out);
1027            }
1028            if let Some(e) = else_expr {
1029                collect_aggregate_reprs(e, out);
1030            }
1031        }
1032        Expr::In { expr, list } => {
1033            collect_aggregate_reprs(expr, out);
1034            collect_aggregate_reprs(list, out);
1035        }
1036        Expr::Property(base, _) => collect_aggregate_reprs(base, out),
1037        Expr::ListComprehension { list, .. } => {
1038            collect_aggregate_reprs(list, out);
1039        }
1040        Expr::Quantifier { list, .. } => {
1041            collect_aggregate_reprs(list, out);
1042        }
1043        Expr::Reduce { init, list, .. } => {
1044            collect_aggregate_reprs(init, out);
1045            collect_aggregate_reprs(list, out);
1046        }
1047        Expr::ArrayIndex { array, index } => {
1048            collect_aggregate_reprs(array, out);
1049            collect_aggregate_reprs(index, out);
1050        }
1051        Expr::ArraySlice { array, start, end } => {
1052            collect_aggregate_reprs(array, out);
1053            if let Some(s) = start {
1054                collect_aggregate_reprs(s, out);
1055            }
1056            if let Some(e) = end {
1057                collect_aggregate_reprs(e, out);
1058            }
1059        }
1060        _ => {}
1061    }
1062}
1063
/// A reference found outside of any aggregate call, as gathered by
/// `collect_non_aggregate_refs`.
#[derive(Debug, Clone)]
enum NonAggregateRef {
    /// A bare variable reference, carrying the variable name.
    Var(String),
    /// A property access.
    Property {
        /// String representation of the whole property expression.
        repr: String,
        /// Name of the base variable when the base is a plain variable,
        /// `None` for any other base expression.
        base_var: Option<String>,
    },
}
1072
1073fn collect_non_aggregate_refs(expr: &Expr, inside_agg: bool, out: &mut Vec<NonAggregateRef>) {
1074    match expr {
1075        Expr::FunctionCall { name, args, .. } => {
1076            if is_aggregate_function_name(name) {
1077                return;
1078            }
1079            for arg in args {
1080                collect_non_aggregate_refs(arg, inside_agg, out);
1081            }
1082        }
1083        Expr::Variable(v) if !inside_agg => out.push(NonAggregateRef::Var(v.clone())),
1084        Expr::Property(base, _) if !inside_agg => {
1085            let base_var = if let Expr::Variable(v) = base.as_ref() {
1086                Some(v.clone())
1087            } else {
1088                None
1089            };
1090            out.push(NonAggregateRef::Property {
1091                repr: expr.to_string_repr(),
1092                base_var,
1093            });
1094        }
1095        Expr::BinaryOp { left, right, .. } => {
1096            collect_non_aggregate_refs(left, inside_agg, out);
1097            collect_non_aggregate_refs(right, inside_agg, out);
1098        }
1099        Expr::UnaryOp { expr, .. }
1100        | Expr::IsNull(expr)
1101        | Expr::IsNotNull(expr)
1102        | Expr::IsUnique(expr) => collect_non_aggregate_refs(expr, inside_agg, out),
1103        Expr::List(items) => {
1104            for item in items {
1105                collect_non_aggregate_refs(item, inside_agg, out);
1106            }
1107        }
1108        Expr::Case {
1109            expr,
1110            when_then,
1111            else_expr,
1112        } => {
1113            if let Some(e) = expr {
1114                collect_non_aggregate_refs(e, inside_agg, out);
1115            }
1116            for (w, t) in when_then {
1117                collect_non_aggregate_refs(w, inside_agg, out);
1118                collect_non_aggregate_refs(t, inside_agg, out);
1119            }
1120            if let Some(e) = else_expr {
1121                collect_non_aggregate_refs(e, inside_agg, out);
1122            }
1123        }
1124        Expr::In { expr, list } => {
1125            collect_non_aggregate_refs(expr, inside_agg, out);
1126            collect_non_aggregate_refs(list, inside_agg, out);
1127        }
1128        // For ListComprehension/Quantifier/Reduce, only recurse into the `list`
1129        // source. The body references the loop variable, not outer-scope vars.
1130        Expr::ListComprehension { list, .. } => {
1131            collect_non_aggregate_refs(list, inside_agg, out);
1132        }
1133        Expr::Quantifier { list, .. } => {
1134            collect_non_aggregate_refs(list, inside_agg, out);
1135        }
1136        Expr::Reduce { init, list, .. } => {
1137            collect_non_aggregate_refs(init, inside_agg, out);
1138            collect_non_aggregate_refs(list, inside_agg, out);
1139        }
1140        _ => {}
1141    }
1142}
1143
1144fn validate_with_order_by_aggregate_item(
1145    expr: &Expr,
1146    projected_aggregate_reprs: &HashSet<String>,
1147    projected_simple_reprs: &HashSet<String>,
1148    projected_aliases: &HashSet<String>,
1149) -> Result<()> {
1150    let mut aggregate_reprs = HashSet::new();
1151    collect_aggregate_reprs(expr, &mut aggregate_reprs);
1152    for agg in aggregate_reprs {
1153        if !projected_aggregate_reprs.contains(&agg) {
1154            return Err(anyhow!(
1155                "SyntaxError: UndefinedVariable - Aggregation expression '{}' is not projected in WITH",
1156                agg
1157            ));
1158        }
1159    }
1160
1161    let mut refs = Vec::new();
1162    collect_non_aggregate_refs(expr, false, &mut refs);
1163    refs.retain(|r| match r {
1164        NonAggregateRef::Var(v) => !projected_aliases.contains(v),
1165        NonAggregateRef::Property { repr, .. } => !projected_simple_reprs.contains(repr),
1166    });
1167
1168    let mut dedup = HashSet::new();
1169    refs.retain(|r| {
1170        let key = match r {
1171            NonAggregateRef::Var(v) => format!("v:{v}"),
1172            NonAggregateRef::Property { repr, .. } => format!("p:{repr}"),
1173        };
1174        dedup.insert(key)
1175    });
1176
1177    if refs.len() > 1 {
1178        return Err(anyhow!(
1179            "SyntaxError: AmbiguousAggregationExpression - ORDER BY item mixes aggregation with multiple non-grouping references"
1180        ));
1181    }
1182
1183    if let Some(r) = refs.first() {
1184        return match r {
1185            NonAggregateRef::Var(v) => Err(anyhow!(
1186                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1187                v
1188            )),
1189            NonAggregateRef::Property { base_var, .. } => Err(anyhow!(
1190                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1191                base_var
1192                    .clone()
1193                    .unwrap_or_else(|| "<property-base>".to_string())
1194            )),
1195        };
1196    }
1197
1198    Ok(())
1199}
1200
1201/// Validate that no aggregation functions appear in WHERE clause.
1202fn validate_no_aggregation_in_where(predicate: &Expr) -> Result<()> {
1203    if contains_aggregate_recursive(predicate) {
1204        return Err(anyhow!(
1205            "SyntaxError: InvalidAggregation - Aggregation functions not allowed in WHERE"
1206        ));
1207    }
1208    Ok(())
1209}
1210
/// Result of evaluating a constant numeric expression: either an integer or
/// a floating-point value.
#[derive(Debug, Clone, Copy)]
enum ConstNumber {
    /// A 64-bit signed integer constant.
    Int(i64),
    /// A 64-bit floating-point constant.
    Float(f64),
}

impl ConstNumber {
    /// Return the value as `f64`, converting an integer with an `as` cast.
    fn to_f64(self) -> f64 {
        match self {
            ConstNumber::Float(value) => value,
            ConstNumber::Int(value) => value as f64,
        }
    }
}
1225
1226fn eval_const_numeric_expr(
1227    expr: &Expr,
1228    params: &HashMap<String, uni_common::Value>,
1229) -> Result<ConstNumber> {
1230    match expr {
1231        Expr::Literal(CypherLiteral::Integer(n)) => Ok(ConstNumber::Int(*n)),
1232        Expr::Literal(CypherLiteral::Float(f)) => Ok(ConstNumber::Float(*f)),
1233        Expr::Parameter(name) => match params.get(name) {
1234            Some(uni_common::Value::Int(n)) => Ok(ConstNumber::Int(*n)),
1235            Some(uni_common::Value::Float(f)) => Ok(ConstNumber::Float(*f)),
1236            Some(uni_common::Value::Null) => Err(anyhow!(
1237                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got null",
1238                name
1239            )),
1240            Some(other) => Err(anyhow!(
1241                "TypeError: InvalidArgumentType - expected numeric value for parameter ${}, got {:?}",
1242                name,
1243                other
1244            )),
1245            None => Err(anyhow!(
1246                "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1247            )),
1248        },
1249        Expr::UnaryOp {
1250            op: uni_cypher::ast::UnaryOp::Neg,
1251            expr,
1252        } => match eval_const_numeric_expr(expr, params)? {
1253            ConstNumber::Int(v) => Ok(ConstNumber::Int(-v)),
1254            ConstNumber::Float(v) => Ok(ConstNumber::Float(-v)),
1255        },
1256        Expr::BinaryOp { left, op, right } => {
1257            let l = eval_const_numeric_expr(left, params)?;
1258            let r = eval_const_numeric_expr(right, params)?;
1259            match op {
1260                BinaryOp::Add => match (l, r) {
1261                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a + b)),
1262                    _ => Ok(ConstNumber::Float(l.to_f64() + r.to_f64())),
1263                },
1264                BinaryOp::Sub => match (l, r) {
1265                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a - b)),
1266                    _ => Ok(ConstNumber::Float(l.to_f64() - r.to_f64())),
1267                },
1268                BinaryOp::Mul => match (l, r) {
1269                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a * b)),
1270                    _ => Ok(ConstNumber::Float(l.to_f64() * r.to_f64())),
1271                },
1272                BinaryOp::Div => Ok(ConstNumber::Float(l.to_f64() / r.to_f64())),
1273                BinaryOp::Mod => match (l, r) {
1274                    (ConstNumber::Int(a), ConstNumber::Int(b)) => Ok(ConstNumber::Int(a % b)),
1275                    _ => Ok(ConstNumber::Float(l.to_f64() % r.to_f64())),
1276                },
1277                BinaryOp::Pow => Ok(ConstNumber::Float(l.to_f64().powf(r.to_f64()))),
1278                _ => Err(anyhow!(
1279                    "SyntaxError: InvalidArgumentType - unsupported operator in constant expression"
1280                )),
1281            }
1282        }
1283        Expr::FunctionCall { name, args, .. } => {
1284            let lower = name.to_lowercase();
1285            match lower.as_str() {
1286                "rand" if args.is_empty() => {
1287                    use rand::RngExt;
1288                    let mut rng = rand::rng();
1289                    Ok(ConstNumber::Float(rng.random::<f64>()))
1290                }
1291                "tointeger" | "toint" if args.len() == 1 => {
1292                    match eval_const_numeric_expr(&args[0], params)? {
1293                        ConstNumber::Int(v) => Ok(ConstNumber::Int(v)),
1294                        ConstNumber::Float(v) => Ok(ConstNumber::Int(v.trunc() as i64)),
1295                    }
1296                }
1297                "ceil" if args.len() == 1 => Ok(ConstNumber::Float(
1298                    eval_const_numeric_expr(&args[0], params)?.to_f64().ceil(),
1299                )),
1300                "floor" if args.len() == 1 => Ok(ConstNumber::Float(
1301                    eval_const_numeric_expr(&args[0], params)?.to_f64().floor(),
1302                )),
1303                "abs" if args.len() == 1 => match eval_const_numeric_expr(&args[0], params)? {
1304                    ConstNumber::Int(v) => Ok(ConstNumber::Int(v.abs())),
1305                    ConstNumber::Float(v) => Ok(ConstNumber::Float(v.abs())),
1306                },
1307                _ => Err(anyhow!(
1308                    "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1309                )),
1310            }
1311        }
1312        _ => Err(anyhow!(
1313            "SyntaxError: InvalidArgumentType - expression is not a constant integer expression"
1314        )),
1315    }
1316}
1317
1318/// Parse and validate a non-negative integer expression for SKIP or LIMIT.
1319/// Returns `Ok(Some(n))` for valid constants, or an error for negative/float/non-constant values.
1320fn parse_non_negative_integer(
1321    expr: &Expr,
1322    clause_name: &str,
1323    params: &HashMap<String, uni_common::Value>,
1324) -> Result<Option<usize>> {
1325    let referenced_vars = collect_expr_variables(expr);
1326    if !referenced_vars.is_empty() {
1327        return Err(anyhow!(
1328            "SyntaxError: NonConstantExpression - {} requires expression independent of row variables",
1329            clause_name
1330        ));
1331    }
1332
1333    let value = eval_const_numeric_expr(expr, params)?;
1334    let as_int = match value {
1335        ConstNumber::Int(v) => v,
1336        ConstNumber::Float(v) => {
1337            if !v.is_finite() || (v.fract().abs() > f64::EPSILON) {
1338                return Err(anyhow!(
1339                    "SyntaxError: InvalidArgumentType - {} requires integer, got float",
1340                    clause_name
1341                ));
1342            }
1343            v as i64
1344        }
1345    };
1346    if as_int < 0 {
1347        return Err(anyhow!(
1348            "SyntaxError: NegativeIntegerArgument - {} requires non-negative integer",
1349            clause_name
1350        ));
1351    }
1352    Ok(Some(as_int as usize))
1353}
1354
1355/// Validate that aggregation functions are not nested.
1356fn validate_no_nested_aggregation(expr: &Expr) -> Result<()> {
1357    if let Expr::FunctionCall { name, args, .. } = expr
1358        && is_aggregate_function_name(name)
1359    {
1360        for arg in args {
1361            if contains_aggregate_recursive(arg) {
1362                return Err(anyhow!(
1363                    "SyntaxError: NestedAggregation - Cannot nest aggregation functions"
1364                ));
1365            }
1366            if contains_non_deterministic(arg) {
1367                return Err(anyhow!(
1368                    "SyntaxError: NonConstantExpression - Non-deterministic function inside aggregation"
1369                ));
1370            }
1371        }
1372    }
1373    let mut result = Ok(());
1374    expr.for_each_child(&mut |child| {
1375        if result.is_ok() {
1376            result = validate_no_nested_aggregation(child);
1377        }
1378    });
1379    result
1380}
1381
1382/// Validate that an expression does not access properties or labels of
1383/// deleted entities. `type(r)` on a deleted relationship is allowed per
1384/// OpenCypher spec, but `n.prop` and `labels(n)` are not.
1385fn validate_no_deleted_entity_access(expr: &Expr, deleted_vars: &HashSet<String>) -> Result<()> {
1386    // Check n.prop on a deleted variable
1387    if let Expr::Property(inner, _) = expr
1388        && let Expr::Variable(name) = inner.as_ref()
1389        && deleted_vars.contains(name)
1390    {
1391        return Err(anyhow!(
1392            "EntityNotFound: DeletedEntityAccess - Cannot access properties of deleted entity '{}'",
1393            name
1394        ));
1395    }
1396    // Check labels(n) or keys(n) on a deleted variable
1397    if let Expr::FunctionCall { name, args, .. } = expr
1398        && matches!(name.to_lowercase().as_str(), "labels" | "keys")
1399        && args.len() == 1
1400        && let Expr::Variable(var) = &args[0]
1401        && deleted_vars.contains(var)
1402    {
1403        return Err(anyhow!(
1404            "EntityNotFound: DeletedEntityAccess - Cannot access {} of deleted entity '{}'",
1405            name.to_lowercase(),
1406            var
1407        ));
1408    }
1409    let mut result = Ok(());
1410    expr.for_each_child(&mut |child| {
1411        if result.is_ok() {
1412            result = validate_no_deleted_entity_access(child, deleted_vars);
1413        }
1414    });
1415    result
1416}
1417
1418/// Validate that all variables referenced in properties are defined,
1419/// either in scope or in the local CREATE variable list.
1420fn validate_property_variables(
1421    properties: &Option<Expr>,
1422    vars_in_scope: &[VariableInfo],
1423    create_vars: &[&str],
1424) -> Result<()> {
1425    if let Some(props) = properties {
1426        for var in collect_expr_variables(props) {
1427            if !is_var_in_scope(vars_in_scope, &var) && !create_vars.contains(&var.as_str()) {
1428                return Err(anyhow!(
1429                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1430                    var
1431                ));
1432            }
1433        }
1434    }
1435    Ok(())
1436}
1437
1438/// Check that a variable name is not already bound in scope or in the local CREATE list.
1439/// Used to prevent rebinding in CREATE clauses.
1440fn check_not_already_bound(
1441    name: &str,
1442    vars_in_scope: &[VariableInfo],
1443    create_vars: &[&str],
1444) -> Result<()> {
1445    if is_var_in_scope(vars_in_scope, name) {
1446        return Err(anyhow!(
1447            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1448            name
1449        ));
1450    }
1451    if create_vars.contains(&name) {
1452        return Err(anyhow!(
1453            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined in CREATE",
1454            name
1455        ));
1456    }
1457    Ok(())
1458}
1459
1460fn build_merge_scope(pattern: &Pattern, vars_in_scope: &[VariableInfo]) -> Vec<VariableInfo> {
1461    let mut scope = vars_in_scope.to_vec();
1462
1463    for path in &pattern.paths {
1464        if let Some(path_var) = &path.variable
1465            && !path_var.is_empty()
1466            && !is_var_in_scope(&scope, path_var)
1467        {
1468            scope.push(VariableInfo::new(path_var.clone(), VariableType::Path));
1469        }
1470        for element in &path.elements {
1471            match element {
1472                PatternElement::Node(n) => {
1473                    if let Some(v) = &n.variable
1474                        && !v.is_empty()
1475                        && !is_var_in_scope(&scope, v)
1476                    {
1477                        scope.push(VariableInfo::new(v.clone(), VariableType::Node));
1478                    }
1479                }
1480                PatternElement::Relationship(r) => {
1481                    if let Some(v) = &r.variable
1482                        && !v.is_empty()
1483                        && !is_var_in_scope(&scope, v)
1484                    {
1485                        scope.push(VariableInfo::new(v.clone(), VariableType::Edge));
1486                    }
1487                }
1488                PatternElement::Parenthesized { .. } => {}
1489            }
1490        }
1491    }
1492
1493    scope
1494}
1495
1496fn validate_merge_set_item(item: &SetItem, vars_in_scope: &[VariableInfo]) -> Result<()> {
1497    match item {
1498        SetItem::Property { expr, value } => {
1499            validate_expression_variables(expr, vars_in_scope)?;
1500            validate_expression(expr, vars_in_scope)?;
1501            validate_expression_variables(value, vars_in_scope)?;
1502            validate_expression(value, vars_in_scope)?;
1503            if contains_pattern_predicate(expr) || contains_pattern_predicate(value) {
1504                return Err(anyhow!(
1505                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1506                ));
1507            }
1508        }
1509        SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
1510            if !is_var_in_scope(vars_in_scope, variable) {
1511                return Err(anyhow!(
1512                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1513                    variable
1514                ));
1515            }
1516            validate_expression_variables(value, vars_in_scope)?;
1517            validate_expression(value, vars_in_scope)?;
1518            if contains_pattern_predicate(value) {
1519                return Err(anyhow!(
1520                    "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
1521                ));
1522            }
1523        }
1524        SetItem::Labels { variable, .. } => {
1525            if !is_var_in_scope(vars_in_scope, variable) {
1526                return Err(anyhow!(
1527                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1528                    variable
1529                ));
1530            }
1531        }
1532    }
1533
1534    Ok(())
1535}
1536
1537/// Reject MERGE patterns containing null property values (e.g. `MERGE ({k: null})`).
1538/// The OpenCypher spec requires all property values in MERGE to be non-null.
1539fn reject_null_merge_properties(properties: &Option<Expr>) -> Result<()> {
1540    if let Some(Expr::Map(entries)) = properties {
1541        for (key, value) in entries {
1542            if matches!(value, Expr::Literal(CypherLiteral::Null)) {
1543                return Err(anyhow!(
1544                    "SemanticError: MergeReadOwnWrites - MERGE cannot use null property value for '{}'",
1545                    key
1546                ));
1547            }
1548        }
1549    }
1550    Ok(())
1551}
1552
1553/// Flatten every label name appearing in a `Pattern` (across all paths
1554/// and node elements). Used by the M5 follow-up #6 write-rejection
1555/// guard to refuse CREATE/MERGE that names a virtual catalog-resolved
1556/// label.
1557fn collect_pattern_labels(pattern: &uni_cypher::ast::Pattern) -> Vec<String> {
1558    let mut out = Vec::new();
1559    for path in &pattern.paths {
1560        for element in &path.elements {
1561            if let PatternElement::Node(n) = element {
1562                for l in n.labels.names() {
1563                    out.push(l.clone());
1564                }
1565            }
1566        }
1567    }
1568    out
1569}
1570
1571fn validate_merge_clause(merge_clause: &MergeClause, vars_in_scope: &[VariableInfo]) -> Result<()> {
1572    for path in &merge_clause.pattern.paths {
1573        for element in &path.elements {
1574            match element {
1575                PatternElement::Node(n) => {
1576                    if let Some(Expr::Parameter(_)) = &n.properties {
1577                        return Err(anyhow!(
1578                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
1579                        ));
1580                    }
1581                    reject_null_merge_properties(&n.properties)?;
1582                    // VariableAlreadyBound: reject if a bound variable is used
1583                    // as a standalone MERGE node or introduces new labels/properties.
1584                    // Bare endpoint references like (a) in MERGE (a)-[:R]->(b) are valid.
1585                    if let Some(variable) = &n.variable
1586                        && !variable.is_empty()
1587                        && is_var_in_scope(vars_in_scope, variable)
1588                    {
1589                        let is_standalone = path.elements.len() == 1;
1590                        let has_new_labels = !n.labels.is_empty();
1591                        let has_new_properties = n.properties.is_some();
1592                        if is_standalone || has_new_labels || has_new_properties {
1593                            return Err(anyhow!(
1594                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1595                                variable
1596                            ));
1597                        }
1598                    }
1599                }
1600                PatternElement::Relationship(r) => {
1601                    if let Some(variable) = &r.variable
1602                        && !variable.is_empty()
1603                        && is_var_in_scope(vars_in_scope, variable)
1604                    {
1605                        return Err(anyhow!(
1606                            "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
1607                            variable
1608                        ));
1609                    }
1610                    if r.types.len() != 1 {
1611                        return Err(anyhow!(
1612                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for MERGE"
1613                        ));
1614                    }
1615                    if r.range.is_some() {
1616                        return Err(anyhow!(
1617                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
1618                        ));
1619                    }
1620                    if let Some(Expr::Parameter(_)) = &r.properties {
1621                        return Err(anyhow!(
1622                            "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
1623                        ));
1624                    }
1625                    reject_null_merge_properties(&r.properties)?;
1626                }
1627                PatternElement::Parenthesized { .. } => {}
1628            }
1629        }
1630    }
1631
1632    let merge_scope = build_merge_scope(&merge_clause.pattern, vars_in_scope);
1633    for item in &merge_clause.on_create {
1634        validate_merge_set_item(item, &merge_scope)?;
1635    }
1636    for item in &merge_clause.on_match {
1637        validate_merge_set_item(item, &merge_scope)?;
1638    }
1639
1640    Ok(())
1641}
1642
1643/// Recursively validate an expression for type errors, undefined variables, etc.
1644fn validate_expression(expr: &Expr, vars_in_scope: &[VariableInfo]) -> Result<()> {
1645    // Validate boolean operators and nested aggregation first
1646    validate_boolean_expression(expr)?;
1647    validate_no_nested_aggregation(expr)?;
1648
1649    // Helper to validate multiple expressions
1650    fn validate_all(exprs: &[Expr], vars: &[VariableInfo]) -> Result<()> {
1651        for e in exprs {
1652            validate_expression(e, vars)?;
1653        }
1654        Ok(())
1655    }
1656
1657    match expr {
1658        Expr::FunctionCall { name, args, .. } => {
1659            validate_function_call(name, args, vars_in_scope)?;
1660            validate_all(args, vars_in_scope)
1661        }
1662        Expr::BinaryOp { left, right, .. } => {
1663            validate_expression(left, vars_in_scope)?;
1664            validate_expression(right, vars_in_scope)
1665        }
1666        Expr::UnaryOp { expr: e, .. }
1667        | Expr::IsNull(e)
1668        | Expr::IsNotNull(e)
1669        | Expr::IsUnique(e) => validate_expression(e, vars_in_scope),
1670        Expr::Property(base, prop) => {
1671            if let Expr::Variable(var_name) = base.as_ref()
1672                && let Some(var_info) = find_var_in_scope(vars_in_scope, var_name)
1673            {
1674                // Paths don't have properties
1675                if var_info.var_type == VariableType::Path {
1676                    return Err(anyhow!(
1677                        "SyntaxError: InvalidArgumentType - Type mismatch: expected Node or Relationship but was Path for property access '{}.{}'",
1678                        var_name,
1679                        prop
1680                    ));
1681                }
1682                // Known non-graph literals (int, float, bool, string, list) don't have properties
1683                if var_info.var_type == VariableType::ScalarLiteral {
1684                    return Err(anyhow!(
1685                        "TypeError: InvalidArgumentType - Property access on a non-graph element is not allowed"
1686                    ));
1687                }
1688            }
1689            validate_expression(base, vars_in_scope)
1690        }
1691        Expr::List(items) => validate_all(items, vars_in_scope),
1692        Expr::Case {
1693            expr: case_expr,
1694            when_then,
1695            else_expr,
1696        } => {
1697            if let Some(e) = case_expr {
1698                validate_expression(e, vars_in_scope)?;
1699            }
1700            for (w, t) in when_then {
1701                validate_expression(w, vars_in_scope)?;
1702                validate_expression(t, vars_in_scope)?;
1703            }
1704            if let Some(e) = else_expr {
1705                validate_expression(e, vars_in_scope)?;
1706            }
1707            Ok(())
1708        }
1709        Expr::In { expr: e, list } => {
1710            validate_expression(e, vars_in_scope)?;
1711            validate_expression(list, vars_in_scope)
1712        }
1713        Expr::Exists {
1714            query,
1715            from_pattern_predicate: true,
1716        } => {
1717            // Pattern predicates cannot introduce new named variables.
1718            // Extract named vars from inner MATCH pattern, check each is in scope.
1719            if let Query::Single(stmt) = query.as_ref() {
1720                for clause in &stmt.clauses {
1721                    if let Clause::Match(m) = clause {
1722                        for path in &m.pattern.paths {
1723                            for elem in &path.elements {
1724                                match elem {
1725                                    PatternElement::Node(n) => {
1726                                        if let Some(var) = &n.variable
1727                                            && !is_var_in_scope(vars_in_scope, var)
1728                                        {
1729                                            return Err(anyhow!(
1730                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1731                                                var
1732                                            ));
1733                                        }
1734                                    }
1735                                    PatternElement::Relationship(r) => {
1736                                        if let Some(var) = &r.variable
1737                                            && !is_var_in_scope(vars_in_scope, var)
1738                                        {
1739                                            return Err(anyhow!(
1740                                                "SyntaxError: UndefinedVariable - Variable '{}' not defined",
1741                                                var
1742                                            ));
1743                                        }
1744                                    }
1745                                    _ => {}
1746                                }
1747                            }
1748                        }
1749                    }
1750                }
1751            }
1752            Ok(())
1753        }
1754        _ => Ok(()),
1755    }
1756}
1757
1758/// One step (hop) in a Quantified Path Pattern sub-pattern.
1759///
1760/// Used by `LogicalPlan::Traverse` when `qpp_steps` is `Some`.
1761#[derive(Debug, Clone)]
1762pub struct QppStepInfo {
1763    /// Edge type IDs that this step can traverse.
1764    pub edge_type_ids: Vec<u32>,
1765    /// Traversal direction for this step.
1766    pub direction: Direction,
1767    /// Optional label constraint on the target node.
1768    pub target_label: Option<String>,
1769}
1770
/// Phase 5a-impl: per-type fusion strategy for `LogicalPlan::FusedIndexScan`
/// and `LogicalPlan::FusedIndexScanWrapped`.
///
/// `#[non_exhaustive]` so that strategies can be added (as the Phase 5b
/// `AnnRerank` and `Bm25Rrf` variants below were) without breaking
/// downstream pattern-match exhaustiveness: matches outside this crate
/// must include a wildcard arm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FusionKind {
    /// Union of parent + fork-local BTree hits, deduped by VID.
    BtreeUnion,
    /// k-way merge of pre-sorted parent + fork streams (ORDER BY).
    SortedKWayMerge,
    /// Fork-first UID lookup; falls back to parent on miss. Used
    /// when a fork rebinds an external UID and queries must see the
    /// fork's binding before the parent's.
    VidUidForkFirst,
    /// Phase 5b — vector ANN rerank: top-k from primary's index +
    /// top-k from fork-local index, merged and reranked by exact
    /// distance. Recall ≥ 95% per spec §8.2.
    AnnRerank,
    /// Phase 5b — BM25 reciprocal rank fusion: ranked lists from
    /// primary's and fork-local FTS indexes combined via standard
    /// RRF (`score = sum 1 / (k_rrf + rank_i)`, k_rrf = 60).
    Bm25Rrf,
}
1795
1796/// Logical query plan produced by [`QueryPlanner`].
1797///
1798/// Each variant represents one step in the Cypher execution pipeline.
1799/// Plans are tree-structured — leaf nodes produce rows, intermediate nodes
1800/// transform or join them, and the root node defines the final output.
1801#[derive(Debug, Clone)]
1802pub enum LogicalPlan {
1803    /// UNION / UNION ALL of two sub-plans.
1804    Union {
1805        left: Box<LogicalPlan>,
1806        right: Box<LogicalPlan>,
1807        /// When `true`, duplicate rows are preserved (UNION ALL semantics).
1808        all: bool,
1809    },
1810    /// Scan vertices of a single labeled dataset.
1811    Scan {
1812        label_id: u16,
1813        labels: Vec<String>,
1814        variable: String,
1815        filter: Option<Expr>,
1816        optional: bool,
1817    },
1818    /// Phase 5a-impl: fused scan over both primary's index and the
1819    /// forked session's fork-local index. Emitted by the planner only
1820    /// when (a) the session is forked AND (b) `StorageManager::fork_index_exists`
1821    /// returns `Some(_)` for the target column. Otherwise the planner
1822    /// keeps emitting `Scan` and Lance's `base_paths` chain transparently
1823    /// covers parent-inherited indexes.
1824    ///
1825    /// `kind` selects the per-type fusion strategy:
1826    /// - `BtreeUnion` — union of parent + fork hits, dedup by VID.
1827    /// - `SortedKWayMerge` — k-way merge of two pre-sorted streams.
1828    /// - `VidUidForkFirst` — probe fork's branch first, fall back to
1829    ///   parent's UID index on miss.
1830    FusedIndexScan {
1831        label_id: u16,
1832        labels: Vec<String>,
1833        variable: String,
1834        filter: Option<Expr>,
1835        optional: bool,
1836        kind: FusionKind,
1837    },
1838    /// Phase 5b followup: planner-side observability marker for the
1839    /// lossy fusion types. Wraps the original `VectorKnn` or
1840    /// `InvertedIndexLookup` (or any future leaf operator whose
1841    /// shape differs from `Scan`) without changing its fields, so
1842    /// the physical planner can decay it to `inner` unchanged.
1843    ///
1844    /// Runtime behavior is identical to running `inner` directly;
1845    /// the wrap is purely for explain-plan and runtime-stats
1846    /// observability. The actual fusion happens at the
1847    /// `BranchedBackend` layer (per-branch Lance reads via
1848    /// `base_paths`), exactly as in Phase 5b's core ship.
1849    FusedIndexScanWrapped {
1850        inner: Box<LogicalPlan>,
1851        kind: FusionKind,
1852    },
1853    /// Lookup vertices by ext_id using the main vertices table.
1854    /// Used when a query references ext_id without specifying a label.
1855    ExtIdLookup {
1856        variable: String,
1857        ext_id: String,
1858        filter: Option<Expr>,
1859        optional: bool,
1860    },
1861    /// Scan all vertices from main table (MATCH (n) without label).
1862    /// Used for schemaless queries that don't specify any label.
1863    ScanAll {
1864        variable: String,
1865        filter: Option<Expr>,
1866        optional: bool,
1867    },
1868    /// Scan main table filtering by label name (MATCH (n:Unknown)).
1869    /// Used for labels not defined in schema (schemaless support).
1870    /// Scan main vertices table by label name(s) for schemaless support.
1871    /// When labels has multiple entries, uses intersection semantics (must have ALL labels).
1872    ScanMainByLabels {
1873        labels: Vec<String>,
1874        variable: String,
1875        filter: Option<Expr>,
1876        optional: bool,
1877    },
1878    /// Produces exactly one empty row (used to bootstrap pipelines with no source).
1879    Empty,
1880    /// UNWIND: expand a list expression into one row per element.
1881    Unwind {
1882        input: Box<LogicalPlan>,
1883        expr: Expr,
1884        variable: String,
1885    },
1886    Traverse {
1887        input: Box<LogicalPlan>,
1888        edge_type_ids: Vec<u32>,
1889        direction: Direction,
1890        source_variable: String,
1891        target_variable: String,
1892        target_label_id: u16,
1893        step_variable: Option<String>,
1894        min_hops: usize,
1895        max_hops: usize,
1896        optional: bool,
1897        target_filter: Option<Expr>,
1898        path_variable: Option<String>,
1899        edge_properties: HashSet<String>,
1900        /// Whether this is a variable-length pattern (has `*` range specifier).
1901        /// When true, step_variable holds a list of edges (even for *1..1).
1902        is_variable_length: bool,
1903        /// All variables from this OPTIONAL MATCH pattern.
1904        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1905        /// This ensures proper multi-hop OPTIONAL MATCH semantics.
1906        optional_pattern_vars: HashSet<String>,
1907        /// Variable names (node + edge) from the current MATCH clause scope.
1908        /// Used for relationship uniqueness scoping: only edge ID columns whose
1909        /// associated variable is in this set participate in uniqueness filtering.
1910        /// Variables from previous disconnected MATCH clauses are excluded.
1911        scope_match_variables: HashSet<String>,
1912        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1913        edge_filter_expr: Option<Expr>,
1914        /// Path traversal semantics (Trail by default for OpenCypher).
1915        path_mode: crate::query::df_graph::nfa::PathMode,
1916        /// QPP steps for multi-hop quantified path patterns.
1917        /// `None` for simple VLP patterns; `Some` for QPP with per-step edge types/constraints.
1918        /// When present, `min_hops`/`max_hops` are derived from iterations × steps.len().
1919        qpp_steps: Option<Vec<QppStepInfo>>,
1920    },
1921    /// Traverse main edges table filtering by type name(s) (`MATCH (a)-[:Unknown]->(b)`).
1922    /// Used for edge types not defined in schema (schemaless support).
1923    /// Supports OR relationship types like `[:KNOWS|HATES]` via multiple type_names.
1924    TraverseMainByType {
1925        type_names: Vec<String>,
1926        input: Box<LogicalPlan>,
1927        direction: Direction,
1928        source_variable: String,
1929        target_variable: String,
1930        step_variable: Option<String>,
1931        min_hops: usize,
1932        max_hops: usize,
1933        optional: bool,
1934        target_filter: Option<Expr>,
1935        path_variable: Option<String>,
1936        /// Whether this is a variable-length pattern (has `*` range specifier).
1937        /// When true, step_variable holds a list of edges (even for *1..1).
1938        is_variable_length: bool,
1939        /// All variables from this OPTIONAL MATCH pattern.
1940        /// When any hop in the pattern fails, ALL these variables should be set to NULL.
1941        optional_pattern_vars: HashSet<String>,
1942        /// Variables belonging to the current MATCH clause scope.
1943        /// Used for relationship uniqueness scoping: only edge columns whose
1944        /// associated variable is in this set participate in uniqueness filtering.
1945        scope_match_variables: HashSet<String>,
1946        /// Edge property predicate for VLP inline filtering (instead of post-Filter).
1947        edge_filter_expr: Option<Expr>,
1948        /// Path traversal semantics (Trail by default for OpenCypher).
1949        path_mode: crate::query::df_graph::nfa::PathMode,
1950    },
1951    Filter {
1952        input: Box<LogicalPlan>,
1953        predicate: Expr,
1954        /// Variables from OPTIONAL MATCH that should preserve NULL rows.
1955        /// When evaluating the filter, if any of these variables are NULL,
1956        /// the row is preserved regardless of the predicate result.
1957        optional_variables: HashSet<String>,
1958    },
1959    Create {
1960        input: Box<LogicalPlan>,
1961        pattern: Pattern,
1962    },
1963    /// Batched CREATE operations for multiple consecutive CREATE clauses.
1964    ///
1965    /// This variant combines multiple CREATE patterns into a single plan node
1966    /// to avoid deep recursion when executing many CREATEs sequentially.
1967    CreateBatch {
1968        input: Box<LogicalPlan>,
1969        patterns: Vec<Pattern>,
1970    },
1971    Merge {
1972        input: Box<LogicalPlan>,
1973        pattern: Pattern,
1974        on_match: Option<SetClause>,
1975        on_create: Option<SetClause>,
1976    },
1977    Set {
1978        input: Box<LogicalPlan>,
1979        items: Vec<SetItem>,
1980    },
1981    Remove {
1982        input: Box<LogicalPlan>,
1983        items: Vec<RemoveItem>,
1984    },
1985    Delete {
1986        input: Box<LogicalPlan>,
1987        items: Vec<Expr>,
1988        detach: bool,
1989    },
1990    /// FOREACH (variable IN list | clauses)
1991    Foreach {
1992        input: Box<LogicalPlan>,
1993        variable: String,
1994        list: Expr,
1995        body: Vec<LogicalPlan>,
1996    },
1997    Sort {
1998        input: Box<LogicalPlan>,
1999        order_by: Vec<SortItem>,
2000    },
2001    Limit {
2002        input: Box<LogicalPlan>,
2003        skip: Option<usize>,
2004        fetch: Option<usize>,
2005    },
2006    Aggregate {
2007        input: Box<LogicalPlan>,
2008        group_by: Vec<Expr>,
2009        aggregates: Vec<Expr>,
2010    },
2011    Distinct {
2012        input: Box<LogicalPlan>,
2013    },
2014    Window {
2015        input: Box<LogicalPlan>,
2016        window_exprs: Vec<Expr>,
2017    },
2018    Project {
2019        input: Box<LogicalPlan>,
2020        projections: Vec<(Expr, Option<String>)>,
2021    },
2022    CrossJoin {
2023        left: Box<LogicalPlan>,
2024        right: Box<LogicalPlan>,
2025    },
2026    Apply {
2027        input: Box<LogicalPlan>,
2028        subquery: Box<LogicalPlan>,
2029        input_filter: Option<Expr>,
2030    },
2031    RecursiveCTE {
2032        cte_name: String,
2033        initial: Box<LogicalPlan>,
2034        recursive: Box<LogicalPlan>,
2035    },
2036    ProcedureCall {
2037        procedure_name: String,
2038        arguments: Vec<Expr>,
2039        yield_items: Vec<(String, Option<String>)>,
2040    },
2041    SubqueryCall {
2042        input: Box<LogicalPlan>,
2043        subquery: Box<LogicalPlan>,
2044    },
2045    VectorKnn {
2046        label_id: u16,
2047        variable: String,
2048        property: String,
2049        query: Expr,
2050        k: usize,
2051        threshold: Option<f32>,
2052    },
2053    InvertedIndexLookup {
2054        label_id: u16,
2055        variable: String,
2056        property: String,
2057        terms: Expr,
2058    },
2059    ShortestPath {
2060        input: Box<LogicalPlan>,
2061        edge_type_ids: Vec<u32>,
2062        direction: Direction,
2063        source_variable: String,
2064        target_variable: String,
2065        target_label_id: u16,
2066        path_variable: String,
2067        /// Minimum number of hops (edges) in the path. Default is 1.
2068        min_hops: u32,
2069        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2070        max_hops: u32,
2071    },
2072    /// allShortestPaths() - Returns all paths with minimum length
2073    AllShortestPaths {
2074        input: Box<LogicalPlan>,
2075        edge_type_ids: Vec<u32>,
2076        direction: Direction,
2077        source_variable: String,
2078        target_variable: String,
2079        target_label_id: u16,
2080        path_variable: String,
2081        /// Minimum number of hops (edges) in the path. Default is 1.
2082        min_hops: u32,
2083        /// Maximum number of hops (edges) in the path. Default is u32::MAX (unlimited).
2084        max_hops: u32,
2085    },
2086    QuantifiedPattern {
2087        input: Box<LogicalPlan>,
2088        pattern_plan: Box<LogicalPlan>, // Plan for one iteration
2089        min_iterations: u32,
2090        max_iterations: u32,
2091        path_variable: Option<String>,
2092        start_variable: String, // Input variable for iteration (e.g. 'a' in (a)-[:R]->(b))
2093        binding_variable: String, // Output variable of iteration (e.g. 'b')
2094    },
2095    // DDL Plans
2096    CreateVectorIndex {
2097        config: VectorIndexConfig,
2098        if_not_exists: bool,
2099    },
2100    CreateFullTextIndex {
2101        config: FullTextIndexConfig,
2102        if_not_exists: bool,
2103    },
2104    CreateScalarIndex {
2105        config: ScalarIndexConfig,
2106        if_not_exists: bool,
2107    },
2108    CreateJsonFtsIndex {
2109        config: JsonFtsIndexConfig,
2110        if_not_exists: bool,
2111    },
2112    DropIndex {
2113        name: String,
2114        if_exists: bool,
2115    },
2116    ShowIndexes {
2117        filter: Option<String>,
2118    },
2119    Copy {
2120        target: String,
2121        source: String,
2122        is_export: bool,
2123        options: HashMap<String, Value>,
2124    },
2125    Backup {
2126        destination: String,
2127        options: HashMap<String, Value>,
2128    },
2129    Explain {
2130        plan: Box<LogicalPlan>,
2131    },
2132    // Admin Plans
2133    ShowDatabase,
2134    ShowConfig,
2135    ShowStatistics,
2136    Vacuum,
2137    Checkpoint,
2138    CopyTo {
2139        label: String,
2140        path: String,
2141        format: String,
2142        options: HashMap<String, Value>,
2143    },
2144    CopyFrom {
2145        label: String,
2146        path: String,
2147        format: String,
2148        options: HashMap<String, Value>,
2149    },
2150    // Schema DDL
2151    CreateLabel(CreateLabel),
2152    CreateEdgeType(CreateEdgeType),
2153    AlterLabel(AlterLabel),
2154    AlterEdgeType(AlterEdgeType),
2155    DropLabel(DropLabel),
2156    DropEdgeType(DropEdgeType),
2157    // Constraints
2158    CreateConstraint(CreateConstraint),
2159    DropConstraint(DropConstraint),
2160    ShowConstraints(ShowConstraints),
2161    /// Bind a zero-length path (single node pattern with path variable).
2162    /// E.g., `p = (a)` creates a Path with one node and zero edges.
2163    BindZeroLengthPath {
2164        input: Box<LogicalPlan>,
2165        node_variable: String,
2166        path_variable: String,
2167    },
2168    /// Bind a fixed-length path from already-computed node and edge columns.
2169    /// E.g., `p = (a)-[r]->(b)` or `p = (a)-[r1]->(b)-[r2]->(c)`.
2170    BindPath {
2171        input: Box<LogicalPlan>,
2172        node_variables: Vec<String>,
2173        edge_variables: Vec<String>,
2174        path_variable: String,
2175    },
2176
2177    // ── Locy variants ──────────────────────────────────────────
2178    /// Top-level Locy program: stratified rules + commands.
2179    LocyProgram {
2180        strata: Vec<super::planner_locy_types::LocyStratum>,
2181        commands: Vec<super::planner_locy_types::LocyCommand>,
2182        derived_scan_registry: Arc<super::df_graph::locy_fixpoint::DerivedScanRegistry>,
2183        max_iterations: usize,
2184        timeout: std::time::Duration,
2185        max_derived_bytes: usize,
2186        deterministic_best_by: bool,
2187        strict_probability_domain: bool,
2188        probability_epsilon: f64,
2189        exact_probability: bool,
2190        max_bdd_variables: usize,
2191        top_k_proofs: usize,
2192        /// Active probability semiring (rollout D-7). Defaults to
2193        /// `AddMultProb` (Phase 1/2 byte-identical behavior). `BddExact`
2194        /// is selected by `LocyConfig::resolve()` when `exact_probability`
2195        /// is true.
2196        semiring_kind: uni_locy::SemiringKind,
2197        /// Phase B Slice 3: per-evaluation registry of neural classifiers
2198        /// keyed by model name. Empty for programs without `CREATE MODEL`.
2199        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2200        /// Phase B follow-up: optional memoization cache. `None` →
2201        /// runtime creates a fresh per-query cache; `Some` → shared
2202        /// across queries (caller-managed).
2203        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2204        /// Phase C B1-B3 follow-up: per-query side-channel store
2205        /// for per-invocation (raw, calibrated, confidence_band)
2206        /// records. Flows alongside `classifier_cache` into
2207        /// `LocyProgramExec`.
2208        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2209    },
2210    /// FOLD operator: lattice-join non-key columns per KEY group.
2211    LocyFold {
2212        input: Box<LogicalPlan>,
2213        key_columns: Vec<String>,
2214        fold_bindings: Vec<(String, Expr)>,
2215        strict_probability_domain: bool,
2216        probability_epsilon: f64,
2217    },
2218    /// BEST BY operator: select best row per KEY group by ordered criteria.
2219    LocyBestBy {
2220        input: Box<LogicalPlan>,
2221        key_columns: Vec<String>,
2222        /// (expression, ascending) pairs.
2223        criteria: Vec<(Expr, bool)>,
2224    },
2225    /// PRIORITY operator: keep only highest-priority clause's rows per KEY group.
2226    LocyPriority {
2227        input: Box<LogicalPlan>,
2228        key_columns: Vec<String>,
2229    },
2230    /// Scan a derived relation's in-memory buffer during fixpoint iteration.
2231    LocyDerivedScan {
2232        scan_index: usize,
2233        data: Arc<RwLock<Vec<RecordBatch>>>,
2234        schema: SchemaRef,
2235    },
2236    /// Compact projection for Locy YIELD — emits ONLY the listed expressions,
2237    /// without carrying through helper/property columns like the regular Project.
2238    LocyProject {
2239        input: Box<LogicalPlan>,
2240        projections: Vec<(Expr, Option<String>)>,
2241        /// Expected output Arrow type per projection (for CAST support).
2242        target_types: Vec<DataType>,
2243    },
2244    /// Phase B A4: invoke registered neural classifiers against the
2245    /// input batches and overwrite the per-invocation placeholder
2246    /// column with each row's predicted probability. Wraps a Locy
2247    /// clause body plan when `CompiledClause.model_invocations` is
2248    /// non-empty; transparent (passes batches through unchanged) when
2249    /// the list is empty.
2250    ///
2251    /// Registry and cache are carried on the node so that
2252    /// `execute_subplan` — which spins up a fresh
2253    /// `HybridPhysicalPlanner` per call — can lower it to a physical
2254    /// `LocyModelInvokeExec` without depending on planner-side
2255    /// runtime state.
2256    LocyModelInvoke {
2257        input: Box<LogicalPlan>,
2258        invocations: Vec<uni_locy::ModelInvocation>,
2259        classifier_registry: Arc<uni_locy::ClassifierRegistry>,
2260        classifier_cache: Option<Arc<uni_locy::ModelInvocationCache>>,
2261        /// Phase C B1-B3 follow-up: per-query side-channel store
2262        /// for per-invocation (raw, calibrated, confidence_band)
2263        /// records. `LocyModelInvokeExec` writes here after each
2264        /// classifier call; EXPLAIN reads via collect_neural_calls
2265        /// to surface NeuralProvenance for ALONG/FOLD-position
2266        /// invocations and Mode B re-execution paths.
2267        classifier_provenance_store: Option<Arc<uni_locy::NeuralProvenanceStore>>,
2268        /// Phase D D3 runtime: one handle per `path_context.source_rule`
2269        /// referenced by any invocation on this node. The handle's
2270        /// `data: Arc<RwLock<Vec<RecordBatch>>>` is shared with the
2271        /// `DerivedScanRegistry`; the source rule's derived facts are
2272        /// already converged by the time this node executes (the
2273        /// dependency-graph builder ensures source rules sit in
2274        /// earlier strata).
2275        path_context_handles: std::collections::HashMap<
2276            String,
2277            super::df_graph::locy_model_invoke::PathContextHandle,
2278        >,
2279    },
2280}
2281
2282/// Extracted vector similarity predicate info for optimization
2283struct VectorSimilarityPredicate {
2284    variable: String,
2285    property: String,
2286    query: Expr,
2287    threshold: Option<f32>,
2288}
2289
2290/// Result of extracting vector_similarity from a predicate
2291struct VectorSimilarityExtraction {
2292    /// The extracted vector similarity predicate
2293    predicate: VectorSimilarityPredicate,
2294    /// Remaining predicates that couldn't be optimized (if any)
2295    residual: Option<Expr>,
2296}
2297
2298/// Try to extract a vector_similarity predicate from an expression.
2299/// Matches patterns like:
2300/// - vector_similarity(n.embedding, [1,2,3]) > 0.8
2301/// - n.embedding ~= $query
2302///
2303/// Also handles AND predicates.
2304fn extract_vector_similarity(expr: &Expr) -> Option<VectorSimilarityExtraction> {
2305    match expr {
2306        Expr::BinaryOp { left, op, right } => {
2307            // Handle AND: check both sides for vector_similarity
2308            if matches!(op, BinaryOp::And) {
2309                // Try left side first
2310                if let Some(vs) = extract_simple_vector_similarity(left) {
2311                    return Some(VectorSimilarityExtraction {
2312                        predicate: vs,
2313                        residual: Some(right.as_ref().clone()),
2314                    });
2315                }
2316                // Try right side
2317                if let Some(vs) = extract_simple_vector_similarity(right) {
2318                    return Some(VectorSimilarityExtraction {
2319                        predicate: vs,
2320                        residual: Some(left.as_ref().clone()),
2321                    });
2322                }
2323                // Recursively check within left/right for nested ANDs
2324                if let Some(mut extraction) = extract_vector_similarity(left) {
2325                    extraction.residual = Some(combine_with_and(
2326                        extraction.residual,
2327                        right.as_ref().clone(),
2328                    ));
2329                    return Some(extraction);
2330                }
2331                if let Some(mut extraction) = extract_vector_similarity(right) {
2332                    extraction.residual =
2333                        Some(combine_with_and(extraction.residual, left.as_ref().clone()));
2334                    return Some(extraction);
2335                }
2336                return None;
2337            }
2338
2339            // Simple case: direct vector_similarity comparison
2340            if let Some(vs) = extract_simple_vector_similarity(expr) {
2341                return Some(VectorSimilarityExtraction {
2342                    predicate: vs,
2343                    residual: None,
2344                });
2345            }
2346            None
2347        }
2348        _ => None,
2349    }
2350}
2351
2352/// Helper to combine an optional expression with another using AND
2353fn combine_with_and(opt_expr: Option<Expr>, other: Expr) -> Expr {
2354    match opt_expr {
2355        Some(e) => Expr::BinaryOp {
2356            left: Box::new(e),
2357            op: BinaryOp::And,
2358            right: Box::new(other),
2359        },
2360        None => other,
2361    }
2362}
2363
2364/// Extract a simple vector_similarity comparison (no AND)
2365fn extract_simple_vector_similarity(expr: &Expr) -> Option<VectorSimilarityPredicate> {
2366    match expr {
2367        Expr::BinaryOp { left, op, right } => {
2368            // Pattern: vector_similarity(...) > threshold or vector_similarity(...) >= threshold
2369            if matches!(op, BinaryOp::Gt | BinaryOp::GtEq)
2370                && let (Some(vs), Some(thresh)) = (
2371                    extract_vector_similarity_call(left),
2372                    extract_float_literal(right),
2373                )
2374            {
2375                return Some(VectorSimilarityPredicate {
2376                    variable: vs.0,
2377                    property: vs.1,
2378                    query: vs.2,
2379                    threshold: Some(thresh),
2380                });
2381            }
2382            // Pattern: threshold < vector_similarity(...) or threshold <= vector_similarity(...)
2383            if matches!(op, BinaryOp::Lt | BinaryOp::LtEq)
2384                && let (Some(thresh), Some(vs)) = (
2385                    extract_float_literal(left),
2386                    extract_vector_similarity_call(right),
2387                )
2388            {
2389                return Some(VectorSimilarityPredicate {
2390                    variable: vs.0,
2391                    property: vs.1,
2392                    query: vs.2,
2393                    threshold: Some(thresh),
2394                });
2395            }
2396            // Pattern: n.embedding ~= query
2397            if matches!(op, BinaryOp::ApproxEq)
2398                && let Expr::Property(var_expr, prop) = left.as_ref()
2399                && let Expr::Variable(var) = var_expr.as_ref()
2400            {
2401                return Some(VectorSimilarityPredicate {
2402                    variable: var.clone(),
2403                    property: prop.clone(),
2404                    query: right.as_ref().clone(),
2405                    threshold: None,
2406                });
2407            }
2408            None
2409        }
2410        _ => None,
2411    }
2412}
2413
2414/// Extract (variable, property, query_expr) from vector_similarity(n.prop, query)
2415fn extract_vector_similarity_call(expr: &Expr) -> Option<(String, String, Expr)> {
2416    if let Expr::FunctionCall { name, args, .. } = expr
2417        && name.eq_ignore_ascii_case("vector_similarity")
2418        && args.len() == 2
2419    {
2420        // First arg should be Property(Identifier(var), prop)
2421        if let Expr::Property(var_expr, prop) = &args[0]
2422            && let Expr::Variable(var) = var_expr.as_ref()
2423        {
2424            // Second arg is query
2425            return Some((var.clone(), prop.clone(), args[1].clone()));
2426        }
2427    }
2428    None
2429}
2430
2431/// Extract a float value from a literal expression
2432fn extract_float_literal(expr: &Expr) -> Option<f32> {
2433    match expr {
2434        Expr::Literal(CypherLiteral::Integer(i)) => Some(*i as f32),
2435        Expr::Literal(CypherLiteral::Float(f)) => Some(*f as f32),
2436        _ => None,
2437    }
2438}
2439
/// Translates a parsed Cypher AST into a [`LogicalPlan`].
///
/// `QueryPlanner` applies semantic validation (variable scoping, label
/// resolution, type checking) and produces a plan tree that the executor
/// can run against storage.
///
/// Build one with [`QueryPlanner::new`] and customise it through the
/// consuming `with_*` builder methods.
#[derive(Debug)]
pub struct QueryPlanner {
    /// Graph schema this planner resolves labels and property types against.
    schema: Arc<Schema>,
    /// Cache of parsed generation expressions, keyed by (label_name, gen_col_name).
    /// Filled once in `new`; expressions that fail to parse are not cached.
    gen_expr_cache: HashMap<(String, String), Expr>,
    /// Counter for generating unique anonymous variable names
    /// (`_anon_<n>`, see `next_anon_var`).
    anon_counter: std::sync::atomic::AtomicUsize,
    /// Query parameters for resolving $param in SKIP/LIMIT. Empty unless
    /// supplied through `with_params`.
    params: HashMap<String, uni_common::Value>,
    /// Optional plugin registry consulted when label / edge-type / identifier
    /// resolution misses the local schema (M5b — Catalog / ReplacementScan).
    plugin_registry: Option<Arc<uni_plugin::PluginRegistry>>,
    /// Gate for replacement-scan dispatch on unknown identifiers (M5b).
    /// Off by default; toggled through `with_replacement_scans`.
    replacement_scans_enabled: bool,
    /// Names of parameters folded into a `LIMIT`/`SKIP` position during the
    /// plan. The resulting `LogicalPlan::Limit` bakes the concrete values in, so
    /// a plan cache keyed on query text must additionally key on these
    /// parameters' values (see `folded_limit_skip_params`). Interior-mutable
    /// because `plan` takes `&self`.
    folded_limit_skip_params: std::sync::Mutex<std::collections::BTreeSet<String>>,
}
2466
/// Bundle of inputs describing one relationship hop to plan.
///
/// NOTE(review): the consumer of this struct is outside this excerpt; the
/// field notes below are inferred from names and types — confirm against the
/// traversal-planning code.
struct TraverseParams<'a> {
    /// Relationship pattern of the hop (borrowed from the AST).
    rel: &'a RelationshipPattern,
    /// Node pattern at the far end of the hop (borrowed from the AST).
    target_node: &'a NodePattern,
    /// Presumably `true` when the hop belongs to an OPTIONAL MATCH — verify.
    optional: bool,
    /// Presumably the name the enclosing path is bound to, if any — verify.
    path_variable: Option<String>,
    /// All variables from this OPTIONAL MATCH pattern.
    /// Used to ensure multi-hop patterns correctly NULL all vars when any hop fails.
    optional_pattern_vars: HashSet<String>,
}
2476
2477impl QueryPlanner {
2478    /// Create a new planner for the given schema.
2479    ///
2480    /// Pre-parses all generation expressions defined in the schema so that
2481    /// repeated plan calls avoid redundant parsing.
2482    pub fn new(schema: Arc<Schema>) -> Self {
2483        // Pre-parse all generation expressions for caching
2484        let mut gen_expr_cache = HashMap::new();
2485        for (label, props) in &schema.properties {
2486            for (gen_col, meta) in props {
2487                if let Some(expr_str) = &meta.generation_expression
2488                    && let Ok(parsed_expr) = uni_cypher::parse_expression(expr_str)
2489                {
2490                    gen_expr_cache.insert((label.clone(), gen_col.clone()), parsed_expr);
2491                }
2492            }
2493        }
2494        Self {
2495            schema,
2496            gen_expr_cache,
2497            anon_counter: std::sync::atomic::AtomicUsize::new(0),
2498            params: HashMap::new(),
2499            plugin_registry: None,
2500            replacement_scans_enabled: false,
2501            folded_limit_skip_params: std::sync::Mutex::new(std::collections::BTreeSet::new()),
2502        }
2503    }
2504
2505    /// Graph schema this planner resolves labels and property types against.
2506    pub(crate) fn schema(&self) -> &Schema {
2507        &self.schema
2508    }
2509
2510    /// Record the parameters referenced by a successfully-folded `LIMIT`/`SKIP`
2511    /// expression so the caller's plan cache can key on their values.
2512    fn note_folded_limit_skip(&self, expr: &Expr) {
2513        let mut names = Vec::new();
2514        collect_expr_parameters(expr, &mut names);
2515        if !names.is_empty()
2516            && let Ok(mut acc) = self.folded_limit_skip_params.lock()
2517        {
2518            acc.extend(names);
2519        }
2520    }
2521
2522    /// Parameter names folded into `LIMIT`/`SKIP` positions during the last
2523    /// [`plan`](Self::plan).
2524    ///
2525    /// The cached plan bakes these values in, so a text-keyed plan cache must
2526    /// fold their current values into its key — otherwise two calls differing
2527    /// only in a LIMIT/SKIP parameter would wrongly share one cached plan.
2528    /// Returns an empty vector when no parameter was folded.
2529    #[must_use]
2530    pub fn folded_limit_skip_params(&self) -> Vec<String> {
2531        self.folded_limit_skip_params
2532            .lock()
2533            .map(|acc| acc.iter().cloned().collect())
2534            .unwrap_or_default()
2535    }
2536
2537    /// Set query parameters for resolving `$param` references in SKIP/LIMIT.
2538    pub fn with_params(mut self, params: HashMap<String, uni_common::Value>) -> Self {
2539        self.params = params;
2540        self
2541    }
2542
2543    /// Attach a plugin registry for catalog / replacement-scan fallbacks
2544    /// (M5b). When absent, label / edge-type resolution behaves exactly as
2545    /// before; when present, an unknown label is offered to each
2546    /// `CatalogProvider` before erroring.
2547    #[must_use]
2548    pub fn with_plugin_registry(mut self, registry: Arc<uni_plugin::PluginRegistry>) -> Self {
2549        self.plugin_registry = Some(registry);
2550        self
2551    }
2552
2553    /// Enable replacement-scan dispatch on unknown identifiers (M5b §4.23).
2554    /// Default off; opt-in only.
2555    #[must_use]
2556    pub fn with_replacement_scans(mut self, enabled: bool) -> Self {
2557        self.replacement_scans_enabled = enabled;
2558        self
2559    }
2560
2561    /// Allocate (or look up) a virtual label ID for `name` by consulting
2562    /// every registered `CatalogProvider` and then every registered
2563    /// `ReplacementScanProvider` (only the latter when the replacement-
2564    /// scan gate is on). On a first claim the catalog table is stashed
2565    /// on the host's [`uni_plugin::PluginRegistry`] under a freshly
2566    /// allocated virtual ID; subsequent calls with the same name return
2567    /// the cached ID and refresh the stashed table.
2568    ///
2569    /// Returns `None` if no provider claims the label or no plugin
2570    /// registry is attached. Returns `Some((id, table))` on a hit; the
2571    /// `id` lies in `[VIRTUAL_LABEL_ID_START, VIRTUAL_LABEL_ID_SENTINEL)`.
2572    /// Errors are surfaced as `Some(Err(_))`-equivalent via `Result`.
2573    fn allocate_virtual_label(
2574        &self,
2575        name: &str,
2576    ) -> Result<Option<(u16, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2577        let Some(registry) = self.plugin_registry.as_ref() else {
2578            return Ok(None);
2579        };
2580        // 1. CatalogProvider (always consulted, no gate — Batch 2 semantics).
2581        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2582        for cat in registry.catalogs() {
2583            if let Some(t) = cat.resolve_label(name) {
2584                claimed = Some(t);
2585                break;
2586            }
2587        }
2588        // 2. ReplacementScanProvider (gated). Only consult if no
2589        //    CatalogProvider already claimed.
2590        if claimed.is_none() {
2591            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2592            if let Some(Replacement::CatalogTable(t)) =
2593                self.consult_replacement_scan(ReplacementRequest::Label(name))
2594            {
2595                claimed = Some(t);
2596            }
2597        }
2598        let Some(table) = claimed else {
2599            return Ok(None);
2600        };
2601        let id = registry
2602            .register_virtual_label(name, Arc::clone(&table))
2603            .map_err(|e| anyhow!("virtual label registration failed for `{name}`: {e}"))?;
2604        Ok(Some((id, table)))
2605    }
2606
2607    /// Reject any write operation that names a label currently allocated
2608    /// as a virtual (catalog-backed) label. Catalog tables are read-only
2609    /// in this milestone — there is no write-back path through
2610    /// `CatalogTable::scan` to the originating provider, so silently
2611    /// allowing the write would produce ghosted state on the host side
2612    /// without affecting the external catalog. Errors with a clear,
2613    /// actionable message.
2614    fn reject_virtual_label_writes(&self, labels: &[String], op: &str) -> Result<()> {
2615        let Some(registry) = self.plugin_registry.as_ref() else {
2616            return Ok(());
2617        };
2618        for label in labels {
2619            if registry.virtual_label_by_name(label).is_some() {
2620                return Err(anyhow!(
2621                    "Cannot {op} on virtual (catalog-resolved) label `{label}` — virtual \
2622                     labels are read-only; write back via the originating catalog \
2623                     instead"
2624                ));
2625            }
2626        }
2627        Ok(())
2628    }
2629
2630    /// Edge-type analog of [`Self::allocate_virtual_label`].
2631    fn allocate_virtual_edge_type(
2632        &self,
2633        name: &str,
2634    ) -> Result<Option<(u32, Arc<dyn uni_plugin::traits::catalog::CatalogTable>)>> {
2635        let Some(registry) = self.plugin_registry.as_ref() else {
2636            return Ok(None);
2637        };
2638        let mut claimed: Option<Arc<dyn uni_plugin::traits::catalog::CatalogTable>> = None;
2639        for cat in registry.catalogs() {
2640            if let Some(t) = cat.resolve_edge_type(name) {
2641                claimed = Some(t);
2642                break;
2643            }
2644        }
2645        let Some(table) = claimed else {
2646            return Ok(None);
2647        };
2648        let id = registry
2649            .register_virtual_edge_type(name, Arc::clone(&table))
2650            .map_err(|e| anyhow!("virtual edge-type registration failed for `{name}`: {e}"))?;
2651        Ok(Some((id, table)))
2652    }
2653
2654    /// Try to resolve an unknown identifier through replacement-scan providers
2655    /// (gated by [`Self::with_replacement_scans`]). Returns the first
2656    /// [`Replacement`] any registered provider produces, or `None` if the
2657    /// gate is off, no registry is attached, or no provider claims the
2658    /// identifier. First-match wins (mirrors DuckDB).
2659    pub(crate) fn consult_replacement_scan(
2660        &self,
2661        request: uni_plugin::traits::catalog::ReplacementRequest<'_>,
2662    ) -> Option<uni_plugin::traits::catalog::Replacement> {
2663        if !self.replacement_scans_enabled {
2664            return None;
2665        }
2666        let registry = self.plugin_registry.as_ref()?;
2667        for r in registry.replacement_scans().iter() {
2668            if let Some(replacement) = r.replace(&request) {
2669                tracing::debug!(
2670                    target: "uni.plugin.registry",
2671                    ?request,
2672                    ?replacement,
2673                    "identifier resolved via ReplacementScanProvider"
2674                );
2675                return Some(replacement);
2676            }
2677        }
2678        None
2679    }
2680
2681    /// Resolve a user-typed procedure name against the attached plugin
2682    /// registry, applying the same namespace-prefix rules as
2683    /// `ProcedureRegistry::resolve_user_procedure` (host-coupled
2684    /// procedure dispatch). Returns `true` if any namespace claims the
2685    /// name. Used by the procedure-call replacement-scan gate to decide
2686    /// whether to consult before substituting.
2687    fn procedure_resolves(&self, user_name: &str) -> bool {
2688        let Some(registry) = self.plugin_registry.as_ref() else {
2689            return false;
2690        };
2691        // Try every namespace/local split (first-dot → last-dot) so dotted
2692        // plugin ids resolve alongside the first-dot M9/builtin convention.
2693        // Mirrors `ProcedureRegistry::resolve_user_procedure`.
2694        if uni_plugin::QName::candidate_splits(user_name).any(|q| registry.procedure(&q).is_some())
2695        {
2696            return true;
2697        }
2698        let stripped = user_name.strip_prefix("uni.").unwrap_or(user_name);
2699        for plugin_id in ["uni", "builtin", "apoc-core", "custom"] {
2700            if registry
2701                .procedure(&uni_plugin::QName::new(plugin_id, stripped))
2702                .is_some()
2703            {
2704                return true;
2705            }
2706        }
2707        false
2708    }
2709
2710    /// Construct a [`uni_plugin::QName`] from a user-typed identifier for
2711    /// passing to [`Replacement`]-scan providers. If the name is dotted,
2712    /// the last segment is the local and the rest is the namespace
2713    /// (mirroring `QName::parse`). Bare names — which Cypher allows for
2714    /// procedures (`CALL foo()`) and functions (`RETURN foo(x)`) — are
2715    /// encoded with the conventional `"user"` namespace; providers that
2716    /// want to match a bare-typed name should inspect `.local()`.
2717    fn qname_from_user(name: &str) -> uni_plugin::QName {
2718        uni_plugin::QName::parse(name).unwrap_or_else(|_| uni_plugin::QName::new("user", name))
2719    }
2720
2721    /// Apply `ReplacementScanProvider`-driven function rewrites to the
2722    /// query's AST. When the gate is off or no registry is attached, the
2723    /// walker is short-circuited and the query is returned unchanged.
2724    /// Otherwise, every [`uni_cypher::ast::Expr::FunctionCall`] is offered
2725    /// to registered providers (first-match wins); a returned
2726    /// `Replacement::Function(new_qname)` substitutes the name in place.
2727    /// Rewrite depth is capped at 1 — the rewritten name is NOT re-
2728    /// consulted (a chained `A→B→A` provider therefore stops after the
2729    /// first hop). Wrong-variant returns (`CatalogTable`, `Procedure`)
2730    /// error immediately.
2731    fn rewrite_function_calls_in_query(
2732        &self,
2733        query: uni_cypher::ast::Query,
2734    ) -> Result<uni_cypher::ast::Query> {
2735        if !self.replacement_scans_enabled || self.plugin_registry.is_none() {
2736            return Ok(query);
2737        }
2738        let mut rename = |name: &str| -> Result<Option<String>> {
2739            let qname = Self::qname_from_user(name);
2740            use uni_plugin::traits::catalog::{Replacement, ReplacementRequest};
2741            match self.consult_replacement_scan(ReplacementRequest::Function(&qname)) {
2742                Some(Replacement::Function(new_qname)) => {
2743                    // Cypher function-call dispatch is bare-name-keyed
2744                    // (the per-category translators in `df_expr` match on
2745                    // `name.to_uppercase()` against bare local strings —
2746                    // "UPPER", "ABS", etc.). When the provider returns a
2747                    // synthetic-namespace target (`builtin.*` or `user.*`),
2748                    // strip the namespace so the AST name is what those
2749                    // dispatchers expect; for plugin-namespaced targets,
2750                    // preserve the full dotted form (matches how users
2751                    // type them).
2752                    let rewritten = match new_qname.namespace() {
2753                        "builtin" | "user" => new_qname.local().to_string(),
2754                        _ => new_qname.to_string(),
2755                    };
2756                    tracing::debug!(
2757                        target: "uni.plugin.registry",
2758                        from = %name,
2759                        to = %rewritten,
2760                        "function call rerouted via ReplacementScanProvider"
2761                    );
2762                    Ok(Some(rewritten))
2763                }
2764                Some(other) => Err(anyhow!(
2765                    "ReplacementScanProvider returned wrong variant for Function \
2766                     request `{}`: expected `Function`, got {:?}",
2767                    name,
2768                    other
2769                )),
2770                None => Ok(None),
2771            }
2772        };
2773        crate::query::rewrite::function_rename::rewrite_function_calls_in_query(query, &mut rename)
2774    }
2775
2776    /// Plan a Cypher query with no pre-bound variables.
2777    pub fn plan(&self, query: Query) -> Result<LogicalPlan> {
2778        self.plan_with_scope(query, Vec::new())
2779    }
2780
2781    /// Plan a Cypher query with a set of externally pre-bound variable names.
2782    ///
2783    /// `vars` lists variable names already in scope before this query executes
2784    /// (e.g., from an enclosing Locy rule body).
2785    pub fn plan_with_scope(&self, query: Query, vars: Vec<String>) -> Result<LogicalPlan> {
2786        // Apply query rewrites before planning
2787        let rewritten_query = crate::query::rewrite::rewrite_query(query)?;
2788        // M5 follow-up #5: function-call rewrite via ReplacementScanProvider.
2789        // Done as an AST pass *before* planning so the rewritten name flows
2790        // through every downstream stage (translation, UDF resolution,
2791        // execution) as if the user had typed it. No-op when the gate is
2792        // off or no provider claims the call. First-match wins; hard-cap
2793        // at one rewrite per call site (the rewritten name is NOT re-
2794        // consulted) — see `rewrite_function_calls_in_query`.
2795        let rewritten_query = self.rewrite_function_calls_in_query(rewritten_query)?;
2796        if Self::has_mixed_union_modes(&rewritten_query) {
2797            return Err(anyhow!(
2798                "SyntaxError: InvalidClauseComposition - Cannot mix UNION and UNION ALL in the same query"
2799            ));
2800        }
2801
2802        match rewritten_query {
2803            Query::Single(stmt) => self.plan_single(stmt, vars),
2804            Query::Union { left, right, all } => {
2805                let l = self.plan_with_scope(*left, vars.clone())?;
2806                let r = self.plan_with_scope(*right, vars)?;
2807
2808                // Validate that both sides have the same column names
2809                let left_cols = Self::extract_projection_columns(&l);
2810                let right_cols = Self::extract_projection_columns(&r);
2811
2812                if left_cols != right_cols {
2813                    return Err(anyhow!(
2814                        "SyntaxError: DifferentColumnsInUnion - UNION queries must have same column names"
2815                    ));
2816                }
2817
2818                Ok(LogicalPlan::Union {
2819                    left: Box::new(l),
2820                    right: Box::new(r),
2821                    all,
2822                })
2823            }
2824            Query::Schema(cmd) => self.plan_schema_command(*cmd),
2825            Query::Explain(inner) => {
2826                let inner_plan = self.plan_with_scope(*inner, vars)?;
2827                Ok(LogicalPlan::Explain {
2828                    plan: Box::new(inner_plan),
2829                })
2830            }
2831            Query::TimeTravel { .. } => {
2832                unreachable!("TimeTravel should be resolved at API layer before planning")
2833            }
2834        }
2835    }
2836
2837    fn collect_union_modes(query: &Query, out: &mut HashSet<bool>) {
2838        match query {
2839            Query::Union { left, right, all } => {
2840                out.insert(*all);
2841                Self::collect_union_modes(left, out);
2842                Self::collect_union_modes(right, out);
2843            }
2844            Query::Explain(inner) => Self::collect_union_modes(inner, out),
2845            Query::TimeTravel { query, .. } => Self::collect_union_modes(query, out),
2846            Query::Single(_) | Query::Schema(_) => {}
2847        }
2848    }
2849
2850    fn has_mixed_union_modes(query: &Query) -> bool {
2851        let mut modes = HashSet::new();
2852        Self::collect_union_modes(query, &mut modes);
2853        modes.len() > 1
2854    }
2855
2856    fn next_anon_var(&self) -> String {
2857        let id = self
2858            .anon_counter
2859            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
2860        format!("_anon_{}", id)
2861    }
2862
2863    /// Extract projection column names from a logical plan.
2864    /// Used for UNION column validation.
2865    fn extract_projection_columns(plan: &LogicalPlan) -> Vec<String> {
2866        match plan {
2867            LogicalPlan::Project { projections, .. } => projections
2868                .iter()
2869                .map(|(expr, alias)| alias.clone().unwrap_or_else(|| expr.to_string_repr()))
2870                .collect(),
2871            LogicalPlan::Limit { input, .. }
2872            | LogicalPlan::Sort { input, .. }
2873            | LogicalPlan::Distinct { input, .. }
2874            | LogicalPlan::Filter { input, .. } => Self::extract_projection_columns(input),
2875            LogicalPlan::Union { left, right, .. } => {
2876                let left_cols = Self::extract_projection_columns(left);
2877                if left_cols.is_empty() {
2878                    Self::extract_projection_columns(right)
2879                } else {
2880                    left_cols
2881                }
2882            }
2883            LogicalPlan::Aggregate {
2884                group_by,
2885                aggregates,
2886                ..
2887            } => {
2888                let mut cols: Vec<String> = group_by.iter().map(|e| e.to_string_repr()).collect();
2889                cols.extend(aggregates.iter().map(|e| e.to_string_repr()));
2890                cols
2891            }
2892            _ => Vec::new(),
2893        }
2894    }
2895
2896    fn plan_return_clause(
2897        &self,
2898        return_clause: &ReturnClause,
2899        plan: LogicalPlan,
2900        vars_in_scope: &[VariableInfo],
2901    ) -> Result<LogicalPlan> {
2902        let mut plan = plan;
2903        let mut group_by = Vec::new();
2904        let mut aggregates = Vec::new();
2905        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
2906        let mut has_agg = false;
2907        let mut projections = Vec::new();
2908        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
2909        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
2910        let mut projected_aliases: HashSet<String> = HashSet::new();
2911
2912        for item in &return_clause.items {
2913            match item {
2914                ReturnItem::All => {
2915                    // RETURN * - add all user-named variables in scope
2916                    // (anonymous variables like _anon_0 are excluded)
2917                    let user_vars: Vec<_> = vars_in_scope
2918                        .iter()
2919                        .filter(|v| !v.name.starts_with("_anon_"))
2920                        .collect();
2921                    if user_vars.is_empty() {
2922                        return Err(anyhow!(
2923                            "SyntaxError: NoVariablesInScope - RETURN * is not allowed when there are no variables in scope"
2924                        ));
2925                    }
2926                    for v in user_vars {
2927                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2928                        if !group_by.contains(&Expr::Variable(v.name.clone())) {
2929                            group_by.push(Expr::Variable(v.name.clone()));
2930                        }
2931                        projected_aliases.insert(v.name.clone());
2932                        projected_simple_reprs.insert(v.name.clone());
2933                    }
2934                }
2935                ReturnItem::Expr {
2936                    expr,
2937                    alias,
2938                    source_text,
2939                } => {
2940                    if matches!(expr, Expr::Wildcard) {
2941                        for v in vars_in_scope {
2942                            projections
2943                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
2944                            if !group_by.contains(&Expr::Variable(v.name.clone())) {
2945                                group_by.push(Expr::Variable(v.name.clone()));
2946                            }
2947                            projected_aliases.insert(v.name.clone());
2948                            projected_simple_reprs.insert(v.name.clone());
2949                        }
2950                    } else {
2951                        // Validate expression variables are defined
2952                        validate_expression_variables(expr, vars_in_scope)?;
2953                        // Validate function argument types and boolean operators
2954                        validate_expression(expr, vars_in_scope)?;
2955                        // Pattern predicates are not allowed in RETURN
2956                        if contains_pattern_predicate(expr) {
2957                            return Err(anyhow!(
2958                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in RETURN"
2959                            ));
2960                        }
2961
2962                        // Use source text as column name when no explicit alias
2963                        let effective_alias = alias.clone().or_else(|| source_text.clone());
2964                        projections.push((expr.clone(), effective_alias));
2965                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
2966                            // Bare aggregate — push directly
2967                            has_agg = true;
2968                            aggregates.push(expr.clone());
2969                            projected_aggregate_reprs.insert(expr.to_string_repr());
2970                        } else if !is_window_function(expr)
2971                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
2972                        {
2973                            // Compound aggregate or expression containing aggregates —
2974                            // extract the inner bare aggregates for the Aggregate node
2975                            has_agg = true;
2976                            compound_agg_exprs.push(expr.clone());
2977                            for inner in extract_inner_aggregates(expr) {
2978                                let repr = inner.to_string_repr();
2979                                if !projected_aggregate_reprs.contains(&repr) {
2980                                    aggregates.push(inner);
2981                                    projected_aggregate_reprs.insert(repr);
2982                                }
2983                            }
2984                        } else if !group_by.contains(expr) {
2985                            group_by.push(expr.clone());
2986                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
2987                                projected_simple_reprs.insert(expr.to_string_repr());
2988                            }
2989                        }
2990
2991                        if let Some(a) = alias {
2992                            if projected_aliases.contains(a) {
2993                                return Err(anyhow!(
2994                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
2995                                    a
2996                                ));
2997                            }
2998                            projected_aliases.insert(a.clone());
2999                        } else if let Expr::Variable(v) = expr {
3000                            if projected_aliases.contains(v) {
3001                                return Err(anyhow!(
3002                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in RETURN",
3003                                    v
3004                                ));
3005                            }
3006                            projected_aliases.insert(v.clone());
3007                        }
3008                    }
3009                }
3010            }
3011        }
3012
3013        // Validate compound aggregate expressions: non-aggregate refs must be
3014        // individually present in the group_by as simple variables or properties.
3015        if has_agg {
3016            let group_by_reprs: HashSet<String> =
3017                group_by.iter().map(|e| e.to_string_repr()).collect();
3018            for expr in &compound_agg_exprs {
3019                let mut refs = Vec::new();
3020                collect_non_aggregate_refs(expr, false, &mut refs);
3021                for r in &refs {
3022                    let is_covered = match r {
3023                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
3024                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
3025                    };
3026                    if !is_covered {
3027                        return Err(anyhow!(
3028                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
3029                        ));
3030                    }
3031                }
3032            }
3033        }
3034
3035        if has_agg {
3036            plan = LogicalPlan::Aggregate {
3037                input: Box::new(plan),
3038                group_by,
3039                aggregates,
3040            };
3041        }
3042
3043        let mut window_exprs = Vec::new();
3044        for (expr, _) in &projections {
3045            Self::collect_window_functions(expr, &mut window_exprs);
3046        }
3047
3048        if let Some(order_by) = &return_clause.order_by {
3049            for item in order_by {
3050                Self::collect_window_functions(&item.expr, &mut window_exprs);
3051            }
3052        }
3053
3054        let has_window_exprs = !window_exprs.is_empty();
3055
3056        if has_window_exprs {
3057            // Before creating the Window node, we need to ensure all properties
3058            // referenced by window functions are available. Create a Project node
3059            // that loads these properties.
3060            let mut props_needed_for_window: Vec<Expr> = Vec::new();
3061            for window_expr in &window_exprs {
3062                Self::collect_properties_from_expr(window_expr, &mut props_needed_for_window);
3063            }
3064
3065            // Also include non-window expressions from projections that might be needed
3066            // Preserve qualified names (e.g., "e.salary") as aliases for properties
3067            let non_window_projections: Vec<_> = projections
3068                .iter()
3069                .filter_map(|(expr, alias)| {
3070                    // Keep expressions that don't have window_spec
3071                    let keep = if let Expr::FunctionCall { window_spec, .. } = expr {
3072                        window_spec.is_none()
3073                    } else {
3074                        true
3075                    };
3076
3077                    if keep {
3078                        // For property references, use the qualified name as alias
3079                        let new_alias = if matches!(expr, Expr::Property(..)) {
3080                            Some(expr.to_string_repr())
3081                        } else {
3082                            alias.clone()
3083                        };
3084                        Some((expr.clone(), new_alias))
3085                    } else {
3086                        None
3087                    }
3088                })
3089                .collect();
3090
3091            if !non_window_projections.is_empty() || !props_needed_for_window.is_empty() {
3092                let mut intermediate_projections = non_window_projections;
3093                // Add any additional property references needed by window functions
3094                // IMPORTANT: Preserve qualified names (e.g., "e.salary") as aliases so window functions can reference them
3095                for prop in &props_needed_for_window {
3096                    if !intermediate_projections
3097                        .iter()
3098                        .any(|(e, _)| e.to_string_repr() == prop.to_string_repr())
3099                    {
3100                        let qualified_name = prop.to_string_repr();
3101                        intermediate_projections.push((prop.clone(), Some(qualified_name)));
3102                    }
3103                }
3104
3105                if !intermediate_projections.is_empty() {
3106                    plan = LogicalPlan::Project {
3107                        input: Box::new(plan),
3108                        projections: intermediate_projections,
3109                    };
3110                }
3111            }
3112
3113            // Transform property expressions in window functions to use qualified variable names
3114            // so that e.dept becomes "e.dept" variable that can be looked up from the row HashMap
3115            let transformed_window_exprs: Vec<Expr> = window_exprs
3116                .into_iter()
3117                .map(Self::transform_window_expr_properties)
3118                .collect();
3119
3120            plan = LogicalPlan::Window {
3121                input: Box::new(plan),
3122                window_exprs: transformed_window_exprs,
3123            };
3124        }
3125
3126        if let Some(order_by) = &return_clause.order_by {
3127            let alias_exprs: HashMap<String, Expr> = projections
3128                .iter()
3129                .filter_map(|(expr, alias)| {
3130                    alias.as_ref().map(|a| {
3131                        // ORDER BY is planned before the final RETURN projection.
3132                        // In aggregate contexts, aliases must resolve to the
3133                        // post-aggregate output columns, not raw aggregate calls.
3134                        let rewritten = if has_agg && !has_window_exprs {
3135                            if expr.is_aggregate() && !is_compound_aggregate(expr) {
3136                                Expr::Variable(aggregate_column_name(expr))
3137                            } else if is_compound_aggregate(expr)
3138                                || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
3139                            {
3140                                replace_aggregates_with_columns(expr)
3141                            } else {
3142                                Expr::Variable(expr.to_string_repr())
3143                            }
3144                        } else {
3145                            expr.clone()
3146                        };
3147                        (a.clone(), rewritten)
3148                    })
3149                })
3150                .collect();
3151
3152            // Build an extended scope that includes RETURN aliases so ORDER BY
3153            // can reference them (e.g. RETURN n.age AS age ORDER BY age).
3154            let order_by_scope: Vec<VariableInfo> = if return_clause.distinct {
3155                // DISTINCT in RETURN narrows ORDER BY visibility to returned columns.
3156                // Keep aliases and directly returned variables in scope.
3157                let mut scope = Vec::new();
3158                for (expr, alias) in &projections {
3159                    if let Some(a) = alias
3160                        && !is_var_in_scope(&scope, a)
3161                    {
3162                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3163                    }
3164                    if let Expr::Variable(v) = expr
3165                        && !is_var_in_scope(&scope, v)
3166                    {
3167                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3168                    }
3169                }
3170                scope
3171            } else {
3172                let mut scope = vars_in_scope.to_vec();
3173                for (expr, alias) in &projections {
3174                    if let Some(a) = alias
3175                        && !is_var_in_scope(&scope, a)
3176                    {
3177                        scope.push(VariableInfo::new(a.clone(), VariableType::Scalar));
3178                    } else if let Expr::Variable(v) = expr
3179                        && !is_var_in_scope(&scope, v)
3180                    {
3181                        scope.push(VariableInfo::new(v.clone(), VariableType::Scalar));
3182                    }
3183                }
3184                scope
3185            };
3186            // Validate ORDER BY expressions against the extended scope
3187            for item in order_by {
3188                // DISTINCT allows ORDER BY on the same projected expression
3189                // even when underlying variables are not otherwise visible.
3190                let matches_projected_expr = return_clause.distinct
3191                    && projections
3192                        .iter()
3193                        .any(|(expr, _)| expr.to_string_repr() == item.expr.to_string_repr());
3194                if !matches_projected_expr {
3195                    validate_expression_variables(&item.expr, &order_by_scope)?;
3196                    validate_expression(&item.expr, &order_by_scope)?;
3197                }
3198                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
3199                if has_aggregate_in_item && !has_agg {
3200                    return Err(anyhow!(
3201                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY after RETURN"
3202                    ));
3203                }
3204                if has_agg && has_aggregate_in_item {
3205                    validate_with_order_by_aggregate_item(
3206                        &item.expr,
3207                        &projected_aggregate_reprs,
3208                        &projected_simple_reprs,
3209                        &projected_aliases,
3210                    )?;
3211                }
3212            }
3213            let rewritten_order_by: Vec<SortItem> = order_by
3214                .iter()
3215                .map(|item| SortItem {
3216                    expr: {
3217                        let mut rewritten =
3218                            rewrite_order_by_expr_with_aliases(&item.expr, &alias_exprs);
3219                        if has_agg && !has_window_exprs {
3220                            rewritten = replace_aggregates_with_columns(&rewritten);
3221                        }
3222                        rewritten
3223                    },
3224                    ascending: item.ascending,
3225                })
3226                .collect();
3227            plan = LogicalPlan::Sort {
3228                input: Box::new(plan),
3229                order_by: rewritten_order_by,
3230            };
3231        }
3232
3233        if return_clause.skip.is_some() || return_clause.limit.is_some() {
3234            let skip = return_clause
3235                .skip
3236                .as_ref()
3237                .map(|e| {
3238                    self.note_folded_limit_skip(e);
3239                    parse_non_negative_integer(e, "SKIP", &self.params)
3240                })
3241                .transpose()?
3242                .flatten();
3243            let fetch = return_clause
3244                .limit
3245                .as_ref()
3246                .map(|e| {
3247                    self.note_folded_limit_skip(e);
3248                    parse_non_negative_integer(e, "LIMIT", &self.params)
3249                })
3250                .transpose()?
3251                .flatten();
3252
3253            plan = LogicalPlan::Limit {
3254                input: Box::new(plan),
3255                skip,
3256                fetch,
3257            };
3258        }
3259
3260        if !projections.is_empty() {
3261            // If we created an Aggregate or Window node, we need to adjust the final projections
3262            // to reference aggregate/window function results as columns instead of re-evaluating them
3263            let final_projections = if has_agg || has_window_exprs {
3264                projections
3265                    .into_iter()
3266                    .map(|(expr, alias)| {
3267                        // Check if this expression is an aggregate function
3268                        if expr.is_aggregate() && !is_compound_aggregate(&expr) && !has_window_exprs
3269                        {
3270                            // Bare aggregate — replace with column reference
3271                            let col_name = aggregate_column_name(&expr);
3272                            (Expr::Variable(col_name), alias)
3273                        } else if !has_window_exprs
3274                            && (is_compound_aggregate(&expr)
3275                                || (!expr.is_aggregate() && contains_aggregate_recursive(&expr)))
3276                        {
3277                            // Compound aggregate — replace inner aggregates with
3278                            // column references, keep outer expression for Project
3279                            (replace_aggregates_with_columns(&expr), alias)
3280                        }
3281                        // For grouped RETURN projections, reference the pre-computed
3282                        // group-by output column instead of re-evaluating the expression
3283                        // against the aggregate schema (which no longer has original vars).
3284                        else if has_agg
3285                            && !has_window_exprs
3286                            && !matches!(expr, Expr::Variable(_) | Expr::Property(_, _))
3287                        {
3288                            (Expr::Variable(expr.to_string_repr()), alias)
3289                        }
3290                        // Check if this expression is a window function
3291                        else if let Expr::FunctionCall {
3292                            window_spec: Some(_),
3293                            ..
3294                        } = &expr
3295                        {
3296                            // Replace window function with a column reference to its result
3297                            // The column name in the Window output is the full expression string
3298                            let window_col_name = expr.to_string_repr();
3299                            // Keep the original alias for the final output
3300                            (Expr::Variable(window_col_name), alias)
3301                        } else {
3302                            (expr, alias)
3303                        }
3304                    })
3305                    .collect()
3306            } else {
3307                projections
3308            };
3309
3310            plan = LogicalPlan::Project {
3311                input: Box::new(plan),
3312                projections: final_projections,
3313            };
3314        }
3315
3316        if return_clause.distinct {
3317            plan = LogicalPlan::Distinct {
3318                input: Box::new(plan),
3319            };
3320        }
3321
3322        Ok(plan)
3323    }
3324
3325    fn plan_single(&self, query: Statement, initial_vars: Vec<String>) -> Result<LogicalPlan> {
3326        let typed_vars: Vec<VariableInfo> = initial_vars
3327            .into_iter()
3328            .map(|name| VariableInfo::new(name, VariableType::Imported))
3329            .collect();
3330        self.plan_single_typed(query, typed_vars)
3331    }
3332
3333    /// Rewrite a query then plan it, preserving typed variable scope when possible.
3334    ///
3335    /// For `Query::Single` statements, uses `plan_single_typed` to carry typed
3336    /// variable info through and avoid false type-conflict errors in subqueries.
3337    /// For unions and other compound queries, falls back to `plan_with_scope`.
3338    fn rewrite_and_plan_typed(
3339        &self,
3340        query: Query,
3341        typed_vars: &[VariableInfo],
3342    ) -> Result<LogicalPlan> {
3343        let rewritten = crate::query::rewrite::rewrite_query(query)?;
3344        match rewritten {
3345            Query::Single(stmt) => self.plan_single_typed(stmt, typed_vars.to_vec()),
3346            other => self.plan_with_scope(other, vars_to_strings(typed_vars)),
3347        }
3348    }
3349
3350    fn plan_single_typed(
3351        &self,
3352        query: Statement,
3353        initial_vars: Vec<VariableInfo>,
3354    ) -> Result<LogicalPlan> {
3355        let mut plan = LogicalPlan::Empty;
3356
3357        if !initial_vars.is_empty() {
3358            // Project bound variables from outer scope as parameters.
3359            // These come from the enclosing query's row (passed as sub_params in EXISTS evaluation).
3360            // Use Parameter expressions to read from params, not Variable which would read from input row.
3361            let projections = initial_vars
3362                .iter()
3363                .map(|v| (Expr::Parameter(v.name.clone()), Some(v.name.clone())))
3364                .collect();
3365            plan = LogicalPlan::Project {
3366                input: Box::new(plan),
3367                projections,
3368            };
3369        }
3370
3371        let mut vars_in_scope: Vec<VariableInfo> = initial_vars;
3372        // Track variables introduced by CREATE clauses so we can distinguish
3373        // MATCH-introduced variables (which cannot be re-created as bare nodes)
3374        // from CREATE-introduced variables (which can be referenced as bare nodes).
3375        let mut create_introduced_vars: HashSet<String> = HashSet::new();
3376        // Track variables targeted by DELETE so we can reject property/label
3377        // access on deleted entities in subsequent RETURN clauses.
3378        let mut deleted_vars: HashSet<String> = HashSet::new();
3379
3380        let clause_count = query.clauses.len();
3381        for (clause_idx, clause) in query.clauses.into_iter().enumerate() {
3382            match clause {
3383                Clause::Match(match_clause) => {
3384                    plan = self.plan_match_clause(&match_clause, plan, &mut vars_in_scope)?;
3385                }
3386                Clause::Unwind(unwind) => {
3387                    plan = LogicalPlan::Unwind {
3388                        input: Box::new(plan),
3389                        expr: unwind.expr.clone(),
3390                        variable: unwind.variable.clone(),
3391                    };
3392                    let unwind_out_type = infer_unwind_output_type(&unwind.expr, &vars_in_scope);
3393                    add_var_to_scope(&mut vars_in_scope, &unwind.variable, unwind_out_type)?;
3394                }
3395                Clause::Call(call_clause) => {
3396                    match &call_clause.kind {
3397                        CallKind::Procedure {
3398                            procedure,
3399                            arguments,
3400                        } => {
3401                            // Validate that procedure arguments don't contain aggregation functions
3402                            for arg in arguments {
3403                                if contains_aggregate_recursive(arg) {
3404                                    return Err(anyhow!(
3405                                        "SyntaxError: InvalidAggregation - Aggregation expressions are not allowed as arguments to procedure calls"
3406                                    ));
3407                                }
3408                            }
3409
3410                            let has_yield_star = call_clause.yield_items.len() == 1
3411                                && call_clause.yield_items[0].name == "*"
3412                                && call_clause.yield_items[0].alias.is_none();
3413                            if has_yield_star && clause_idx + 1 < clause_count {
3414                                return Err(anyhow!(
3415                                    "SyntaxError: UnexpectedSyntax - YIELD * is only allowed in standalone procedure calls"
3416                                ));
3417                            }
3418
3419                            // Validate for duplicate yield names (VariableAlreadyBound)
3420                            let mut yield_names = Vec::new();
3421                            for item in &call_clause.yield_items {
3422                                if item.name == "*" {
3423                                    continue;
3424                                }
3425                                let output_name = item.alias.as_ref().unwrap_or(&item.name);
3426                                if yield_names.contains(output_name) {
3427                                    return Err(anyhow!(
3428                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already appears in YIELD clause",
3429                                        output_name
3430                                    ));
3431                                }
3432                                // Check against existing scope (in-query CALL must not shadow)
3433                                if clause_idx > 0
3434                                    && vars_in_scope.iter().any(|v| v.name == *output_name)
3435                                {
3436                                    return Err(anyhow!(
3437                                        "SyntaxError: VariableAlreadyBound - Variable '{}' already declared in outer scope",
3438                                        output_name
3439                                    ));
3440                                }
3441                                yield_names.push(output_name.clone());
3442                            }
3443
3444                            let mut yields = Vec::new();
3445                            for item in &call_clause.yield_items {
3446                                if item.name == "*" {
3447                                    continue;
3448                                }
3449                                yields.push((item.name.clone(), item.alias.clone()));
3450                                let var_name = item.alias.as_ref().unwrap_or(&item.name);
3451                                // Use Imported because procedure return types are unknown
3452                                // at plan time (could be nodes, edges, or scalars)
3453                                add_var_to_scope(
3454                                    &mut vars_in_scope,
3455                                    var_name,
3456                                    VariableType::Imported,
3457                                )?;
3458                            }
3459                            // M5 follow-up #5: if replacement-scan dispatch is
3460                            // enabled and the procedure name does not resolve
3461                            // against the plugin registry, consult registered
3462                            // `ReplacementScanProvider`s. A `Replacement::Procedure`
3463                            // substitutes the call's target name in the logical
3464                            // plan; the rewritten name must itself resolve or
3465                            // we error immediately (no second-tier consult — caps
3466                            // rewrite depth at one).
3467                            let procedure_name = if self.replacement_scans_enabled
3468                                && !self.procedure_resolves(procedure)
3469                            {
3470                                use uni_plugin::traits::catalog::{
3471                                    Replacement, ReplacementRequest,
3472                                };
3473                                let qname = Self::qname_from_user(procedure);
3474                                match self
3475                                    .consult_replacement_scan(ReplacementRequest::Procedure(&qname))
3476                                {
3477                                    Some(Replacement::Procedure(new_qname)) => {
3478                                        let rewritten = new_qname.to_string();
3479                                        if !self.procedure_resolves(&rewritten) {
3480                                            return Err(anyhow!(
3481                                                "ReplacementScanProvider rerouted procedure \
3482                                                 `{}` to `{}`, which also did not resolve",
3483                                                procedure,
3484                                                rewritten
3485                                            ));
3486                                        }
3487                                        tracing::debug!(
3488                                            target: "uni.plugin.registry",
3489                                            from = %procedure,
3490                                            to = %rewritten,
3491                                            "procedure rerouted via ReplacementScanProvider"
3492                                        );
3493                                        rewritten
3494                                    }
3495                                    Some(other) => {
3496                                        return Err(anyhow!(
3497                                            "ReplacementScanProvider returned wrong variant \
3498                                             for Procedure request `{}`: expected \
3499                                             `Procedure`, got {:?}",
3500                                            procedure,
3501                                            other
3502                                        ));
3503                                    }
3504                                    None => procedure.clone(),
3505                                }
3506                            } else {
3507                                procedure.clone()
3508                            };
3509                            let proc_plan = LogicalPlan::ProcedureCall {
3510                                procedure_name,
3511                                arguments: arguments.clone(),
3512                                yield_items: yields.clone(),
3513                            };
3514
3515                            if matches!(plan, LogicalPlan::Empty) {
3516                                // Standalone CALL (first clause) — use directly
3517                                plan = proc_plan;
3518                            } else if yields.is_empty() {
3519                                // In-query CALL with no YIELD (void procedure):
3520                                // preserve the input rows unchanged
3521                            } else {
3522                                // In-query CALL with YIELD: cross-join input × procedure output
3523                                plan = LogicalPlan::Apply {
3524                                    input: Box::new(plan),
3525                                    subquery: Box::new(proc_plan),
3526                                    input_filter: None,
3527                                };
3528                            }
3529                        }
3530                        CallKind::Subquery(query) => {
3531                            let subquery_plan =
3532                                self.rewrite_and_plan_typed(*query.clone(), &vars_in_scope)?;
3533
3534                            // Extract variables from subquery RETURN clause
3535                            let subquery_vars = Self::collect_plan_variables(&subquery_plan);
3536
3537                            // Add new variables to scope (as Scalar since they come from subquery projection)
3538                            for var in subquery_vars {
3539                                if !is_var_in_scope(&vars_in_scope, &var) {
3540                                    add_var_to_scope(
3541                                        &mut vars_in_scope,
3542                                        &var,
3543                                        VariableType::Scalar,
3544                                    )?;
3545                                }
3546                            }
3547
3548                            plan = LogicalPlan::SubqueryCall {
3549                                input: Box::new(plan),
3550                                subquery: Box::new(subquery_plan),
3551                            };
3552                        }
3553                    }
3554                }
3555                Clause::Merge(merge_clause) => {
3556                    validate_merge_clause(&merge_clause, &vars_in_scope)?;
3557                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3558                    // read-only — reject MERGE that names one.
3559                    let merge_labels = collect_pattern_labels(&merge_clause.pattern);
3560                    self.reject_virtual_label_writes(&merge_labels, "MERGE")?;
3561
3562                    plan = LogicalPlan::Merge {
3563                        input: Box::new(plan),
3564                        pattern: merge_clause.pattern.clone(),
3565                        on_match: Some(SetClause {
3566                            items: merge_clause.on_match.clone(),
3567                        }),
3568                        on_create: Some(SetClause {
3569                            items: merge_clause.on_create.clone(),
3570                        }),
3571                    };
3572
3573                    for path in &merge_clause.pattern.paths {
3574                        if let Some(path_var) = &path.variable
3575                            && !path_var.is_empty()
3576                            && !is_var_in_scope(&vars_in_scope, path_var)
3577                        {
3578                            add_var_to_scope(&mut vars_in_scope, path_var, VariableType::Path)?;
3579                        }
3580                        for element in &path.elements {
3581                            if let PatternElement::Node(n) = element {
3582                                if let Some(v) = &n.variable
3583                                    && !is_var_in_scope(&vars_in_scope, v)
3584                                {
3585                                    add_var_to_scope(&mut vars_in_scope, v, VariableType::Node)?;
3586                                }
3587                            } else if let PatternElement::Relationship(r) = element
3588                                && let Some(v) = &r.variable
3589                                && !is_var_in_scope(&vars_in_scope, v)
3590                            {
3591                                add_var_to_scope(&mut vars_in_scope, v, VariableType::Edge)?;
3592                            }
3593                        }
3594                    }
3595                }
3596                Clause::Create(create_clause) => {
3597                    // M5 follow-up #6: virtual (catalog-resolved) labels are
3598                    // read-only — reject CREATE that names one.
3599                    let create_labels = collect_pattern_labels(&create_clause.pattern);
3600                    self.reject_virtual_label_writes(&create_labels, "CREATE")?;
3601                    // Validate CREATE patterns:
3602                    // - Nodes with labels/properties are "creations" - can't rebind existing variables
3603                    // - Bare nodes (v) are "references" if bound, "creations" if not
3604                    // - Relationships are always creations - can't rebind
3605                    // - Within CREATE, each new variable can only be defined once
3606                    // - Variables used in properties must be defined
3607                    let mut create_vars: Vec<&str> = Vec::new();
3608                    for path in &create_clause.pattern.paths {
3609                        let is_standalone_node = path.elements.len() == 1;
3610                        for element in &path.elements {
3611                            match element {
3612                                PatternElement::Node(n) => {
3613                                    validate_property_variables(
3614                                        &n.properties,
3615                                        &vars_in_scope,
3616                                        &create_vars,
3617                                    )?;
3618
3619                                    if let Some(v) = n.variable.as_deref()
3620                                        && !v.is_empty()
3621                                    {
3622                                        // A node is a "creation" if it has labels or properties
3623                                        let is_creation =
3624                                            !n.labels.is_empty() || n.properties.is_some();
3625
3626                                        if is_creation {
3627                                            check_not_already_bound(
3628                                                v,
3629                                                &vars_in_scope,
3630                                                &create_vars,
3631                                            )?;
3632                                            create_vars.push(v);
3633                                        } else if is_standalone_node
3634                                            && is_var_in_scope(&vars_in_scope, v)
3635                                            && !create_introduced_vars.contains(v)
3636                                        {
3637                                            // Standalone bare node referencing a variable from a
3638                                            // non-CREATE clause (e.g. MATCH (a) CREATE (a)) — invalid.
3639                                            // Bare nodes used as relationship endpoints
3640                                            // (e.g. CREATE (a)-[:R]->(b)) are valid references.
3641                                            return Err(anyhow!(
3642                                                "SyntaxError: VariableAlreadyBound - '{}'",
3643                                                v
3644                                            ));
3645                                        } else if !create_vars.contains(&v) {
3646                                            // New bare variable — register it
3647                                            create_vars.push(v);
3648                                        }
3649                                        // else: bare reference to same-CREATE or previous-CREATE variable — OK
3650                                    }
3651                                }
3652                                PatternElement::Relationship(r) => {
3653                                    validate_property_variables(
3654                                        &r.properties,
3655                                        &vars_in_scope,
3656                                        &create_vars,
3657                                    )?;
3658
3659                                    if let Some(v) = r.variable.as_deref()
3660                                        && !v.is_empty()
3661                                    {
3662                                        check_not_already_bound(v, &vars_in_scope, &create_vars)?;
3663                                        create_vars.push(v);
3664                                    }
3665
3666                                    // Validate relationship constraints for CREATE
3667                                    if r.types.len() != 1 {
3668                                        return Err(anyhow!(
3669                                            "SyntaxError: NoSingleRelationshipType - Exactly one relationship type required for CREATE"
3670                                        ));
3671                                    }
3672                                    if r.direction == Direction::Both {
3673                                        return Err(anyhow!(
3674                                            "SyntaxError: RequiresDirectedRelationship - Only directed relationships are supported in CREATE"
3675                                        ));
3676                                    }
3677                                    if r.range.is_some() {
3678                                        return Err(anyhow!(
3679                                            "SyntaxError: CreatingVarLength - Variable length relationships cannot be created"
3680                                        ));
3681                                    }
3682                                }
3683                                PatternElement::Parenthesized { .. } => {}
3684                            }
3685                        }
3686                    }
3687
3688                    // Batch consecutive CREATEs to avoid deep recursion
3689                    match &mut plan {
3690                        LogicalPlan::CreateBatch { patterns, .. } => {
3691                            // Append to existing batch
3692                            patterns.push(create_clause.pattern.clone());
3693                        }
3694                        LogicalPlan::Create { input, pattern } => {
3695                            // Convert single Create to CreateBatch with both patterns
3696                            let first_pattern = pattern.clone();
3697                            plan = LogicalPlan::CreateBatch {
3698                                input: input.clone(),
3699                                patterns: vec![first_pattern, create_clause.pattern.clone()],
3700                            };
3701                        }
3702                        _ => {
3703                            // Start new Create (may become batch if more CREATEs follow)
3704                            plan = LogicalPlan::Create {
3705                                input: Box::new(plan),
3706                                pattern: create_clause.pattern.clone(),
3707                            };
3708                        }
3709                    }
3710                    // Add variables from created nodes and relationships to scope
3711                    for path in &create_clause.pattern.paths {
3712                        for element in &path.elements {
3713                            match element {
3714                                PatternElement::Node(n) => {
3715                                    if let Some(var) = &n.variable
3716                                        && !var.is_empty()
3717                                    {
3718                                        create_introduced_vars.insert(var.clone());
3719                                        add_var_to_scope(
3720                                            &mut vars_in_scope,
3721                                            var,
3722                                            VariableType::Node,
3723                                        )?;
3724                                    }
3725                                }
3726                                PatternElement::Relationship(r) => {
3727                                    if let Some(var) = &r.variable
3728                                        && !var.is_empty()
3729                                    {
3730                                        create_introduced_vars.insert(var.clone());
3731                                        add_var_to_scope(
3732                                            &mut vars_in_scope,
3733                                            var,
3734                                            VariableType::Edge,
3735                                        )?;
3736                                    }
3737                                }
3738                                PatternElement::Parenthesized { .. } => {
3739                                    // Skip for now - not commonly used in CREATE
3740                                }
3741                            }
3742                        }
3743                    }
3744                }
3745                Clause::Set(set_clause) => {
3746                    // Validate SET value expressions
3747                    for item in &set_clause.items {
3748                        match item {
3749                            SetItem::Property { value, .. }
3750                            | SetItem::Variable { value, .. }
3751                            | SetItem::VariablePlus { value, .. } => {
3752                                validate_expression_variables(value, &vars_in_scope)?;
3753                                validate_expression(value, &vars_in_scope)?;
3754                                if contains_pattern_predicate(value) {
3755                                    return Err(anyhow!(
3756                                        "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in SET"
3757                                    ));
3758                                }
3759                            }
3760                            SetItem::Labels { .. } => {}
3761                        }
3762                    }
3763                    plan = LogicalPlan::Set {
3764                        input: Box::new(plan),
3765                        items: set_clause.items.clone(),
3766                    };
3767                }
3768                Clause::Remove(remove_clause) => {
3769                    plan = LogicalPlan::Remove {
3770                        input: Box::new(plan),
3771                        items: remove_clause.items.clone(),
3772                    };
3773                }
3774                Clause::Delete(delete_clause) => {
3775                    // Validate DELETE targets
3776                    for item in &delete_clause.items {
3777                        // DELETE n:Label is invalid syntax (label expressions not allowed)
3778                        if matches!(item, Expr::LabelCheck { .. }) {
3779                            return Err(anyhow!(
3780                                "SyntaxError: InvalidDelete - DELETE requires a simple variable reference, not a label expression"
3781                            ));
3782                        }
3783                        let vars_used = collect_expr_variables(item);
3784                        // Reject expressions with no variable references (e.g. DELETE 1+1)
3785                        if vars_used.is_empty() {
3786                            return Err(anyhow!(
3787                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, not a literal expression"
3788                            ));
3789                        }
3790                        for var in &vars_used {
3791                            // Check if variable is defined
3792                            if find_var_in_scope(&vars_in_scope, var).is_none() {
3793                                return Err(anyhow!(
3794                                    "SyntaxError: UndefinedVariable - Variable '{}' not defined",
3795                                    var
3796                                ));
3797                            }
3798                        }
3799                        // Strict type check only for simple variable references —
3800                        // complex expressions (property access, array index, etc.)
3801                        // may resolve to a node/edge at runtime even if the base
3802                        // variable is typed as Scalar (e.g. nodes(p)[0]).
3803                        if let Expr::Variable(name) = item
3804                            && let Some(info) = find_var_in_scope(&vars_in_scope, name)
3805                            && matches!(
3806                                info.var_type,
3807                                VariableType::Scalar | VariableType::ScalarLiteral
3808                            )
3809                        {
3810                            return Err(anyhow!(
3811                                "SyntaxError: InvalidArgumentType - DELETE requires node or relationship, '{}' is a scalar value",
3812                                name
3813                            ));
3814                        }
3815                    }
3816                    // Track deleted variables for later validation
3817                    for item in &delete_clause.items {
3818                        if let Expr::Variable(name) = item {
3819                            deleted_vars.insert(name.clone());
3820                        }
3821                    }
3822                    plan = LogicalPlan::Delete {
3823                        input: Box::new(plan),
3824                        items: delete_clause.items.clone(),
3825                        detach: delete_clause.detach,
3826                    };
3827                }
3828                Clause::With(with_clause) => {
3829                    let (new_plan, new_vars) =
3830                        self.plan_with_clause(&with_clause, plan, &vars_in_scope)?;
3831                    plan = new_plan;
3832                    vars_in_scope = new_vars;
3833                }
3834                Clause::WithRecursive(with_recursive) => {
3835                    // Plan the recursive CTE
3836                    plan = self.plan_with_recursive(&with_recursive, plan, &vars_in_scope)?;
3837                    // Add the CTE name to the scope (as Scalar since it's a table reference)
3838                    add_var_to_scope(
3839                        &mut vars_in_scope,
3840                        &with_recursive.name,
3841                        VariableType::Scalar,
3842                    )?;
3843                }
3844                Clause::Return(return_clause) => {
3845                    // Check for property/label access on deleted entities
3846                    if !deleted_vars.is_empty() {
3847                        for item in &return_clause.items {
3848                            if let ReturnItem::Expr { expr, .. } = item {
3849                                validate_no_deleted_entity_access(expr, &deleted_vars)?;
3850                            }
3851                        }
3852                    }
3853                    plan = self.plan_return_clause(&return_clause, plan, &vars_in_scope)?;
3854                } // All Clause variants are handled above - no catch-all needed
3855            }
3856        }
3857
3858        // Wrap write operations without RETURN in Limit(0) per OpenCypher spec.
3859        // CREATE (n) should return 0 rows, but CREATE (n) RETURN n should return 1 row.
3860        // If RETURN was used, the plan will have been wrapped in Project, so we only
3861        // wrap terminal Create/CreateBatch/Delete/Set/Remove nodes.
3862        let plan = match &plan {
3863            LogicalPlan::Create { .. }
3864            | LogicalPlan::CreateBatch { .. }
3865            | LogicalPlan::Delete { .. }
3866            | LogicalPlan::Set { .. }
3867            | LogicalPlan::Remove { .. }
3868            | LogicalPlan::Merge { .. } => LogicalPlan::Limit {
3869                input: Box::new(plan),
3870                skip: None,
3871                fetch: Some(0),
3872            },
3873            _ => plan,
3874        };
3875
3876        Ok(plan)
3877    }
3878
3879    fn collect_properties_from_expr(expr: &Expr, collected: &mut Vec<Expr>) {
3880        match expr {
3881            Expr::Property(_, _)
3882                if !collected
3883                    .iter()
3884                    .any(|e| e.to_string_repr() == expr.to_string_repr()) =>
3885            {
3886                collected.push(expr.clone());
3887            }
3888            Expr::Property(_, _) => {}
3889            Expr::Variable(_) => {
3890                // Variables are already available, don't need to project them
3891            }
3892            Expr::BinaryOp { left, right, .. } => {
3893                Self::collect_properties_from_expr(left, collected);
3894                Self::collect_properties_from_expr(right, collected);
3895            }
3896            Expr::FunctionCall {
3897                args, window_spec, ..
3898            } => {
3899                for arg in args {
3900                    Self::collect_properties_from_expr(arg, collected);
3901                }
3902                if let Some(spec) = window_spec {
3903                    for partition_expr in &spec.partition_by {
3904                        Self::collect_properties_from_expr(partition_expr, collected);
3905                    }
3906                    for sort_item in &spec.order_by {
3907                        Self::collect_properties_from_expr(&sort_item.expr, collected);
3908                    }
3909                }
3910            }
3911            Expr::List(items) => {
3912                for item in items {
3913                    Self::collect_properties_from_expr(item, collected);
3914                }
3915            }
3916            Expr::UnaryOp { expr: e, .. }
3917            | Expr::IsNull(e)
3918            | Expr::IsNotNull(e)
3919            | Expr::IsUnique(e) => {
3920                Self::collect_properties_from_expr(e, collected);
3921            }
3922            Expr::Case {
3923                expr,
3924                when_then,
3925                else_expr,
3926            } => {
3927                if let Some(e) = expr {
3928                    Self::collect_properties_from_expr(e, collected);
3929                }
3930                for (w, t) in when_then {
3931                    Self::collect_properties_from_expr(w, collected);
3932                    Self::collect_properties_from_expr(t, collected);
3933                }
3934                if let Some(e) = else_expr {
3935                    Self::collect_properties_from_expr(e, collected);
3936                }
3937            }
3938            Expr::In { expr, list } => {
3939                Self::collect_properties_from_expr(expr, collected);
3940                Self::collect_properties_from_expr(list, collected);
3941            }
3942            Expr::ArrayIndex { array, index } => {
3943                Self::collect_properties_from_expr(array, collected);
3944                Self::collect_properties_from_expr(index, collected);
3945            }
3946            Expr::ArraySlice { array, start, end } => {
3947                Self::collect_properties_from_expr(array, collected);
3948                if let Some(s) = start {
3949                    Self::collect_properties_from_expr(s, collected);
3950                }
3951                if let Some(e) = end {
3952                    Self::collect_properties_from_expr(e, collected);
3953                }
3954            }
3955            _ => {}
3956        }
3957    }
3958
3959    fn collect_window_functions(expr: &Expr, collected: &mut Vec<Expr>) {
3960        if let Expr::FunctionCall { window_spec, .. } = expr {
3961            // Collect any function with a window spec (OVER clause)
3962            if window_spec.is_some() {
3963                if !collected
3964                    .iter()
3965                    .any(|e| e.to_string_repr() == expr.to_string_repr())
3966                {
3967                    collected.push(expr.clone());
3968                }
3969                return;
3970            }
3971        }
3972
3973        match expr {
3974            Expr::BinaryOp { left, right, .. } => {
3975                Self::collect_window_functions(left, collected);
3976                Self::collect_window_functions(right, collected);
3977            }
3978            Expr::FunctionCall { args, .. } => {
3979                for arg in args {
3980                    Self::collect_window_functions(arg, collected);
3981                }
3982            }
3983            Expr::List(items) => {
3984                for i in items {
3985                    Self::collect_window_functions(i, collected);
3986                }
3987            }
3988            Expr::Map(items) => {
3989                for (_, i) in items {
3990                    Self::collect_window_functions(i, collected);
3991                }
3992            }
3993            Expr::IsNull(e) | Expr::IsNotNull(e) | Expr::UnaryOp { expr: e, .. } => {
3994                Self::collect_window_functions(e, collected);
3995            }
3996            Expr::Case {
3997                expr,
3998                when_then,
3999                else_expr,
4000            } => {
4001                if let Some(e) = expr {
4002                    Self::collect_window_functions(e, collected);
4003                }
4004                for (w, t) in when_then {
4005                    Self::collect_window_functions(w, collected);
4006                    Self::collect_window_functions(t, collected);
4007                }
4008                if let Some(e) = else_expr {
4009                    Self::collect_window_functions(e, collected);
4010                }
4011            }
4012            Expr::Reduce {
4013                init, list, expr, ..
4014            } => {
4015                Self::collect_window_functions(init, collected);
4016                Self::collect_window_functions(list, collected);
4017                Self::collect_window_functions(expr, collected);
4018            }
4019            Expr::Quantifier {
4020                list, predicate, ..
4021            } => {
4022                Self::collect_window_functions(list, collected);
4023                Self::collect_window_functions(predicate, collected);
4024            }
4025            Expr::In { expr, list } => {
4026                Self::collect_window_functions(expr, collected);
4027                Self::collect_window_functions(list, collected);
4028            }
4029            Expr::ArrayIndex { array, index } => {
4030                Self::collect_window_functions(array, collected);
4031                Self::collect_window_functions(index, collected);
4032            }
4033            Expr::ArraySlice { array, start, end } => {
4034                Self::collect_window_functions(array, collected);
4035                if let Some(s) = start {
4036                    Self::collect_window_functions(s, collected);
4037                }
4038                if let Some(e) = end {
4039                    Self::collect_window_functions(e, collected);
4040                }
4041            }
4042            Expr::Property(e, _) => Self::collect_window_functions(e, collected),
4043            Expr::CountSubquery(_) | Expr::Exists { .. } => {}
4044            _ => {}
4045        }
4046    }
4047
4048    /// Transform property expressions in manual window functions to use qualified variable names.
4049    ///
4050    /// Converts `Expr::Property(Expr::Variable("e"), "dept")` to `Expr::Variable("e.dept")`
4051    /// so the executor can look up values directly from the row HashMap after the
4052    /// intermediate projection has materialized these properties with qualified names.
4053    ///
4054    /// Transforms ALL window functions (both manual and aggregate).
4055    /// Properties like `e.dept` become variables like `Expr::Variable("e.dept")`.
4056    fn transform_window_expr_properties(expr: Expr) -> Expr {
4057        let Expr::FunctionCall {
4058            name,
4059            args,
4060            window_spec: Some(spec),
4061            distinct,
4062        } = expr
4063        else {
4064            return expr;
4065        };
4066
4067        // Transform arguments for ALL window functions
4068        // Both manual (ROW_NUMBER, etc.) and aggregate (SUM, AVG, etc.) need this
4069        let transformed_args = args
4070            .into_iter()
4071            .map(Self::transform_property_to_variable)
4072            .collect();
4073
4074        // CRITICAL: ALL window functions (manual and aggregate) need partition_by/order_by transformed
4075        let transformed_partition_by = spec
4076            .partition_by
4077            .into_iter()
4078            .map(Self::transform_property_to_variable)
4079            .collect();
4080
4081        let transformed_order_by = spec
4082            .order_by
4083            .into_iter()
4084            .map(|item| SortItem {
4085                expr: Self::transform_property_to_variable(item.expr),
4086                ascending: item.ascending,
4087            })
4088            .collect();
4089
4090        Expr::FunctionCall {
4091            name,
4092            args: transformed_args,
4093            window_spec: Some(WindowSpec {
4094                partition_by: transformed_partition_by,
4095                order_by: transformed_order_by,
4096            }),
4097            distinct,
4098        }
4099    }
4100
4101    /// Transform a property expression to a variable expression with qualified name.
4102    ///
4103    /// `Expr::Property(Expr::Variable("e"), "dept")` becomes `Expr::Variable("e.dept")`
4104    fn transform_property_to_variable(expr: Expr) -> Expr {
4105        let Expr::Property(base, prop) = expr else {
4106            return expr;
4107        };
4108
4109        match *base {
4110            Expr::Variable(var) => Expr::Variable(format!("{}.{}", var, prop)),
4111            other => Expr::Property(Box::new(Self::transform_property_to_variable(other)), prop),
4112        }
4113    }
4114
4115    /// Transform VALID_AT macro into function call
4116    ///
4117    /// `e VALID_AT timestamp` becomes `uni.temporal.validAt(e, 'valid_from', 'valid_to', timestamp)`
4118    /// `e VALID_AT(timestamp, 'start', 'end')` becomes `uni.temporal.validAt(e, 'start', 'end', timestamp)`
4119    fn transform_valid_at_to_function(expr: Expr) -> Expr {
4120        match expr {
4121            Expr::ValidAt {
4122                entity,
4123                timestamp,
4124                start_prop,
4125                end_prop,
4126            } => {
4127                let start = start_prop.unwrap_or_else(|| "valid_from".to_string());
4128                let end = end_prop.unwrap_or_else(|| "valid_to".to_string());
4129
4130                Expr::FunctionCall {
4131                    name: "uni.temporal.validAt".to_string(),
4132                    args: vec![
4133                        Self::transform_valid_at_to_function(*entity),
4134                        Expr::Literal(CypherLiteral::String(start)),
4135                        Expr::Literal(CypherLiteral::String(end)),
4136                        Self::transform_valid_at_to_function(*timestamp),
4137                    ],
4138                    distinct: false,
4139                    window_spec: None,
4140                }
4141            }
4142            // Recursively transform nested expressions
4143            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4144                left: Box::new(Self::transform_valid_at_to_function(*left)),
4145                op,
4146                right: Box::new(Self::transform_valid_at_to_function(*right)),
4147            },
4148            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
4149                op,
4150                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4151            },
4152            Expr::FunctionCall {
4153                name,
4154                args,
4155                distinct,
4156                window_spec,
4157            } => Expr::FunctionCall {
4158                name,
4159                args: args
4160                    .into_iter()
4161                    .map(Self::transform_valid_at_to_function)
4162                    .collect(),
4163                distinct,
4164                window_spec,
4165            },
4166            Expr::Property(base, prop) => {
4167                Expr::Property(Box::new(Self::transform_valid_at_to_function(*base)), prop)
4168            }
4169            Expr::List(items) => Expr::List(
4170                items
4171                    .into_iter()
4172                    .map(Self::transform_valid_at_to_function)
4173                    .collect(),
4174            ),
4175            Expr::In { expr, list } => Expr::In {
4176                expr: Box::new(Self::transform_valid_at_to_function(*expr)),
4177                list: Box::new(Self::transform_valid_at_to_function(*list)),
4178            },
4179            Expr::IsNull(e) => Expr::IsNull(Box::new(Self::transform_valid_at_to_function(*e))),
4180            Expr::IsNotNull(e) => {
4181                Expr::IsNotNull(Box::new(Self::transform_valid_at_to_function(*e)))
4182            }
4183            Expr::IsUnique(e) => Expr::IsUnique(Box::new(Self::transform_valid_at_to_function(*e))),
4184            // Other cases: return as-is
4185            other => other,
4186        }
4187    }
4188
4189    /// Rewrite system-metadata function calls (`id(v)`, `created_at(v)`,
4190    /// `updated_at(v)`) to direct property access on the corresponding
4191    /// internal column (`v._vid`, `v._created_at`, `v._updated_at`). This
4192    /// normalization enables predicate pushdown via the Property pattern
4193    /// recognized by `PredicateAnalyzer`.
4194    ///
4195    /// All three functions share the same shape: single-arg, argument
4196    /// must be a node/edge variable, returns the column value directly.
4197    fn rewrite_id_to_vid(expr: Expr, vars_in_scope: &[VariableInfo]) -> Expr {
4198        match expr {
4199            Expr::FunctionCall {
4200                name,
4201                args,
4202                distinct,
4203                window_spec,
4204            } if args.len() == 1 && Self::metadata_function_column(&name, None).is_some() => {
4205                if let Expr::Variable(ref var) = args[0] {
4206                    // `id()` resolves to `_eid` for an edge binding and `_vid`
4207                    // for a node — edge rows expose `_eid`, not `_vid`. Mirror
4208                    // the projection path (`df_expr.rs` translate of `id`).
4209                    let var_type = find_var_in_scope(vars_in_scope, var).map(|v| v.var_type);
4210                    let column = Self::metadata_function_column(&name, var_type)
4211                        .unwrap()
4212                        .to_string();
4213                    Expr::Property(Box::new(Expr::Variable(var.clone())), column)
4214                } else {
4215                    Expr::FunctionCall {
4216                        name,
4217                        args,
4218                        distinct,
4219                        window_spec,
4220                    }
4221                }
4222            }
4223            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
4224                left: Box::new(Self::rewrite_id_to_vid(*left, vars_in_scope)),
4225                op,
4226                right: Box::new(Self::rewrite_id_to_vid(*right, vars_in_scope)),
4227            },
4228            Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
4229                op,
4230                expr: Box::new(Self::rewrite_id_to_vid(*inner, vars_in_scope)),
4231            },
4232            other => other,
4233        }
4234    }
4235
4236    /// Return the internal column name for a system-metadata function, or
4237    /// `None` if the name is not one of the recognised metadata functions.
4238    ///
4239    /// `id()` maps to `_eid` when its argument is a relationship
4240    /// (`VariableType::Edge`) and `_vid` otherwise; `var_type` is `None` when the
4241    /// caller only needs the is-metadata-function test.
4242    fn metadata_function_column(
4243        name: &str,
4244        var_type: Option<VariableType>,
4245    ) -> Option<&'static str> {
4246        if name.eq_ignore_ascii_case("id") {
4247            if matches!(var_type, Some(VariableType::Edge)) {
4248                Some("_eid")
4249            } else {
4250                Some("_vid")
4251            }
4252        } else if name.eq_ignore_ascii_case("created_at") {
4253            Some("_created_at")
4254        } else if name.eq_ignore_ascii_case("updated_at") {
4255            Some("_updated_at")
4256        } else {
4257            None
4258        }
4259    }
4260
4261    /// Plan a MATCH clause, handling both shortestPath and regular patterns.
4262    fn plan_match_clause(
4263        &self,
4264        match_clause: &MatchClause,
4265        plan: LogicalPlan,
4266        vars_in_scope: &mut Vec<VariableInfo>,
4267    ) -> Result<LogicalPlan> {
4268        let mut plan = plan;
4269
4270        if match_clause.pattern.paths.is_empty() {
4271            return Err(anyhow!("Empty pattern"));
4272        }
4273
4274        // Track variables introduced by this OPTIONAL MATCH
4275        let vars_before_pattern = vars_in_scope.len();
4276
4277        for path in &match_clause.pattern.paths {
4278            if let Some(mode) = &path.shortest_path_mode {
4279                plan =
4280                    self.plan_shortest_path(path, plan, vars_in_scope, mode, vars_before_pattern)?;
4281            } else {
4282                plan = self.plan_path(
4283                    path,
4284                    plan,
4285                    vars_in_scope,
4286                    match_clause.optional,
4287                    vars_before_pattern,
4288                )?;
4289            }
4290        }
4291
4292        // Collect variables introduced by this OPTIONAL MATCH pattern
4293        let optional_vars: HashSet<String> = if match_clause.optional {
4294            vars_in_scope[vars_before_pattern..]
4295                .iter()
4296                .map(|v| v.name.clone())
4297                .collect()
4298        } else {
4299            HashSet::new()
4300        };
4301
4302        // Handle WHERE clause with vector_similarity and predicate pushdown
4303        if let Some(predicate) = &match_clause.where_clause {
4304            plan = self.plan_where_clause(predicate, plan, vars_in_scope, optional_vars)?;
4305        }
4306
4307        Ok(plan)
4308    }
4309
4310    /// Plan a shortestPath pattern.
4311    fn plan_shortest_path(
4312        &self,
4313        path: &PathPattern,
4314        plan: LogicalPlan,
4315        vars_in_scope: &mut Vec<VariableInfo>,
4316        mode: &ShortestPathMode,
4317        _vars_before_pattern: usize,
4318    ) -> Result<LogicalPlan> {
4319        let mut plan = plan;
4320        let elements = &path.elements;
4321
4322        // Pattern must be: node-rel-node-rel-...-node (odd number of elements >= 3)
4323        if elements.len() < 3 || elements.len().is_multiple_of(2) {
4324            return Err(anyhow!(
4325                "shortestPath requires at least one relationship: (a)-[*]->(b)"
4326            ));
4327        }
4328
4329        let source_node = match &elements[0] {
4330            PatternElement::Node(n) => n,
4331            _ => return Err(anyhow!("ShortestPath must start with a node")),
4332        };
4333        let rel = match &elements[1] {
4334            PatternElement::Relationship(r) => r,
4335            _ => {
4336                return Err(anyhow!(
4337                    "ShortestPath middle element must be a relationship"
4338                ));
4339            }
4340        };
4341        let target_node = match &elements[2] {
4342            PatternElement::Node(n) => n,
4343            _ => return Err(anyhow!("ShortestPath must end with a node")),
4344        };
4345
4346        let source_var = source_node
4347            .variable
4348            .clone()
4349            .ok_or_else(|| anyhow!("Source node must have variable in shortestPath"))?;
4350        let target_var = target_node
4351            .variable
4352            .clone()
4353            .ok_or_else(|| anyhow!("Target node must have variable in shortestPath"))?;
4354        let path_var = path
4355            .variable
4356            .clone()
4357            .ok_or_else(|| anyhow!("shortestPath must be assigned to a variable"))?;
4358
4359        let source_bound = is_var_in_scope(vars_in_scope, &source_var);
4360        let target_bound = is_var_in_scope(vars_in_scope, &target_var);
4361
4362        // Plan source node if not bound
4363        if !source_bound {
4364            plan = self.plan_unbound_node(source_node, &source_var, plan, false)?;
4365        } else if let Some(prop_filter) =
4366            self.properties_to_expr(&source_var, &source_node.properties)
4367        {
4368            plan = LogicalPlan::Filter {
4369                input: Box::new(plan),
4370                predicate: prop_filter,
4371                optional_variables: HashSet::new(),
4372            };
4373        }
4374
4375        // Plan target node if not bound
4376        let target_label_id = if !target_bound {
4377            // Use first label for target_label_id
4378            let target_label_name = target_node
4379                .labels
4380                .first()
4381                .ok_or_else(|| anyhow!("Target node must have label if not already bound"))?;
4382            // Native lookup first; then consult `CatalogProvider` /
4383            // `ReplacementScanProvider` and allocate a virtual label-id
4384            // (M5b follow-up #6). Virtual ids dispatch to
4385            // `CatalogVertexScanExec` at physical-plan time.
4386            let target_label_id =
4387                if let Some(meta) = self.schema.get_label_case_insensitive(target_label_name) {
4388                    meta.id
4389                } else if let Some((vid, _)) = self.allocate_virtual_label(target_label_name)? {
4390                    vid
4391                } else {
4392                    return Err(anyhow!("Label {} not found", target_label_name));
4393                };
4394
4395            let target_scan = LogicalPlan::Scan {
4396                label_id: target_label_id,
4397                labels: target_node.labels.names().to_vec(),
4398                variable: target_var.clone(),
4399                filter: self.properties_to_expr(&target_var, &target_node.properties),
4400                optional: false,
4401            };
4402
4403            plan = Self::join_with_plan(plan, target_scan);
4404            target_label_id
4405        } else {
4406            if let Some(prop_filter) = self.properties_to_expr(&target_var, &target_node.properties)
4407            {
4408                plan = LogicalPlan::Filter {
4409                    input: Box::new(plan),
4410                    predicate: prop_filter,
4411                    optional_variables: HashSet::new(),
4412                };
4413            }
4414            0 // Wildcard for already-bound target
4415        };
4416
4417        // Add ShortestPath operator
4418        let edge_type_ids = if rel.types.is_empty() {
4419            // If no type specified, fetch all edge types (both schema and schemaless)
4420            self.schema.all_edge_type_ids()
4421        } else {
4422            let mut ids = Vec::new();
4423            for type_name in &rel.types {
4424                let id = if let Some(meta) = self.schema.edge_types.get(type_name) {
4425                    meta.id
4426                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
4427                    vid
4428                } else {
4429                    return Err(anyhow!("Edge type {} not found", type_name));
4430                };
4431                ids.push(id);
4432            }
4433            ids
4434        };
4435
4436        // Extract hop constraints from relationship pattern
4437        let min_hops = rel.range.as_ref().and_then(|r| r.min).unwrap_or(1);
4438        let max_hops = rel.range.as_ref().and_then(|r| r.max).unwrap_or(u32::MAX);
4439
4440        let sp_plan = match mode {
4441            ShortestPathMode::Shortest => LogicalPlan::ShortestPath {
4442                input: Box::new(plan),
4443                edge_type_ids,
4444                direction: rel.direction.clone(),
4445                source_variable: source_var.clone(),
4446                target_variable: target_var.clone(),
4447                target_label_id,
4448                path_variable: path_var.clone(),
4449                min_hops,
4450                max_hops,
4451            },
4452            ShortestPathMode::AllShortest => LogicalPlan::AllShortestPaths {
4453                input: Box::new(plan),
4454                edge_type_ids,
4455                direction: rel.direction.clone(),
4456                source_variable: source_var.clone(),
4457                target_variable: target_var.clone(),
4458                target_label_id,
4459                path_variable: path_var.clone(),
4460                min_hops,
4461                max_hops,
4462            },
4463        };
4464
4465        if !source_bound {
4466            add_var_to_scope(vars_in_scope, &source_var, VariableType::Node)?;
4467        }
4468        if !target_bound {
4469            add_var_to_scope(vars_in_scope, &target_var, VariableType::Node)?;
4470        }
4471        add_var_to_scope(vars_in_scope, &path_var, VariableType::Path)?;
4472
4473        Ok(sp_plan)
4474    }
4475    /// Plan a MATCH pattern into a LogicalPlan (Scan → Traverse chains).
4476    ///
4477    /// This is a public entry point for the Locy plan builder to reuse the
4478    /// existing pattern-planning logic for clause bodies.
4479    pub fn plan_pattern(
4480        &self,
4481        pattern: &Pattern,
4482        initial_vars: &[VariableInfo],
4483    ) -> Result<LogicalPlan> {
4484        let mut vars_in_scope: Vec<VariableInfo> = initial_vars.to_vec();
4485        let vars_before_pattern = vars_in_scope.len();
4486        let mut plan = LogicalPlan::Empty;
4487        for path in &pattern.paths {
4488            plan = self.plan_path(path, plan, &mut vars_in_scope, false, vars_before_pattern)?;
4489        }
4490        Ok(plan)
4491    }
4492
4493    /// Plan a regular MATCH path (not shortestPath).
4494    fn plan_path(
4495        &self,
4496        path: &PathPattern,
4497        plan: LogicalPlan,
4498        vars_in_scope: &mut Vec<VariableInfo>,
4499        optional: bool,
4500        vars_before_pattern: usize,
4501    ) -> Result<LogicalPlan> {
4502        let mut plan = plan;
4503        let elements = &path.elements;
4504        let mut i = 0;
4505
4506        let path_variable = path.variable.clone();
4507
4508        // Check for VariableAlreadyBound: path variable already in scope
4509        if let Some(pv) = &path_variable
4510            && !pv.is_empty()
4511            && is_var_in_scope(vars_in_scope, pv)
4512        {
4513            return Err(anyhow!(
4514                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4515                pv
4516            ));
4517        }
4518
4519        // Check for VariableAlreadyBound: path variable conflicts with element variables
4520        if let Some(pv) = &path_variable
4521            && !pv.is_empty()
4522        {
4523            for element in elements {
4524                match element {
4525                    PatternElement::Node(n) => {
4526                        if let Some(v) = &n.variable
4527                            && v == pv
4528                        {
4529                            return Err(anyhow!(
4530                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4531                                pv
4532                            ));
4533                        }
4534                    }
4535                    PatternElement::Relationship(r) => {
4536                        if let Some(v) = &r.variable
4537                            && v == pv
4538                        {
4539                            return Err(anyhow!(
4540                                "SyntaxError: VariableAlreadyBound - Variable '{}' already defined",
4541                                pv
4542                            ));
4543                        }
4544                    }
4545                    PatternElement::Parenthesized { .. } => {}
4546                }
4547            }
4548        }
4549
4550        // For OPTIONAL MATCH, extract all variables from this pattern upfront.
4551        // When any hop fails in a multi-hop pattern, ALL these variables should be NULL.
4552        let mut optional_pattern_vars: HashSet<String> = if optional {
4553            let mut vars = HashSet::new();
4554            for element in elements {
4555                match element {
4556                    PatternElement::Node(n) => {
4557                        if let Some(v) = &n.variable
4558                            && !v.is_empty()
4559                            && !is_var_in_scope(vars_in_scope, v)
4560                        {
4561                            vars.insert(v.clone());
4562                        }
4563                    }
4564                    PatternElement::Relationship(r) => {
4565                        if let Some(v) = &r.variable
4566                            && !v.is_empty()
4567                            && !is_var_in_scope(vars_in_scope, v)
4568                        {
4569                            vars.insert(v.clone());
4570                        }
4571                    }
4572                    PatternElement::Parenthesized { pattern, .. } => {
4573                        // Also check nested patterns
4574                        for nested_elem in &pattern.elements {
4575                            match nested_elem {
4576                                PatternElement::Node(n) => {
4577                                    if let Some(v) = &n.variable
4578                                        && !v.is_empty()
4579                                        && !is_var_in_scope(vars_in_scope, v)
4580                                    {
4581                                        vars.insert(v.clone());
4582                                    }
4583                                }
4584                                PatternElement::Relationship(r) => {
4585                                    if let Some(v) = &r.variable
4586                                        && !v.is_empty()
4587                                        && !is_var_in_scope(vars_in_scope, v)
4588                                    {
4589                                        vars.insert(v.clone());
4590                                    }
4591                                }
4592                                _ => {}
4593                            }
4594                        }
4595                    }
4596                }
4597            }
4598            // Include path variable if present
4599            if let Some(pv) = &path_variable
4600                && !pv.is_empty()
4601            {
4602                vars.insert(pv.clone());
4603            }
4604            vars
4605        } else {
4606            HashSet::new()
4607        };
4608
4609        // Pre-scan path elements for bound edge variables from previous MATCH clauses.
4610        // These must participate in Trail mode (relationship uniqueness) enforcement
4611        // across ALL segments in this path, so that VLP segments like [*0..1] don't
4612        // traverse through edges already claimed by a bound relationship [r].
4613        let path_bound_edge_vars: HashSet<String> = {
4614            let mut bound = HashSet::new();
4615            for element in elements {
4616                if let PatternElement::Relationship(rel) = element
4617                    && let Some(ref var_name) = rel.variable
4618                    && !var_name.is_empty()
4619                    && vars_in_scope[..vars_before_pattern]
4620                        .iter()
4621                        .any(|v| v.name == *var_name)
4622                {
4623                    bound.insert(var_name.clone());
4624                }
4625            }
4626            bound
4627        };
4628
4629        // Track if any traverses were added (for zero-length path detection)
4630        let mut had_traverses = false;
4631        // Track the node variable for zero-length path binding
4632        let mut single_node_variable: Option<String> = None;
4633        // Collect node/edge variables for BindPath (fixed-length path binding)
4634        let mut path_node_vars: Vec<String> = Vec::new();
4635        let mut path_edge_vars: Vec<String> = Vec::new();
4636        // Track the last processed outer node variable for QPP source binding.
4637        // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is `a`, not `x`.
4638        let mut last_outer_node_var: Option<String> = None;
4639
4640        // Multi-hop path variables are now supported - path is accumulated across hops
4641        while i < elements.len() {
4642            let element = &elements[i];
4643            match element {
4644                PatternElement::Node(n) => {
4645                    let mut variable = n.variable.clone().unwrap_or_default();
4646                    if variable.is_empty() {
4647                        variable = self.next_anon_var();
4648                    }
4649                    // Track first node variable for zero-length path
4650                    if single_node_variable.is_none() {
4651                        single_node_variable = Some(variable.clone());
4652                    }
4653                    let is_bound =
4654                        !variable.is_empty() && is_var_in_scope(vars_in_scope, &variable);
4655                    if optional && !is_bound {
4656                        optional_pattern_vars.insert(variable.clone());
4657                    }
4658
4659                    if is_bound {
4660                        // Check for type conflict - can't use an Edge/Path as a Node
4661                        if let Some(info) = find_var_in_scope(vars_in_scope, &variable)
4662                            && !info.var_type.is_compatible_with(VariableType::Node)
4663                        {
4664                            return Err(anyhow!(
4665                                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
4666                                variable,
4667                                info.var_type
4668                            ));
4669                        }
4670                        if let Some(node_filter) =
4671                            self.node_filter_expr(&variable, &n.labels, &n.properties)
4672                        {
4673                            plan = LogicalPlan::Filter {
4674                                input: Box::new(plan),
4675                                predicate: node_filter,
4676                                optional_variables: HashSet::new(),
4677                            };
4678                        }
4679                    } else {
4680                        plan = self.plan_unbound_node(n, &variable, plan, optional)?;
4681                        if !variable.is_empty() {
4682                            add_var_to_scope(vars_in_scope, &variable, VariableType::Node)?;
4683                        }
4684                    }
4685
4686                    // Track source node for BindPath
4687                    if path_variable.is_some() && path_node_vars.is_empty() {
4688                        path_node_vars.push(variable.clone());
4689                    }
4690
4691                    // Look ahead for relationships
4692                    let mut current_source_var = variable;
4693                    last_outer_node_var = Some(current_source_var.clone());
4694                    i += 1;
4695                    while i < elements.len() {
4696                        if let PatternElement::Relationship(r) = &elements[i] {
4697                            if i + 1 < elements.len() {
4698                                let target_node_part = &elements[i + 1];
4699                                if let PatternElement::Node(n_target) = target_node_part {
4700                                    // For VLP traversals, pass path_variable through
4701                                    // For fixed-length, we use BindPath instead
4702                                    let is_vlp = r.range.is_some();
4703                                    let traverse_path_var =
4704                                        if is_vlp { path_variable.clone() } else { None };
4705
4706                                    // If we're about to start a VLP segment and there are
4707                                    // collected fixed-hop path vars, create an intermediate
4708                                    // BindPath for the fixed prefix first. The VLP will then
4709                                    // extend this existing path.
4710                                    if is_vlp
4711                                        && let Some(pv) = path_variable.as_ref()
4712                                        && !path_node_vars.is_empty()
4713                                    {
4714                                        plan = LogicalPlan::BindPath {
4715                                            input: Box::new(plan),
4716                                            node_variables: std::mem::take(&mut path_node_vars),
4717                                            edge_variables: std::mem::take(&mut path_edge_vars),
4718                                            path_variable: pv.clone(),
4719                                        };
4720                                        if !is_var_in_scope(vars_in_scope, pv) {
4721                                            add_var_to_scope(
4722                                                vars_in_scope,
4723                                                pv,
4724                                                VariableType::Path,
4725                                            )?;
4726                                        }
4727                                    }
4728
4729                                    // Plan the traverse from the current source node
4730                                    let target_was_bound =
4731                                        n_target.variable.as_ref().is_some_and(|v| {
4732                                            !v.is_empty() && is_var_in_scope(vars_in_scope, v)
4733                                        });
4734                                    let (new_plan, target_var, effective_target) = self
4735                                        .plan_traverse_with_source(
4736                                            plan,
4737                                            vars_in_scope,
4738                                            TraverseParams {
4739                                                rel: r,
4740                                                target_node: n_target,
4741                                                optional,
4742                                                path_variable: traverse_path_var,
4743                                                optional_pattern_vars: optional_pattern_vars
4744                                                    .clone(),
4745                                            },
4746                                            &current_source_var,
4747                                            vars_before_pattern,
4748                                            &path_bound_edge_vars,
4749                                        )?;
4750                                    plan = new_plan;
4751                                    if optional && !target_was_bound {
4752                                        optional_pattern_vars.insert(target_var.clone());
4753                                    }
4754
4755                                    // Track edge/target node for BindPath
4756                                    if path_variable.is_some() && !is_vlp {
4757                                        // Use the edge variable if given, otherwise use
4758                                        // the internal tracking column pattern.
4759                                        // Use effective_target (which may be __rebound_x
4760                                        // for bound-target traversals) to match the actual
4761                                        // column name produced by GraphTraverseExec.
4762                                        if let Some(ev) = &r.variable {
4763                                            path_edge_vars.push(ev.clone());
4764                                        } else {
4765                                            path_edge_vars
4766                                                .push(format!("__eid_to_{}", effective_target));
4767                                        }
4768                                        path_node_vars.push(target_var.clone());
4769                                    }
4770
4771                                    current_source_var = target_var;
4772                                    last_outer_node_var = Some(current_source_var.clone());
4773                                    had_traverses = true;
4774                                    i += 2;
4775                                } else {
4776                                    return Err(anyhow!("Relationship must be followed by a node"));
4777                                }
4778                            } else {
4779                                return Err(anyhow!("Relationship cannot be the last element"));
4780                            }
4781                        } else {
4782                            break;
4783                        }
4784                    }
4785                }
4786                PatternElement::Relationship(_) => {
4787                    return Err(anyhow!("Pattern must start with a node"));
4788                }
4789                PatternElement::Parenthesized { pattern, range } => {
4790                    // Quantified pattern: ((a)-[:REL]->(b)){n,m}
4791                    // Validate: odd number of elements (node-rel-node[-rel-node]*)
4792                    if pattern.elements.len() < 3 || pattern.elements.len() % 2 == 0 {
4793                        return Err(anyhow!(
4794                            "Quantified pattern must have node-relationship-node structure (odd number >= 3 elements)"
4795                        ));
4796                    }
4797
4798                    let source_node = match &pattern.elements[0] {
4799                        PatternElement::Node(n) => n,
4800                        _ => return Err(anyhow!("Quantified pattern must start with a node")),
4801                    };
4802
4803                    // Extract all relationship-node pairs (QPP steps)
4804                    let mut qpp_rels: Vec<(&RelationshipPattern, &NodePattern)> = Vec::new();
4805                    for pair_idx in (1..pattern.elements.len()).step_by(2) {
4806                        let rel = match &pattern.elements[pair_idx] {
4807                            PatternElement::Relationship(r) => r,
4808                            _ => {
4809                                return Err(anyhow!(
4810                                    "Quantified pattern element at position {} must be a relationship",
4811                                    pair_idx
4812                                ));
4813                            }
4814                        };
4815                        let node = match &pattern.elements[pair_idx + 1] {
4816                            PatternElement::Node(n) => n,
4817                            _ => {
4818                                return Err(anyhow!(
4819                                    "Quantified pattern element at position {} must be a node",
4820                                    pair_idx + 1
4821                                ));
4822                            }
4823                        };
4824                        // Reject nested quantifiers
4825                        if rel.range.is_some() {
4826                            return Err(anyhow!(
4827                                "Nested quantifiers not supported: ((a)-[:REL*n]->(b)){{m}}"
4828                            ));
4829                        }
4830                        qpp_rels.push((rel, node));
4831                    }
4832
4833                    // Check if there's an outer target node after the Parenthesized element.
4834                    // In syntax like `(a)((x)-[:LINK]->(y)){2,4}(b)`, the `(b)` is the outer
4835                    // target that should receive the traversal result.
4836                    let inner_target_node = qpp_rels.last().unwrap().1;
4837                    let outer_target_node = if i + 1 < elements.len() {
4838                        match &elements[i + 1] {
4839                            PatternElement::Node(n) => Some(n),
4840                            _ => None,
4841                        }
4842                    } else {
4843                        None
4844                    };
4845                    // Use the outer target for variable binding and filters; inner target
4846                    // labels are used for state constraints within the NFA.
4847                    let target_node = outer_target_node.unwrap_or(inner_target_node);
4848
4849                    // For simple 3-element single-hop QPP without intermediate label constraints,
4850                    // fall back to existing VLP behavior (copy range to relationship).
4851                    let use_simple_vlp = qpp_rels.len() == 1
4852                        && inner_target_node
4853                            .labels
4854                            .first()
4855                            .and_then(|l| self.schema.get_label_case_insensitive(l))
4856                            .is_none();
4857
4858                    // Plan source node.
4859                    // In `(a)((x)-[:R]->(y)){n}(b)`, the QPP source is the preceding
4860                    // outer node `a`, NOT the inner `x`. If there's a preceding outer
4861                    // node variable, use it; otherwise fall back to the inner source.
4862                    let source_variable = if let Some(ref outer_src) = last_outer_node_var {
4863                        // The preceding outer node is already bound and in scope
4864                        // Apply any property filters from the inner source node
4865                        if let Some(prop_filter) =
4866                            self.properties_to_expr(outer_src, &source_node.properties)
4867                        {
4868                            plan = LogicalPlan::Filter {
4869                                input: Box::new(plan),
4870                                predicate: prop_filter,
4871                                optional_variables: HashSet::new(),
4872                            };
4873                        }
4874                        outer_src.clone()
4875                    } else {
4876                        let sv = source_node
4877                            .variable
4878                            .clone()
4879                            .filter(|v| !v.is_empty())
4880                            .unwrap_or_else(|| self.next_anon_var());
4881
4882                        if is_var_in_scope(vars_in_scope, &sv) {
4883                            // Source is already bound, apply property filter if needed
4884                            if let Some(prop_filter) =
4885                                self.properties_to_expr(&sv, &source_node.properties)
4886                            {
4887                                plan = LogicalPlan::Filter {
4888                                    input: Box::new(plan),
4889                                    predicate: prop_filter,
4890                                    optional_variables: HashSet::new(),
4891                                };
4892                            }
4893                        } else {
4894                            // Source is unbound, scan it
4895                            plan = self.plan_unbound_node(source_node, &sv, plan, optional)?;
4896                            add_var_to_scope(vars_in_scope, &sv, VariableType::Node)?;
4897                            if optional {
4898                                optional_pattern_vars.insert(sv.clone());
4899                            }
4900                        }
4901                        sv
4902                    };
4903
4904                    if use_simple_vlp {
4905                        // Simple single-hop QPP: apply range to relationship and use VLP path
4906                        let mut relationship = qpp_rels[0].0.clone();
4907                        relationship.range = range.clone();
4908
4909                        let target_was_bound = target_node
4910                            .variable
4911                            .as_ref()
4912                            .is_some_and(|v| !v.is_empty() && is_var_in_scope(vars_in_scope, v));
4913                        let (new_plan, target_var, _effective_target) = self
4914                            .plan_traverse_with_source(
4915                                plan,
4916                                vars_in_scope,
4917                                TraverseParams {
4918                                    rel: &relationship,
4919                                    target_node,
4920                                    optional,
4921                                    path_variable: path_variable.clone(),
4922                                    optional_pattern_vars: optional_pattern_vars.clone(),
4923                                },
4924                                &source_variable,
4925                                vars_before_pattern,
4926                                &path_bound_edge_vars,
4927                            )?;
4928                        plan = new_plan;
4929                        if optional && !target_was_bound {
4930                            optional_pattern_vars.insert(target_var);
4931                        }
4932                    } else {
4933                        // Multi-hop QPP: build QppStepInfo list and create Traverse with qpp_steps
4934                        let mut qpp_step_infos = Vec::new();
4935                        let mut all_edge_type_ids = Vec::new();
4936
4937                        for (rel, node) in &qpp_rels {
4938                            let mut step_edge_type_ids = Vec::new();
4939                            if rel.types.is_empty() {
4940                                step_edge_type_ids = self.schema.all_edge_type_ids();
4941                            } else {
4942                                for type_name in &rel.types {
4943                                    if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
4944                                        step_edge_type_ids.push(edge_meta.id);
4945                                    }
4946                                }
4947                            }
4948                            all_edge_type_ids.extend_from_slice(&step_edge_type_ids);
4949
4950                            let target_label = node.labels.first().and_then(|l| {
4951                                self.schema.get_label_case_insensitive(l).map(|_| l.clone())
4952                            });
4953
4954                            qpp_step_infos.push(QppStepInfo {
4955                                edge_type_ids: step_edge_type_ids,
4956                                direction: rel.direction.clone(),
4957                                target_label,
4958                            });
4959                        }
4960
4961                        // Deduplicate edge type IDs for adjacency warming
4962                        all_edge_type_ids.sort_unstable();
4963                        all_edge_type_ids.dedup();
4964
4965                        // Compute iteration bounds from range
4966                        let hops_per_iter = qpp_step_infos.len();
4967                        const QPP_DEFAULT_MAX_HOPS: usize = 100;
4968                        let (min_iter, max_iter) = if let Some(range) = range {
4969                            let min = range.min.unwrap_or(1) as usize;
4970                            let max = range
4971                                .max
4972                                .map(|m| m as usize)
4973                                .unwrap_or(QPP_DEFAULT_MAX_HOPS / hops_per_iter);
4974                            (min, max)
4975                        } else {
4976                            (1, 1)
4977                        };
4978                        let min_hops = min_iter * hops_per_iter;
4979                        let max_hops = max_iter * hops_per_iter;
4980
4981                        // Target variable from the last node in the QPP sub-pattern
4982                        let target_variable = target_node
4983                            .variable
4984                            .clone()
4985                            .filter(|v| !v.is_empty())
4986                            .unwrap_or_else(|| self.next_anon_var());
4987
4988                        let target_is_bound = is_var_in_scope(vars_in_scope, &target_variable);
4989
4990                        // Determine target label for the final node
4991                        let target_label_meta = target_node
4992                            .labels
4993                            .first()
4994                            .and_then(|l| self.schema.get_label_case_insensitive(l));
4995
4996                        // Collect scope match variables
4997                        let mut scope_match_variables: HashSet<String> = vars_in_scope
4998                            [vars_before_pattern..]
4999                            .iter()
5000                            .map(|v| v.name.clone())
5001                            .collect();
5002                        scope_match_variables.insert(target_variable.clone());
5003
5004                        // Handle bound target: use rebound variable for traverse
5005                        let rebound_target_var = if target_is_bound {
5006                            Some(target_variable.clone())
5007                        } else {
5008                            None
5009                        };
5010                        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5011                            format!("__rebound_{}", bv)
5012                        } else {
5013                            target_variable.clone()
5014                        };
5015
5016                        plan = LogicalPlan::Traverse {
5017                            input: Box::new(plan),
5018                            edge_type_ids: all_edge_type_ids,
5019                            direction: qpp_rels[0].0.direction.clone(),
5020                            source_variable: source_variable.to_string(),
5021                            target_variable: effective_target_var.clone(),
5022                            target_label_id: target_label_meta.map(|m| m.id).unwrap_or(0),
5023                            step_variable: None, // QPP doesn't expose intermediate edges
5024                            min_hops,
5025                            max_hops,
5026                            optional,
5027                            target_filter: self.node_filter_expr(
5028                                &target_variable,
5029                                &target_node.labels,
5030                                &target_node.properties,
5031                            ),
5032                            path_variable: path_variable.clone(),
5033                            edge_properties: HashSet::new(),
5034                            is_variable_length: true,
5035                            optional_pattern_vars: optional_pattern_vars.clone(),
5036                            scope_match_variables,
5037                            edge_filter_expr: None,
5038                            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5039                            qpp_steps: Some(qpp_step_infos),
5040                        };
5041
5042                        // Handle bound target: filter rebound results against original variable
5043                        if let Some(ref btv) = rebound_target_var {
5044                            // Filter: __rebound_x._vid = x._vid
5045                            let filter_pred = Expr::BinaryOp {
5046                                left: Box::new(Expr::Property(
5047                                    Box::new(Expr::Variable(effective_target_var.clone())),
5048                                    "_vid".to_string(),
5049                                )),
5050                                op: BinaryOp::Eq,
5051                                right: Box::new(Expr::Property(
5052                                    Box::new(Expr::Variable(btv.clone())),
5053                                    "_vid".to_string(),
5054                                )),
5055                            };
5056                            plan = LogicalPlan::Filter {
5057                                input: Box::new(plan),
5058                                predicate: filter_pred,
5059                                optional_variables: if optional {
5060                                    optional_pattern_vars.clone()
5061                                } else {
5062                                    HashSet::new()
5063                                },
5064                            };
5065                        }
5066
5067                        // Add target variable to scope
5068                        if !target_is_bound {
5069                            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5070                        }
5071
5072                        // Add path variable to scope
5073                        if let Some(ref pv) = path_variable
5074                            && !pv.is_empty()
5075                            && !is_var_in_scope(vars_in_scope, pv)
5076                        {
5077                            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5078                        }
5079                    }
5080                    had_traverses = true;
5081
5082                    // Skip the outer target node if we consumed it
5083                    if outer_target_node.is_some() {
5084                        i += 2; // skip both Parenthesized and the following Node
5085                    } else {
5086                        i += 1;
5087                    }
5088                }
5089            }
5090        }
5091
5092        // If this is a single-node pattern with a path variable, bind the zero-length path
5093        // E.g., `p = (a)` should create a Path with one node and zero edges
5094        if let Some(ref path_var) = path_variable
5095            && !path_var.is_empty()
5096            && !had_traverses
5097            && let Some(node_var) = single_node_variable
5098        {
5099            plan = LogicalPlan::BindZeroLengthPath {
5100                input: Box::new(plan),
5101                node_variable: node_var,
5102                path_variable: path_var.clone(),
5103            };
5104            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5105        }
5106
5107        // Bind fixed-length path from collected node/edge variables
5108        if let Some(ref path_var) = path_variable
5109            && !path_var.is_empty()
5110            && had_traverses
5111            && !path_node_vars.is_empty()
5112            && !is_var_in_scope(vars_in_scope, path_var)
5113        {
5114            plan = LogicalPlan::BindPath {
5115                input: Box::new(plan),
5116                node_variables: path_node_vars,
5117                edge_variables: path_edge_vars,
5118                path_variable: path_var.clone(),
5119            };
5120            add_var_to_scope(vars_in_scope, path_var, VariableType::Path)?;
5121        }
5122
5123        Ok(plan)
5124    }
5125
5126    /// Plan a traverse with an explicit source variable name.
5127    ///
5128    /// Returns `(plan, target_variable, effective_target_variable)` where:
5129    /// - `target_variable` is the semantic variable name for downstream scope
5130    /// - `effective_target_variable` is the actual column-name prefix used by
5131    ///   the traverse (may be `__rebound_x` for bound-target patterns)
5132    fn plan_traverse_with_source(
5133        &self,
5134        plan: LogicalPlan,
5135        vars_in_scope: &mut Vec<VariableInfo>,
5136        params: TraverseParams<'_>,
5137        source_variable: &str,
5138        vars_before_pattern: usize,
5139        path_bound_edge_vars: &HashSet<String>,
5140    ) -> Result<(LogicalPlan, String, String)> {
5141        // Check for parameter used as relationship predicate
5142        if let Some(Expr::Parameter(_)) = &params.rel.properties {
5143            return Err(anyhow!(
5144                "SyntaxError: InvalidParameterUse - Parameters cannot be used as relationship predicates"
5145            ));
5146        }
5147
5148        let mut edge_type_ids = Vec::new();
5149        let mut dst_labels = Vec::new();
5150        let mut unknown_types = Vec::new();
5151
5152        if params.rel.types.is_empty() {
5153            // All types - include both schema and schemaless edge types
5154            // This ensures MATCH (a)-[r]->(b) finds edges even when no schema is defined
5155            edge_type_ids = self.schema.all_edge_type_ids();
5156            for meta in self.schema.edge_types.values() {
5157                dst_labels.extend(meta.dst_labels.iter().cloned());
5158            }
5159        } else {
5160            for type_name in &params.rel.types {
5161                if let Some(edge_meta) = self.schema.edge_types.get(type_name) {
5162                    // Known type - use standard Traverse with type_id
5163                    edge_type_ids.push(edge_meta.id);
5164                    dst_labels.extend(edge_meta.dst_labels.iter().cloned());
5165                } else if let Some((vid, _)) = self.allocate_virtual_edge_type(type_name)? {
5166                    // M5b.3: virtual edge type (plugin-registered CatalogTable).
5167                    // Resolving it into `edge_type_ids` (not `unknown_types`)
5168                    // lets the regular `Traverse` planner build a structured
5169                    // plan that the physical planner can dispatch to a
5170                    // `CatalogEdgeScanExec` mid-pattern.
5171                    edge_type_ids.push(vid);
5172                } else {
5173                    // Unknown type - will use TraverseMainByType
5174                    unknown_types.push(type_name.clone());
5175                }
5176            }
5177        }
5178
5179        // Deduplicate edge type IDs and unknown types ([:T|:T] → [:T])
5180        edge_type_ids.sort_unstable();
5181        edge_type_ids.dedup();
5182        unknown_types.sort_unstable();
5183        unknown_types.dedup();
5184
5185        let mut target_variable = params.target_node.variable.clone().unwrap_or_default();
5186        if target_variable.is_empty() {
5187            target_variable = self.next_anon_var();
5188        }
5189        let target_is_bound =
5190            !target_variable.is_empty() && is_var_in_scope(vars_in_scope, &target_variable);
5191
5192        // Check for VariableTypeConflict: relationship variable used as node
5193        // e.g., ()-[r]-(r) where r is both the edge and a node endpoint
5194        if let Some(rel_var) = &params.rel.variable
5195            && !rel_var.is_empty()
5196            && rel_var == &target_variable
5197        {
5198            return Err(anyhow!(
5199                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as relationship, cannot use as node",
5200                rel_var
5201            ));
5202        }
5203
5204        // Check for VariableTypeConflict/RelationshipUniquenessViolation
5205        // e.g., (r)-[r]-() or r = ()-[]-(), ()-[r]-()
5206        // Also: (a)-[r]->()-[r]->(a) where r is reused as relationship in same pattern
5207        // BUT: MATCH (a)-[r]->() WITH r MATCH ()-[r]->() is ALLOWED (r is bound from previous clause)
5208        let mut bound_edge_var: Option<String> = None;
5209        let mut bound_edge_list_var: Option<String> = None;
5210        if let Some(rel_var) = &params.rel.variable
5211            && !rel_var.is_empty()
5212            && let Some(info) = find_var_in_scope(vars_in_scope, rel_var)
5213        {
5214            let is_from_previous_clause = vars_in_scope[..vars_before_pattern]
5215                .iter()
5216                .any(|v| v.name == *rel_var);
5217
5218            if info.var_type == VariableType::Edge {
5219                // Check if this edge variable comes from a previous clause (before this MATCH)
5220                if is_from_previous_clause {
5221                    // Edge variable bound from previous clause - this is allowed
5222                    // We'll filter the traversal to match this specific edge
5223                    bound_edge_var = Some(rel_var.clone());
5224                } else {
5225                    // Same relationship variable used twice in the same MATCH clause
5226                    return Err(anyhow!(
5227                        "SyntaxError: RelationshipUniquenessViolation - Relationship variable '{}' is already used in this pattern",
5228                        rel_var
5229                    ));
5230                }
5231            } else if params.rel.range.is_some()
5232                && is_from_previous_clause
5233                && matches!(
5234                    info.var_type,
5235                    VariableType::Scalar | VariableType::ScalarLiteral
5236                )
5237            {
5238                // Allow VLP rebound against a previously bound relationship list
5239                // (e.g. WITH [r1, r2] AS rs ... MATCH ()-[rs*]->()).
5240                bound_edge_list_var = Some(rel_var.clone());
5241            } else if !info.var_type.is_compatible_with(VariableType::Edge) {
5242                return Err(anyhow!(
5243                    "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as relationship",
5244                    rel_var,
5245                    info.var_type
5246                ));
5247            }
5248        }
5249
5250        // Check for VariableTypeConflict: target node variable already bound as non-Node
5251        // e.g., ()-[r]-()-[]-(r) where r was added as Edge, now used as target node
5252        if target_is_bound
5253            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5254            && !info.var_type.is_compatible_with(VariableType::Node)
5255        {
5256            return Err(anyhow!(
5257                "SyntaxError: VariableTypeConflict - Variable '{}' already defined as {:?}, cannot use as Node",
5258                target_variable,
5259                info.var_type
5260            ));
5261        }
5262
5263        // If all requested types are unknown (schemaless), use TraverseMainByType
5264        // This allows queries like MATCH (a)-[:UnknownType]->(b) to work
5265        // Also supports OR relationship types like MATCH (a)-[:KNOWS|HATES]->(b)
5266        if !unknown_types.is_empty() && edge_type_ids.is_empty() {
5267            // All types are unknown - use schemaless traversal
5268
5269            let is_variable_length = params.rel.range.is_some();
5270
5271            const DEFAULT_MAX_HOPS: usize = 100;
5272            let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5273                let min = range.min.unwrap_or(1) as usize;
5274                let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5275                (min, max)
5276            } else {
5277                (1, 1)
5278            };
5279
5280            // For both single-hop and variable-length paths:
5281            // - step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5282            //   Single-hop: step_var holds a single edge object
5283            //   VLP: step_var holds a list of edge objects
5284            // - path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5285            let step_var = params.rel.variable.clone();
5286            let path_var = params.path_variable.clone();
5287
5288            // Compute scope_match_variables for relationship uniqueness scoping.
5289            let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5290                .iter()
5291                .map(|v| v.name.clone())
5292                .collect();
5293            if let Some(ref sv) = step_var {
5294                // Only add the step variable to scope if it's NOT rebound from a previous clause.
5295                // Rebound edges (bound_edge_var is set) should not participate in uniqueness
5296                // filtering because the second MATCH intentionally reuses the same edge.
5297                if bound_edge_var.is_none() {
5298                    scope_match_variables.insert(sv.clone());
5299                }
5300            }
5301            scope_match_variables.insert(target_variable.clone());
5302            // Include bound edge variables from this path for cross-segment Trail mode
5303            // enforcement. This ensures VLP segments like [*0..1] don't traverse through
5304            // edges already claimed by a bound relationship [r] in the same path.
5305            // Exclude the CURRENT segment's bound edge: the schemaless path doesn't use
5306            // __rebound_ renaming, so the BFS must be free to match the bound edge itself.
5307            scope_match_variables.extend(
5308                path_bound_edge_vars
5309                    .iter()
5310                    .filter(|v| bound_edge_var.as_ref() != Some(*v))
5311                    .cloned(),
5312            );
5313
5314            let mut plan = LogicalPlan::TraverseMainByType {
5315                type_names: unknown_types,
5316                input: Box::new(plan),
5317                direction: params.rel.direction.clone(),
5318                source_variable: source_variable.to_string(),
5319                target_variable: target_variable.clone(),
5320                step_variable: step_var.clone(),
5321                min_hops,
5322                max_hops,
5323                optional: params.optional,
5324                target_filter: self.node_filter_expr(
5325                    &target_variable,
5326                    &params.target_node.labels,
5327                    &params.target_node.properties,
5328                ),
5329                path_variable: path_var.clone(),
5330                is_variable_length,
5331                optional_pattern_vars: params.optional_pattern_vars.clone(),
5332                scope_match_variables,
5333                edge_filter_expr: if is_variable_length {
5334                    let filter_var = step_var
5335                        .clone()
5336                        .unwrap_or_else(|| "__anon_edge".to_string());
5337                    self.properties_to_expr(&filter_var, &params.rel.properties)
5338                } else {
5339                    None
5340                },
5341                path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5342            };
5343
5344            // Only apply bound target filter for Imported variables (from outer scope/subquery).
5345            // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5346            // uses Parameter which requires the value to be in params (subquery context).
5347            if target_is_bound
5348                && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5349                && info.var_type == VariableType::Imported
5350            {
5351                plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5352            }
5353
5354            // Apply relationship property predicates for fixed-length schemaless
5355            // traversals (e.g., [r:KNOWS {name: 'monkey'}]).
5356            // For VLP, predicates are stored inline in edge_filter_expr (above).
5357            // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5358            if !is_variable_length
5359                && let Some(edge_var_name) = step_var.as_ref()
5360                && let Some(edge_prop_filter) =
5361                    self.properties_to_expr(edge_var_name, &params.rel.properties)
5362            {
5363                let filter_optional_vars = if params.optional {
5364                    params.optional_pattern_vars.clone()
5365                } else {
5366                    HashSet::new()
5367                };
5368                plan = LogicalPlan::Filter {
5369                    input: Box::new(plan),
5370                    predicate: edge_prop_filter,
5371                    optional_variables: filter_optional_vars,
5372                };
5373            }
5374
5375            // Add the bound variables to scope
5376            if let Some(sv) = &step_var {
5377                add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5378                if is_variable_length
5379                    && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5380                {
5381                    info.is_vlp = true;
5382                }
5383            }
5384            if let Some(pv) = &path_var
5385                && !is_var_in_scope(vars_in_scope, pv)
5386            {
5387                add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5388            }
5389            if !is_var_in_scope(vars_in_scope, &target_variable) {
5390                add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5391            }
5392
5393            return Ok((plan, target_variable.clone(), target_variable));
5394        }
5395
5396        // If we have a mix of known and unknown types, error for now
5397        // (could be extended to Union of Traverse + TraverseMainByType)
5398        if !unknown_types.is_empty() {
5399            return Err(anyhow!(
5400                "Mixed known and unknown edge types not yet supported. Unknown: {:?}",
5401                unknown_types
5402            ));
5403        }
5404
5405        // Resolve target label to either a schema id or a virtual id from the
5406        // plugin registry. Mid-pattern virtual-label dispatch (M5b.3) requires
5407        // the virtual id to flow into `Traverse.target_label_id` so the
5408        // physical planner can layer a `CatalogVertexScanExec` join on the
5409        // traverse output. Mirrors the schema-then-virtual fallthrough used
5410        // by single-vertex `Scan` planning (~`plan_node_pattern` below).
5411        let mut virtual_target_label_id: Option<u16> = None;
5412        let target_label_meta = if let Some(label_name) = params.target_node.labels.first() {
5413            // Use first label for target_label_id
5414            // For schemaless support, allow unknown target labels
5415            match self.schema.get_label_case_insensitive(label_name) {
5416                Some(meta) => Some(meta),
5417                None => {
5418                    if let Some((vid, _)) = self.allocate_virtual_label(label_name)? {
5419                        virtual_target_label_id = Some(vid);
5420                    }
5421                    None
5422                }
5423            }
5424        } else if !target_is_bound {
5425            // Infer from edge type(s)
5426            let unique_dsts: Vec<_> = dst_labels
5427                .into_iter()
5428                .collect::<HashSet<_>>()
5429                .into_iter()
5430                .collect();
5431            if unique_dsts.len() == 1 {
5432                let label_name = &unique_dsts[0];
5433                self.schema.get_label_case_insensitive(label_name)
5434            } else {
5435                // Multiple or no destination labels inferred - allow any target
5436                // This supports patterns like MATCH (a)-[:EDGE_TYPE]-(b) WHERE b:Label
5437                // where the edge type can connect to multiple labels
5438                None
5439            }
5440        } else {
5441            None
5442        };
5443
5444        // Check if this is a variable-length pattern (has range specifier like *1..3)
5445        let is_variable_length = params.rel.range.is_some();
5446
5447        // For VLP patterns, default min to 1 and max to a reasonable limit.
5448        // For single-hop patterns (no range), both are 1.
5449        const DEFAULT_MAX_HOPS: usize = 100;
5450        let (min_hops, max_hops) = if let Some(range) = &params.rel.range {
5451            let min = range.min.unwrap_or(1) as usize;
5452            let max = range.max.map(|m| m as usize).unwrap_or(DEFAULT_MAX_HOPS);
5453            (min, max)
5454        } else {
5455            (1, 1)
5456        };
5457
5458        // step_var is the relationship variable (r in `()-[r]->()` or `()-[r*]->()`)
5459        //   Single-hop: step_var holds a single edge object
5460        //   VLP: step_var holds a list of edge objects
5461        // path_var is the named path variable (p in `p = (a)-[r*]->(b)`)
5462        let step_var = params.rel.variable.clone();
5463        let path_var = params.path_variable.clone();
5464
5465        // If we have a bound edge variable from a previous clause, use a temp variable
5466        // for the Traverse step, then filter to match the bound edge
5467        let rebound_var = bound_edge_var
5468            .as_ref()
5469            .or(bound_edge_list_var.as_ref())
5470            .cloned();
5471        let effective_step_var = if let Some(ref bv) = rebound_var {
5472            Some(format!("__rebound_{}", bv))
5473        } else {
5474            step_var.clone()
5475        };
5476
5477        // If we have a bound target variable from a previous clause (e.g. WITH),
5478        // use a temp variable for the Traverse step, then filter to match the bound
5479        // target — mirroring the bound edge pattern above.
5480        let rebound_target_var = if target_is_bound && !target_variable.is_empty() {
5481            let is_imported = find_var_in_scope(vars_in_scope, &target_variable)
5482                .map(|info| info.var_type == VariableType::Imported)
5483                .unwrap_or(false);
5484            if !is_imported {
5485                Some(target_variable.clone())
5486            } else {
5487                None
5488            }
5489        } else {
5490            None
5491        };
5492
5493        let effective_target_var = if let Some(ref bv) = rebound_target_var {
5494            format!("__rebound_{}", bv)
5495        } else {
5496            target_variable.clone()
5497        };
5498
5499        // Collect all variables (node + edge) from the current MATCH clause scope
5500        // for relationship uniqueness scoping. Edge ID columns (both named `r._eid`
5501        // and anonymous `__eid_to_target`) are only included in uniqueness filtering
5502        // if their associated variable is in this set. This prevents relationship
5503        // uniqueness from being enforced across disconnected MATCH clauses.
5504        let mut scope_match_variables: HashSet<String> = vars_in_scope[vars_before_pattern..]
5505            .iter()
5506            .map(|v| v.name.clone())
5507            .collect();
5508        // Include the current traverse's edge variable (not yet added to vars_in_scope)
5509        if let Some(ref sv) = effective_step_var {
5510            scope_match_variables.insert(sv.clone());
5511        }
5512        // Include the target variable (not yet added to vars_in_scope)
5513        scope_match_variables.insert(effective_target_var.clone());
5514        // Include bound edge variables from this path for cross-segment Trail mode
5515        // enforcement (same as the schemaless path above).
5516        scope_match_variables.extend(path_bound_edge_vars.iter().cloned());
5517
5518        let mut plan = LogicalPlan::Traverse {
5519            input: Box::new(plan),
5520            edge_type_ids,
5521            direction: params.rel.direction.clone(),
5522            source_variable: source_variable.to_string(),
5523            target_variable: effective_target_var.clone(),
5524            target_label_id: target_label_meta
5525                .map(|m| m.id)
5526                .or(virtual_target_label_id)
5527                .unwrap_or(0),
5528            step_variable: effective_step_var.clone(),
5529            min_hops,
5530            max_hops,
5531            optional: params.optional,
5532            target_filter: self.node_filter_expr(
5533                &target_variable,
5534                &params.target_node.labels,
5535                &params.target_node.properties,
5536            ),
5537            path_variable: path_var.clone(),
5538            edge_properties: HashSet::new(),
5539            is_variable_length,
5540            optional_pattern_vars: params.optional_pattern_vars.clone(),
5541            scope_match_variables,
5542            edge_filter_expr: if is_variable_length {
5543                // Use the step variable name, or a fallback for anonymous edges.
5544                // The variable name is used by properties_to_expr to build
5545                // `var.prop = value` expressions. For BFS property checking,
5546                // only the property name and value matter (the variable name
5547                // is stripped during extraction).
5548                let filter_var = effective_step_var
5549                    .clone()
5550                    .unwrap_or_else(|| "__anon_edge".to_string());
5551                self.properties_to_expr(&filter_var, &params.rel.properties)
5552            } else {
5553                None
5554            },
5555            path_mode: crate::query::df_graph::nfa::PathMode::Trail,
5556            qpp_steps: None,
5557        };
5558
5559        // Pre-compute optional variables set for filter nodes in this traverse.
5560        // Used by relationship property filters and bound-edge filters below.
5561        let filter_optional_vars = if params.optional {
5562            params.optional_pattern_vars.clone()
5563        } else {
5564            HashSet::new()
5565        };
5566
5567        // Apply relationship property predicates (e.g. [r {k: v}]).
5568        // For VLP, predicates are stored inline in edge_filter_expr (above).
5569        // For fixed-length, wrap as a Filter node for post-traverse evaluation.
5570        if !is_variable_length
5571            && let Some(edge_var_name) = effective_step_var.as_ref()
5572            && let Some(edge_prop_filter) =
5573                self.properties_to_expr(edge_var_name, &params.rel.properties)
5574        {
5575            plan = LogicalPlan::Filter {
5576                input: Box::new(plan),
5577                predicate: edge_prop_filter,
5578                optional_variables: filter_optional_vars.clone(),
5579            };
5580        }
5581
5582        // Only apply bound target filter for Imported variables (from outer scope/subquery).
5583        // For regular cycle patterns like (a)-[:T]->(b)-[:T]->(a), the bound check
5584        // uses Parameter which requires the value to be in params (subquery context).
5585        if target_is_bound
5586            && let Some(info) = find_var_in_scope(vars_in_scope, &target_variable)
5587            && info.var_type == VariableType::Imported
5588        {
5589            plan = Self::wrap_with_bound_target_filter(plan, &target_variable);
5590        }
5591
5592        // If we have a bound edge variable, add a filter to match it
5593        if let Some(ref bv) = bound_edge_var {
5594            let temp_var = format!("__rebound_{}", bv);
5595            let bound_check = Expr::BinaryOp {
5596                left: Box::new(Expr::Property(
5597                    Box::new(Expr::Variable(temp_var)),
5598                    "_eid".to_string(),
5599                )),
5600                op: BinaryOp::Eq,
5601                right: Box::new(Expr::Property(
5602                    Box::new(Expr::Variable(bv.clone())),
5603                    "_eid".to_string(),
5604                )),
5605            };
5606            plan = LogicalPlan::Filter {
5607                input: Box::new(plan),
5608                predicate: bound_check,
5609                optional_variables: filter_optional_vars.clone(),
5610            };
5611        }
5612
5613        // If we have a bound relationship list variable for a VLP pattern,
5614        // add a filter to match the traversed relationship list exactly.
5615        if let Some(ref bv) = bound_edge_list_var {
5616            let temp_var = format!("__rebound_{}", bv);
5617            let temp_eids = Expr::ListComprehension {
5618                variable: "__rebound_edge".to_string(),
5619                list: Box::new(Expr::Variable(temp_var)),
5620                where_clause: None,
5621                map_expr: Box::new(Expr::FunctionCall {
5622                    name: "toInteger".to_string(),
5623                    args: vec![Expr::Property(
5624                        Box::new(Expr::Variable("__rebound_edge".to_string())),
5625                        "_eid".to_string(),
5626                    )],
5627                    distinct: false,
5628                    window_spec: None,
5629                }),
5630            };
5631            let bound_eids = Expr::ListComprehension {
5632                variable: "__bound_edge".to_string(),
5633                list: Box::new(Expr::Variable(bv.clone())),
5634                where_clause: None,
5635                map_expr: Box::new(Expr::FunctionCall {
5636                    name: "toInteger".to_string(),
5637                    args: vec![Expr::Property(
5638                        Box::new(Expr::Variable("__bound_edge".to_string())),
5639                        "_eid".to_string(),
5640                    )],
5641                    distinct: false,
5642                    window_spec: None,
5643                }),
5644            };
5645            let bound_list_check = Expr::BinaryOp {
5646                left: Box::new(temp_eids),
5647                op: BinaryOp::Eq,
5648                right: Box::new(bound_eids),
5649            };
5650            plan = LogicalPlan::Filter {
5651                input: Box::new(plan),
5652                predicate: bound_list_check,
5653                optional_variables: filter_optional_vars.clone(),
5654            };
5655        }
5656
5657        // If we have a bound target variable (non-imported), add a filter to constrain
5658        // the traversal output to match the previously bound target node.
5659        if let Some(ref bv) = rebound_target_var {
5660            let temp_var = format!("__rebound_{}", bv);
5661            let bound_check = Expr::BinaryOp {
5662                left: Box::new(Expr::Property(
5663                    Box::new(Expr::Variable(temp_var.clone())),
5664                    "_vid".to_string(),
5665                )),
5666                op: BinaryOp::Eq,
5667                right: Box::new(Expr::Property(
5668                    Box::new(Expr::Variable(bv.clone())),
5669                    "_vid".to_string(),
5670                )),
5671            };
5672            // For OPTIONAL MATCH, include the rebound variable in optional_variables
5673            // so that OptionalFilterExec excludes it from the grouping key and
5674            // properly nullifies it in recovery rows when all matches are filtered out.
5675            // Without this, each traverse result creates its own group (keyed by
5676            // __rebound_c._vid), and null-row recovery emits a spurious null row
5677            // for every non-matching target instead of one per source group.
5678            let mut rebound_filter_vars = filter_optional_vars;
5679            if params.optional {
5680                rebound_filter_vars.insert(temp_var);
5681            }
5682            plan = LogicalPlan::Filter {
5683                input: Box::new(plan),
5684                predicate: bound_check,
5685                optional_variables: rebound_filter_vars,
5686            };
5687        }
5688
5689        // Add the bound variables to scope
5690        // Skip adding the edge variable if it's already bound from a previous clause
5691        if let Some(sv) = &step_var
5692            && bound_edge_var.is_none()
5693            && bound_edge_list_var.is_none()
5694        {
5695            add_var_to_scope(vars_in_scope, sv, VariableType::Edge)?;
5696            if is_variable_length
5697                && let Some(info) = vars_in_scope.iter_mut().find(|v| v.name == *sv)
5698            {
5699                info.is_vlp = true;
5700            }
5701        }
5702        if let Some(pv) = &path_var
5703            && !is_var_in_scope(vars_in_scope, pv)
5704        {
5705            add_var_to_scope(vars_in_scope, pv, VariableType::Path)?;
5706        }
5707        if !is_var_in_scope(vars_in_scope, &target_variable) {
5708            add_var_to_scope(vars_in_scope, &target_variable, VariableType::Node)?;
5709        }
5710
5711        Ok((plan, target_variable, effective_target_var))
5712    }
5713
5714    /// Combine a new scan plan with an existing plan.
5715    ///
5716    /// If the existing plan is `Empty`, returns the new plan directly.
5717    /// Otherwise, wraps them in a `CrossJoin`.
5718    fn join_with_plan(existing: LogicalPlan, new: LogicalPlan) -> LogicalPlan {
5719        if matches!(existing, LogicalPlan::Empty) {
5720            new
5721        } else {
5722            LogicalPlan::CrossJoin {
5723                left: Box::new(existing),
5724                right: Box::new(new),
5725            }
5726        }
5727    }
5728
5729    /// Split node map predicates into scan-pushable and residual filters.
5730    ///
5731    /// A predicate is scan-pushable when its value expression references only
5732    /// the node variable itself (or no variables). Predicates referencing other
5733    /// in-scope variables (correlated predicates) are returned as residual so
5734    /// they can be applied after joining with the existing plan.
5735    fn split_node_property_filters_for_scan(
5736        &self,
5737        variable: &str,
5738        properties: &Option<Expr>,
5739    ) -> (Option<Expr>, Option<Expr>) {
5740        let entries = match properties {
5741            Some(Expr::Map(entries)) => entries,
5742            _ => return (None, None),
5743        };
5744
5745        if entries.is_empty() {
5746            return (None, None);
5747        }
5748
5749        let mut pushdown_entries = Vec::new();
5750        let mut residual_entries = Vec::new();
5751
5752        for (prop, val_expr) in entries {
5753            let vars = collect_expr_variables(val_expr);
5754            if vars.iter().all(|v| v == variable) {
5755                pushdown_entries.push((prop.clone(), val_expr.clone()));
5756            } else {
5757                residual_entries.push((prop.clone(), val_expr.clone()));
5758            }
5759        }
5760
5761        let pushdown_map = if pushdown_entries.is_empty() {
5762            None
5763        } else {
5764            Some(Expr::Map(pushdown_entries))
5765        };
5766        let residual_map = if residual_entries.is_empty() {
5767            None
5768        } else {
5769            Some(Expr::Map(residual_entries))
5770        };
5771
5772        (
5773            self.properties_to_expr(variable, &pushdown_map),
5774            self.properties_to_expr(variable, &residual_map),
5775        )
5776    }
5777
5778    /// Decide whether per-label `Scan` branches for a label disjunction can
5779    /// safely be combined under `LogicalPlan::Union`. Returns `true` iff every
5780    /// label in `labels` is registered in the schema AND every pair shares an
5781    /// identical property name+type set.
5782    ///
5783    /// When this returns `false`, the disjunction must fall back to a single
5784    /// `ScanMainByLabels` over all labels — otherwise DataFusion's
5785    /// `UnionExec::try_new` panics in `union_schema` because the per-label
5786    /// `GraphScanExec` outputs (`_vid` + `_labels` + per-label projected
5787    /// properties) have different field counts. Issue rustic-ai/uni-db#62.
5788    ///
5789    /// We deliberately compare full schema property sets rather than only the
5790    /// properties referenced by the current query: at this logical-planning
5791    /// stage we have not yet collected `all_properties`, and `*` wildcards
5792    /// (e.g. from unknown function calls) would expand per-label downstream
5793    /// in `df_planner::resolve_properties` even when the query text only
5794    /// touches common columns.
5795    fn label_branches_share_property_schema(&self, labels: &[String]) -> bool {
5796        if labels.len() < 2 {
5797            return true;
5798        }
5799        let mut iter = labels.iter();
5800        let first = iter.next().expect("len >= 2");
5801        let Some(first_props) = self.schema.properties.get(first) else {
5802            return false;
5803        };
5804        for label in iter {
5805            let Some(props) = self.schema.properties.get(label) else {
5806                return false;
5807            };
5808            if props.len() != first_props.len() {
5809                return false;
5810            }
5811            for (name, meta) in first_props {
5812                let Some(other_meta) = props.get(name) else {
5813                    return false;
5814                };
5815                if meta.r#type != other_meta.r#type {
5816                    return false;
5817                }
5818            }
5819        }
5820        true
5821    }
5822
5823    /// Plan an unbound node (creates a Scan, ScanAll, ScanMainByLabel, ExtIdLookup, or CrossJoin).
5824    fn plan_unbound_node(
5825        &self,
5826        node: &NodePattern,
5827        variable: &str,
5828        plan: LogicalPlan,
5829        optional: bool,
5830    ) -> Result<LogicalPlan> {
5831        // Properties handling
5832        let properties = match &node.properties {
5833            Some(Expr::Map(entries)) => entries.as_slice(),
5834            Some(Expr::Parameter(_)) => {
5835                return Err(anyhow!(
5836                    "SyntaxError: InvalidParameterUse - Parameters cannot be used as node predicates"
5837                ));
5838            }
5839            Some(_) => return Err(anyhow!("Node properties must be a Map")),
5840            None => &[],
5841        };
5842
5843        let has_existing_scope = !matches!(plan, LogicalPlan::Empty);
5844
5845        let apply_residual_filter = |input: LogicalPlan, residual: Option<Expr>| -> LogicalPlan {
5846            if let Some(predicate) = residual {
5847                LogicalPlan::Filter {
5848                    input: Box::new(input),
5849                    predicate,
5850                    optional_variables: HashSet::new(),
5851                }
5852            } else {
5853                input
5854            }
5855        };
5856
5857        let (node_scan_filter, node_residual_filter) = if has_existing_scope {
5858            self.split_node_property_filters_for_scan(variable, &node.properties)
5859        } else {
5860            (self.properties_to_expr(variable, &node.properties), None)
5861        };
5862
5863        // Check for ext_id in properties when no label is specified
5864        if node.labels.is_empty() {
5865            // Try to find ext_id property for main table lookup
5866            if let Some((_, ext_id_value)) = properties.iter().find(|(k, _)| k == "ext_id") {
5867                // Extract the ext_id value as a string
5868                let ext_id = match ext_id_value {
5869                    Expr::Literal(CypherLiteral::String(s)) => s.clone(),
5870                    _ => {
5871                        return Err(anyhow!("ext_id must be a string literal for direct lookup"));
5872                    }
5873                };
5874
5875                // Build filter for remaining properties (excluding ext_id)
5876                let remaining_props: Vec<_> = properties
5877                    .iter()
5878                    .filter(|(k, _)| k != "ext_id")
5879                    .cloned()
5880                    .collect();
5881
5882                let remaining_expr = if remaining_props.is_empty() {
5883                    None
5884                } else {
5885                    Some(Expr::Map(remaining_props))
5886                };
5887
5888                let (prop_filter, residual_filter) = if has_existing_scope {
5889                    self.split_node_property_filters_for_scan(variable, &remaining_expr)
5890                } else {
5891                    (self.properties_to_expr(variable, &remaining_expr), None)
5892                };
5893
5894                let ext_id_lookup = LogicalPlan::ExtIdLookup {
5895                    variable: variable.to_string(),
5896                    ext_id,
5897                    filter: prop_filter,
5898                    optional,
5899                };
5900
5901                let joined = Self::join_with_plan(plan, ext_id_lookup);
5902                return Ok(apply_residual_filter(joined, residual_filter));
5903            }
5904
5905            // No ext_id: create ScanAll for unlabeled node pattern
5906            let scan_all = LogicalPlan::ScanAll {
5907                variable: variable.to_string(),
5908                filter: node_scan_filter,
5909                optional,
5910            };
5911
5912            let joined = Self::join_with_plan(plan, scan_all);
5913            return Ok(apply_residual_filter(joined, node_residual_filter));
5914        }
5915
5916        // Label disjunction `(n:A|B|C)` — emit Union of label-scoped Scans.
5917        //
5918        // Storage fact: a multi-labeled vertex is fanned out into every
5919        // per-label table it carries (uni-store/src/runtime/writer.rs's
5920        // `push_vertex_to_labels`), so the same vid can appear in both the
5921        // `A` scan and the `B` scan of a disjunctive query. Use
5922        // `Union { all: false }` so the combined result deduplicates by row
5923        // contents (which include the vid) rather than emitting the same
5924        // vertex twice. The single-label-disjunction case (`Disjunction(["A"])`)
5925        // is encoded the same way the parser already encodes single edge
5926        // types, and reduces to one Scan with no Union wrapping.
5927        if node.labels.is_proper_disjunction() {
5928            let label_names: Vec<String> = node.labels.names().to_vec();
5929
5930            // Per-label branches under a `Union` only line up when every
5931            // branch produces the same Arrow schema. The narrow-scan
5932            // `Scan` path resolves columns *per label*, so heterogeneous
5933            // property sets (or any schemaless label in the mix) yield
5934            // mismatched widths and DataFusion's `UnionExec::try_new`
5935            // panics inside `union_schema` (issue rustic-ai/uni-db#62).
5936            //
5937            // For those cases, lower every branch to a *single-label*
5938            // `ScanMainByLabels` instead. The schemaless main-table scan
5939            // resolves columns from `all_properties` directly (no per-label
5940            // expansion), so all branches emit a uniform schema and the
5941            // outer `Union { all: false }` deduplicates correctly. We
5942            // keep the per-branch Union shape (rather than collapsing to
5943            // a single multi-label scan) because multi-label
5944            // `ScanMainByLabels` has AND/intersection semantics — wrong
5945            // for a disjunction.
5946            let use_main_table_branches = !self.label_branches_share_property_schema(&label_names);
5947
5948            let mut branches: Vec<LogicalPlan> = Vec::with_capacity(label_names.len());
5949            for label_name in &label_names {
5950                let branch = if use_main_table_branches {
5951                    LogicalPlan::ScanMainByLabels {
5952                        labels: vec![label_name.clone()],
5953                        variable: variable.to_string(),
5954                        filter: node_scan_filter.clone(),
5955                        optional,
5956                    }
5957                } else {
5958                    let meta = self
5959                        .schema
5960                        .get_label_case_insensitive(label_name)
5961                        .expect("share_property_schema true implies all labels in schema");
5962                    LogicalPlan::Scan {
5963                        label_id: meta.id,
5964                        labels: vec![label_name.clone()],
5965                        variable: variable.to_string(),
5966                        filter: node_scan_filter.clone(),
5967                        optional,
5968                    }
5969                };
5970                branches.push(branch);
5971            }
5972            // Left-leaning Union: Union(Union(A, B), C). All inner
5973            // unions dedupe by row, so the outer one does too.
5974            let mut iter = branches.into_iter();
5975            let mut union_plan = iter
5976                .next()
5977                .expect("is_proper_disjunction implies at least 2 labels");
5978            for next in iter {
5979                union_plan = LogicalPlan::Union {
5980                    left: Box::new(union_plan),
5981                    right: Box::new(next),
5982                    all: false,
5983                };
5984            }
5985            let joined = Self::join_with_plan(plan, union_plan);
5986            return Ok(apply_residual_filter(joined, node_residual_filter));
5987        }
5988
5989        // Use first label for label_id (primary label for dataset selection)
5990        let label_name = &node.labels[0];
5991
5992        // Check if label exists in schema
5993        if let Some(label_meta) = self.schema.get_label_case_insensitive(label_name) {
5994            // Known label: use standard Scan
5995            let scan = LogicalPlan::Scan {
5996                label_id: label_meta.id,
5997                labels: node.labels.names().to_vec(),
5998                variable: variable.to_string(),
5999                filter: node_scan_filter,
6000                optional,
6001            };
6002
6003            let joined = Self::join_with_plan(plan, scan);
6004            Ok(apply_residual_filter(joined, node_residual_filter))
6005        } else {
6006            // Unknown label. Try a CatalogProvider / ReplacementScanProvider
6007            // claim first: on success allocate a virtual label-ID and emit a
6008            // regular `Scan` against the virtual id (`df_planner` dispatches
6009            // to `CatalogVertexScanExec`). When no provider claims and the
6010            // replacement-scan gate is on, strict-mode errors. When the gate
6011            // is off and no provider claims, preserve today's silent-empty
6012            // schemaless `ScanMainByLabels` behavior bit-for-bit.
6013            if let Some((virtual_id, _)) = self.allocate_virtual_label(label_name)? {
6014                let scan = LogicalPlan::Scan {
6015                    label_id: virtual_id,
6016                    labels: node.labels.names().to_vec(),
6017                    variable: variable.to_string(),
6018                    filter: node_scan_filter,
6019                    optional,
6020                };
6021                let joined = Self::join_with_plan(plan, scan);
6022                return Ok(apply_residual_filter(joined, node_residual_filter));
6023            }
6024            if self.replacement_scans_enabled {
6025                return Err(anyhow!(
6026                    "Label `{}` is not defined in schema and no \
6027                     CatalogProvider or ReplacementScanProvider claimed it; \
6028                     strict-mode (replacement_scans=true) requires the label \
6029                     to resolve",
6030                    label_name
6031                ));
6032            }
6033
6034            let scan_main = LogicalPlan::ScanMainByLabels {
6035                labels: node.labels.names().to_vec(),
6036                variable: variable.to_string(),
6037                filter: node_scan_filter,
6038                optional,
6039            };
6040
6041            let joined = Self::join_with_plan(plan, scan_main);
6042            Ok(apply_residual_filter(joined, node_residual_filter))
6043        }
6044    }
6045
6046    /// Plan a WHERE clause with vector_similarity extraction and predicate pushdown.
6047    ///
6048    /// When `optional_vars` is non-empty, the Filter will preserve rows where
6049    /// any of those variables are NULL (for OPTIONAL MATCH semantics).
6050    fn plan_where_clause(
6051        &self,
6052        predicate: &Expr,
6053        plan: LogicalPlan,
6054        vars_in_scope: &[VariableInfo],
6055        optional_vars: HashSet<String>,
6056    ) -> Result<LogicalPlan> {
6057        // Validate no aggregation functions in WHERE clause
6058        validate_no_aggregation_in_where(predicate)?;
6059
6060        // Validate all variables used are in scope
6061        validate_expression_variables(predicate, vars_in_scope)?;
6062
6063        // Validate expression types (function args, boolean operators)
6064        validate_expression(predicate, vars_in_scope)?;
6065
6066        // Check that WHERE predicate isn't a bare node/edge/path variable
6067        if let Expr::Variable(var_name) = predicate
6068            && let Some(info) = find_var_in_scope(vars_in_scope, var_name)
6069            && matches!(
6070                info.var_type,
6071                VariableType::Node | VariableType::Edge | VariableType::Path
6072            )
6073        {
6074            return Err(anyhow!(
6075                "SyntaxError: InvalidArgumentType - Type mismatch: expected Boolean but was {:?}",
6076                info.var_type
6077            ));
6078        }
6079
6080        let mut plan = plan;
6081
6082        // Transform VALID_AT macro to function call
6083        let transformed_predicate = Self::transform_valid_at_to_function(predicate.clone());
6084
6085        // Rewrite id(var) to var._vid (or var._eid for an edge) so
6086        // PredicateAnalyzer can push it down.
6087        let transformed_predicate = Self::rewrite_id_to_vid(transformed_predicate, vars_in_scope);
6088
6089        let mut current_predicate =
6090            self.rewrite_predicates_using_indexes(&transformed_predicate, &plan, vars_in_scope)?;
6091
6092        // 1. Try to extract vector_similarity predicate for optimization
6093        if let Some(extraction) = extract_vector_similarity(&current_predicate) {
6094            let vs = &extraction.predicate;
6095            if Self::find_scan_label_id(&plan, &vs.variable).is_some() {
6096                plan = Self::replace_scan_with_knn(
6097                    plan,
6098                    &vs.variable,
6099                    &vs.property,
6100                    vs.query.clone(),
6101                    vs.threshold,
6102                );
6103                if let Some(residual) = extraction.residual {
6104                    current_predicate = residual;
6105                } else {
6106                    current_predicate = Expr::TRUE;
6107                }
6108            }
6109        }
6110
6111        // 2. Label/type disjunction → narrow-scan rewrite.
6112        //
6113        // `WHERE n:A OR n:B` and `WHERE type(r) = 'A' OR type(r) = 'B'`
6114        // are functionally identical to the inline forms `(n:A|B)` and
6115        // `[r:A|B]`, but a literal pattern lowering would route them
6116        // through `Filter(LabelCheck OR LabelCheck)` over `ScanAll` —
6117        // a full vertex/edge scan plus residual filter, missing the
6118        // narrow-scan fast-path that the inline forms get for free.
6119        // Detect those OR-chains here and rewrite the upstream
6120        // `ScanAll` / `Traverse` accordingly.
6121        let conjuncts = Self::split_and_conjuncts(&current_predicate);
6122        let mut keep: Vec<Expr> = Vec::with_capacity(conjuncts.len());
6123        for conj in conjuncts {
6124            let mut consumed = false;
6125            for var in vars_in_scope {
6126                if optional_vars.contains(&var.name) {
6127                    continue;
6128                }
6129                // Node label disjunction → Union of label-scoped Scans.
6130                if Self::is_scan_all_for(&plan, &var.name)
6131                    && let Some(labels) = try_label_or_to_union(&conj, &var.name)
6132                {
6133                    plan = self.replace_scan_all_with_label_union(plan, &var.name, &labels, false);
6134                    consumed = true;
6135                    break;
6136                }
6137                // Edge type disjunction → merge into Traverse.edge_type_ids.
6138                if let Some(types) = try_type_or_to_union(&conj, &var.name)
6139                    && Self::merge_traverse_types_for(&plan, &var.name, &types).is_some()
6140                {
6141                    let mut ids: Vec<u32> = Vec::with_capacity(types.len());
6142                    let mut all_known = true;
6143                    for t in &types {
6144                        match self.schema.edge_types.get(t) {
6145                            Some(meta) => ids.push(meta.id),
6146                            None => {
6147                                all_known = false;
6148                                break;
6149                            }
6150                        }
6151                    }
6152                    if all_known {
6153                        plan = Self::set_traverse_edge_type_ids(plan, &var.name, ids);
6154                        consumed = true;
6155                        break;
6156                    }
6157                }
6158            }
6159            if !consumed {
6160                keep.push(conj);
6161            }
6162        }
6163        current_predicate = Self::combine_predicates(keep).unwrap_or(Expr::TRUE);
6164
6165        // 3. Push eligible predicates to Scan OR Traverse filters
6166        // Note: Do NOT push predicates on optional variables (from OPTIONAL MATCH) to
6167        // Traverse's target_filter, because target_filter filtering doesn't preserve NULL
6168        // rows. Let them stay in the Filter operator which handles NULL preservation.
6169        for var in vars_in_scope {
6170            // Skip pushdown for optional variables - they need NULL preservation in Filter
6171            if optional_vars.contains(&var.name) {
6172                continue;
6173            }
6174
6175            // Check if var is produced by a Scan
6176            if Self::find_scan_label_id(&plan, &var.name).is_some() {
6177                let (pushable, residual) =
6178                    Self::extract_variable_predicates(&current_predicate, &var.name);
6179
6180                for pred in pushable {
6181                    plan = Self::push_predicate_to_scan(plan, &var.name, pred);
6182                }
6183
6184                if let Some(r) = residual {
6185                    current_predicate = r;
6186                } else {
6187                    current_predicate = Expr::TRUE;
6188                }
6189            } else if Self::is_traverse_target(&plan, &var.name) {
6190                // Push to Traverse
6191                let (pushable, residual) =
6192                    Self::extract_variable_predicates(&current_predicate, &var.name);
6193
6194                for pred in pushable {
6195                    plan = Self::push_predicate_to_traverse(plan, &var.name, pred);
6196                }
6197
6198                if let Some(r) = residual {
6199                    current_predicate = r;
6200                } else {
6201                    current_predicate = Expr::TRUE;
6202                }
6203            }
6204        }
6205
6206        // 4. Push predicates to Apply.input_filter
6207        // This filters input rows BEFORE executing correlated subqueries.
6208        plan = Self::push_predicates_to_apply(plan, &mut current_predicate);
6209
6210        // 5. Add Filter node for any remaining predicates
6211        if !current_predicate.is_true_literal() {
6212            plan = LogicalPlan::Filter {
6213                input: Box::new(plan),
6214                predicate: current_predicate,
6215                optional_variables: optional_vars,
6216            };
6217        }
6218
6219        Ok(plan)
6220    }
6221
6222    fn rewrite_predicates_using_indexes(
6223        &self,
6224        predicate: &Expr,
6225        plan: &LogicalPlan,
6226        vars_in_scope: &[VariableInfo],
6227    ) -> Result<Expr> {
6228        let mut rewritten = predicate.clone();
6229
6230        for var in vars_in_scope {
6231            if let Some(label_id) = Self::find_scan_label_id(plan, &var.name) {
6232                // Find label name
6233                let label_name = self.schema.label_name_by_id(label_id).map(str::to_owned);
6234
6235                if let Some(label) = label_name
6236                    && let Some(props) = self.schema.properties.get(&label)
6237                {
6238                    for (gen_col, meta) in props {
6239                        if meta.generation_expression.is_some() {
6240                            // Use cached parsed expression
6241                            if let Some(schema_expr) =
6242                                self.gen_expr_cache.get(&(label.clone(), gen_col.clone()))
6243                            {
6244                                // Rewrite 'rewritten' replacing occurrences of schema_expr with gen_col
6245                                rewritten = Self::replace_expression(
6246                                    rewritten,
6247                                    schema_expr,
6248                                    &var.name,
6249                                    gen_col,
6250                                );
6251                            }
6252                        }
6253                    }
6254                }
6255            }
6256        }
6257        Ok(rewritten)
6258    }
6259
6260    fn replace_expression(expr: Expr, schema_expr: &Expr, query_var: &str, gen_col: &str) -> Expr {
6261        // First, normalize schema_expr to use query_var
6262        let schema_var = schema_expr.extract_variable();
6263
6264        if let Some(s_var) = schema_var {
6265            let target_expr = schema_expr.substitute_variable(&s_var, query_var);
6266
6267            if expr == target_expr {
6268                return Expr::Property(
6269                    Box::new(Expr::Variable(query_var.to_string())),
6270                    gen_col.to_string(),
6271                );
6272            }
6273        }
6274
6275        // Recurse
6276        match expr {
6277            Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
6278                left: Box::new(Self::replace_expression(
6279                    *left,
6280                    schema_expr,
6281                    query_var,
6282                    gen_col,
6283                )),
6284                op,
6285                right: Box::new(Self::replace_expression(
6286                    *right,
6287                    schema_expr,
6288                    query_var,
6289                    gen_col,
6290                )),
6291            },
6292            Expr::UnaryOp { op, expr } => Expr::UnaryOp {
6293                op,
6294                expr: Box::new(Self::replace_expression(
6295                    *expr,
6296                    schema_expr,
6297                    query_var,
6298                    gen_col,
6299                )),
6300            },
6301            Expr::FunctionCall {
6302                name,
6303                args,
6304                distinct,
6305                window_spec,
6306            } => Expr::FunctionCall {
6307                name,
6308                args: args
6309                    .into_iter()
6310                    .map(|a| Self::replace_expression(a, schema_expr, query_var, gen_col))
6311                    .collect(),
6312                distinct,
6313                window_spec,
6314            },
6315            Expr::IsNull(expr) => Expr::IsNull(Box::new(Self::replace_expression(
6316                *expr,
6317                schema_expr,
6318                query_var,
6319                gen_col,
6320            ))),
6321            Expr::IsNotNull(expr) => Expr::IsNotNull(Box::new(Self::replace_expression(
6322                *expr,
6323                schema_expr,
6324                query_var,
6325                gen_col,
6326            ))),
6327            Expr::IsUnique(expr) => Expr::IsUnique(Box::new(Self::replace_expression(
6328                *expr,
6329                schema_expr,
6330                query_var,
6331                gen_col,
6332            ))),
6333            Expr::ArrayIndex {
6334                array: e,
6335                index: idx,
6336            } => Expr::ArrayIndex {
6337                array: Box::new(Self::replace_expression(
6338                    *e,
6339                    schema_expr,
6340                    query_var,
6341                    gen_col,
6342                )),
6343                index: Box::new(Self::replace_expression(
6344                    *idx,
6345                    schema_expr,
6346                    query_var,
6347                    gen_col,
6348                )),
6349            },
6350            Expr::ArraySlice { array, start, end } => Expr::ArraySlice {
6351                array: Box::new(Self::replace_expression(
6352                    *array,
6353                    schema_expr,
6354                    query_var,
6355                    gen_col,
6356                )),
6357                start: start.map(|s| {
6358                    Box::new(Self::replace_expression(
6359                        *s,
6360                        schema_expr,
6361                        query_var,
6362                        gen_col,
6363                    ))
6364                }),
6365                end: end.map(|e| {
6366                    Box::new(Self::replace_expression(
6367                        *e,
6368                        schema_expr,
6369                        query_var,
6370                        gen_col,
6371                    ))
6372                }),
6373            },
6374            Expr::List(exprs) => Expr::List(
6375                exprs
6376                    .into_iter()
6377                    .map(|e| Self::replace_expression(e, schema_expr, query_var, gen_col))
6378                    .collect(),
6379            ),
6380            Expr::Map(entries) => Expr::Map(
6381                entries
6382                    .into_iter()
6383                    .map(|(k, v)| {
6384                        (
6385                            k,
6386                            Self::replace_expression(v, schema_expr, query_var, gen_col),
6387                        )
6388                    })
6389                    .collect(),
6390            ),
6391            Expr::Property(e, prop) => Expr::Property(
6392                Box::new(Self::replace_expression(
6393                    *e,
6394                    schema_expr,
6395                    query_var,
6396                    gen_col,
6397                )),
6398                prop,
6399            ),
6400            Expr::Case {
6401                expr: case_expr,
6402                when_then,
6403                else_expr,
6404            } => Expr::Case {
6405                expr: case_expr.map(|e| {
6406                    Box::new(Self::replace_expression(
6407                        *e,
6408                        schema_expr,
6409                        query_var,
6410                        gen_col,
6411                    ))
6412                }),
6413                when_then: when_then
6414                    .into_iter()
6415                    .map(|(w, t)| {
6416                        (
6417                            Self::replace_expression(w, schema_expr, query_var, gen_col),
6418                            Self::replace_expression(t, schema_expr, query_var, gen_col),
6419                        )
6420                    })
6421                    .collect(),
6422                else_expr: else_expr.map(|e| {
6423                    Box::new(Self::replace_expression(
6424                        *e,
6425                        schema_expr,
6426                        query_var,
6427                        gen_col,
6428                    ))
6429                }),
6430            },
6431            Expr::Reduce {
6432                accumulator,
6433                init,
6434                variable: reduce_var,
6435                list,
6436                expr: reduce_expr,
6437            } => Expr::Reduce {
6438                accumulator,
6439                init: Box::new(Self::replace_expression(
6440                    *init,
6441                    schema_expr,
6442                    query_var,
6443                    gen_col,
6444                )),
6445                variable: reduce_var,
6446                list: Box::new(Self::replace_expression(
6447                    *list,
6448                    schema_expr,
6449                    query_var,
6450                    gen_col,
6451                )),
6452                expr: Box::new(Self::replace_expression(
6453                    *reduce_expr,
6454                    schema_expr,
6455                    query_var,
6456                    gen_col,
6457                )),
6458            },
6459
6460            // Leaf nodes (Identifier, Literal, Parameter, etc.) need no recursion
6461            _ => expr,
6462        }
6463    }
6464
6465    /// Returns `true` iff `variable` is bound to a `ScanAll` operator
6466    /// (somewhere under `plan`). Used to gate the
6467    /// `WHERE n:A OR n:B` → `Union(Scan{A}, Scan{B})` rewrite — we only
6468    /// fire it when the variable is currently doing a full vertex scan,
6469    /// not when it's already bound to a labeled `Scan`.
6470    fn is_scan_all_for(plan: &LogicalPlan, variable: &str) -> bool {
6471        match plan {
6472            LogicalPlan::ScanAll { variable: var, .. } => var == variable,
6473            LogicalPlan::Filter { input, .. }
6474            | LogicalPlan::Project { input, .. }
6475            | LogicalPlan::Sort { input, .. }
6476            | LogicalPlan::Limit { input, .. }
6477            | LogicalPlan::Aggregate { input, .. }
6478            | LogicalPlan::Apply { input, .. }
6479            | LogicalPlan::Traverse { input, .. } => Self::is_scan_all_for(input, variable),
6480            LogicalPlan::CrossJoin { left, right } => {
6481                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6482            }
6483            LogicalPlan::Union { left, right, .. } => {
6484                Self::is_scan_all_for(left, variable) || Self::is_scan_all_for(right, variable)
6485            }
6486            _ => false,
6487        }
6488    }
6489
6490    /// Replace the `ScanAll` for `variable` in `plan` with a left-leaning
6491    /// `Union` of label-scoped `Scan` (or `ScanMainByLabels` for unknown
6492    /// labels) operators built from `labels`. Used by the
6493    /// `WHERE n:A OR n:B` rewrite.
6494    fn replace_scan_all_with_label_union(
6495        &self,
6496        plan: LogicalPlan,
6497        variable: &str,
6498        labels: &[String],
6499        optional: bool,
6500    ) -> LogicalPlan {
6501        match plan {
6502            LogicalPlan::ScanAll {
6503                variable: var,
6504                filter,
6505                optional: scan_optional,
6506            } if var == variable => {
6507                // Heterogeneous (or any-schemaless) disjunction: route every
6508                // branch through a single-label `ScanMainByLabels` so all
6509                // branches emit a uniform schemaless schema. Avoids the
6510                // DataFusion `union_schema` panic. See `plan_unbound_node`
6511                // and issue rustic-ai/uni-db#62.
6512                let use_main_table_branches = !self.label_branches_share_property_schema(labels);
6513
6514                let mut branches: Vec<LogicalPlan> = Vec::with_capacity(labels.len());
6515                for label in labels {
6516                    let branch = if use_main_table_branches {
6517                        LogicalPlan::ScanMainByLabels {
6518                            labels: vec![label.clone()],
6519                            variable: variable.to_string(),
6520                            filter: filter.clone(),
6521                            optional: scan_optional || optional,
6522                        }
6523                    } else {
6524                        let meta = self
6525                            .schema
6526                            .get_label_case_insensitive(label)
6527                            .expect("share_property_schema true implies all labels in schema");
6528                        LogicalPlan::Scan {
6529                            label_id: meta.id,
6530                            labels: vec![label.clone()],
6531                            variable: variable.to_string(),
6532                            filter: filter.clone(),
6533                            optional: scan_optional || optional,
6534                        }
6535                    };
6536                    branches.push(branch);
6537                }
6538                let mut iter = branches.into_iter();
6539                let mut union_plan = iter.next().expect("at least one label");
6540                for next in iter {
6541                    union_plan = LogicalPlan::Union {
6542                        left: Box::new(union_plan),
6543                        right: Box::new(next),
6544                        all: false,
6545                    };
6546                }
6547                union_plan
6548            }
6549            LogicalPlan::Filter {
6550                input,
6551                predicate,
6552                optional_variables,
6553            } => LogicalPlan::Filter {
6554                input: Box::new(
6555                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6556                ),
6557                predicate,
6558                optional_variables,
6559            },
6560            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6561                input: Box::new(
6562                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6563                ),
6564                projections,
6565            },
6566            LogicalPlan::CrossJoin { left, right } => {
6567                if Self::is_scan_all_for(&left, variable) {
6568                    LogicalPlan::CrossJoin {
6569                        left: Box::new(
6570                            self.replace_scan_all_with_label_union(
6571                                *left, variable, labels, optional,
6572                            ),
6573                        ),
6574                        right,
6575                    }
6576                } else {
6577                    LogicalPlan::CrossJoin {
6578                        left,
6579                        right: Box::new(
6580                            self.replace_scan_all_with_label_union(
6581                                *right, variable, labels, optional,
6582                            ),
6583                        ),
6584                    }
6585                }
6586            }
6587            LogicalPlan::Traverse {
6588                input,
6589                edge_type_ids,
6590                direction,
6591                source_variable,
6592                target_variable,
6593                target_label_id,
6594                step_variable,
6595                min_hops,
6596                max_hops,
6597                optional: trav_optional,
6598                target_filter,
6599                path_variable,
6600                edge_properties,
6601                is_variable_length,
6602                optional_pattern_vars,
6603                scope_match_variables,
6604                edge_filter_expr,
6605                path_mode,
6606                qpp_steps,
6607            } => LogicalPlan::Traverse {
6608                input: Box::new(
6609                    self.replace_scan_all_with_label_union(*input, variable, labels, optional),
6610                ),
6611                edge_type_ids,
6612                direction,
6613                source_variable,
6614                target_variable,
6615                target_label_id,
6616                step_variable,
6617                min_hops,
6618                max_hops,
6619                optional: trav_optional,
6620                target_filter,
6621                path_variable,
6622                edge_properties,
6623                is_variable_length,
6624                optional_pattern_vars,
6625                scope_match_variables,
6626                edge_filter_expr,
6627                path_mode,
6628                qpp_steps,
6629            },
6630            other => other,
6631        }
6632    }
6633
6634    /// Returns `Some(())` iff `variable` is the `step_variable` (i.e. the
6635    /// edge variable) of some `Traverse` operator in `plan`. Used to gate
6636    /// the `WHERE type(r) = 'A' OR type(r) = 'B'` rewrite — we need a
6637    /// Traverse whose types we can merge into.
6638    fn merge_traverse_types_for(
6639        plan: &LogicalPlan,
6640        edge_var: &str,
6641        _types: &[String],
6642    ) -> Option<()> {
6643        match plan {
6644            LogicalPlan::Traverse {
6645                step_variable,
6646                input,
6647                ..
6648            } => {
6649                if step_variable.as_deref() == Some(edge_var) {
6650                    Some(())
6651                } else {
6652                    Self::merge_traverse_types_for(input, edge_var, _types)
6653                }
6654            }
6655            LogicalPlan::Filter { input, .. }
6656            | LogicalPlan::Project { input, .. }
6657            | LogicalPlan::Sort { input, .. }
6658            | LogicalPlan::Limit { input, .. }
6659            | LogicalPlan::Aggregate { input, .. }
6660            | LogicalPlan::Apply { input, .. } => {
6661                Self::merge_traverse_types_for(input, edge_var, _types)
6662            }
6663            LogicalPlan::CrossJoin { left, right } | LogicalPlan::Union { left, right, .. } => {
6664                Self::merge_traverse_types_for(left, edge_var, _types)
6665                    .or_else(|| Self::merge_traverse_types_for(right, edge_var, _types))
6666            }
6667            _ => None,
6668        }
6669    }
6670
6671    /// Replace `edge_type_ids` on the Traverse whose `step_variable`
6672    /// equals `edge_var`. Used by the type-OR rewrite.
6673    fn set_traverse_edge_type_ids(
6674        plan: LogicalPlan,
6675        edge_var: &str,
6676        new_ids: Vec<u32>,
6677    ) -> LogicalPlan {
6678        match plan {
6679            LogicalPlan::Traverse {
6680                input,
6681                edge_type_ids,
6682                direction,
6683                source_variable,
6684                target_variable,
6685                target_label_id,
6686                step_variable,
6687                min_hops,
6688                max_hops,
6689                optional,
6690                target_filter,
6691                path_variable,
6692                edge_properties,
6693                is_variable_length,
6694                optional_pattern_vars,
6695                scope_match_variables,
6696                edge_filter_expr,
6697                path_mode,
6698                qpp_steps,
6699            } => {
6700                let matches_var = step_variable.as_deref() == Some(edge_var);
6701                let recursed_input = if matches_var {
6702                    input
6703                } else {
6704                    Box::new(Self::set_traverse_edge_type_ids(
6705                        *input,
6706                        edge_var,
6707                        new_ids.clone(),
6708                    ))
6709                };
6710                LogicalPlan::Traverse {
6711                    input: recursed_input,
6712                    edge_type_ids: if matches_var { new_ids } else { edge_type_ids },
6713                    direction,
6714                    source_variable,
6715                    target_variable,
6716                    target_label_id,
6717                    step_variable,
6718                    min_hops,
6719                    max_hops,
6720                    optional,
6721                    target_filter,
6722                    path_variable,
6723                    edge_properties,
6724                    is_variable_length,
6725                    optional_pattern_vars,
6726                    scope_match_variables,
6727                    edge_filter_expr,
6728                    path_mode,
6729                    qpp_steps,
6730                }
6731            }
6732            LogicalPlan::Filter {
6733                input,
6734                predicate,
6735                optional_variables,
6736            } => LogicalPlan::Filter {
6737                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6738                predicate,
6739                optional_variables,
6740            },
6741            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6742                input: Box::new(Self::set_traverse_edge_type_ids(*input, edge_var, new_ids)),
6743                projections,
6744            },
6745            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
6746                left: Box::new(Self::set_traverse_edge_type_ids(
6747                    *left,
6748                    edge_var,
6749                    new_ids.clone(),
6750                )),
6751                right: Box::new(Self::set_traverse_edge_type_ids(*right, edge_var, new_ids)),
6752            },
6753            other => other,
6754        }
6755    }
6756
6757    /// Check if the variable is the target of a Traverse node
6758    fn is_traverse_target(plan: &LogicalPlan, variable: &str) -> bool {
6759        match plan {
6760            LogicalPlan::Traverse {
6761                target_variable,
6762                input,
6763                ..
6764            } => target_variable == variable || Self::is_traverse_target(input, variable),
6765            LogicalPlan::Filter { input, .. }
6766            | LogicalPlan::Project { input, .. }
6767            | LogicalPlan::Sort { input, .. }
6768            | LogicalPlan::Limit { input, .. }
6769            | LogicalPlan::Aggregate { input, .. }
6770            | LogicalPlan::Apply { input, .. } => Self::is_traverse_target(input, variable),
6771            LogicalPlan::CrossJoin { left, right } => {
6772                Self::is_traverse_target(left, variable)
6773                    || Self::is_traverse_target(right, variable)
6774            }
6775            _ => false,
6776        }
6777    }
6778
6779    /// Push a predicate into a Traverse's target_filter for the specified variable
6780    fn push_predicate_to_traverse(
6781        plan: LogicalPlan,
6782        variable: &str,
6783        predicate: Expr,
6784    ) -> LogicalPlan {
6785        match plan {
6786            LogicalPlan::Traverse {
6787                input,
6788                edge_type_ids,
6789                direction,
6790                source_variable,
6791                target_variable,
6792                target_label_id,
6793                step_variable,
6794                min_hops,
6795                max_hops,
6796                optional,
6797                target_filter,
6798                path_variable,
6799                edge_properties,
6800                is_variable_length,
6801                optional_pattern_vars,
6802                scope_match_variables,
6803                edge_filter_expr,
6804                path_mode,
6805                qpp_steps,
6806            } => {
6807                if target_variable == variable {
6808                    // Found the traverse producing this variable
6809                    let new_filter = match target_filter {
6810                        Some(existing) => Some(Expr::BinaryOp {
6811                            left: Box::new(existing),
6812                            op: BinaryOp::And,
6813                            right: Box::new(predicate),
6814                        }),
6815                        None => Some(predicate),
6816                    };
6817                    LogicalPlan::Traverse {
6818                        input,
6819                        edge_type_ids,
6820                        direction,
6821                        source_variable,
6822                        target_variable,
6823                        target_label_id,
6824                        step_variable,
6825                        min_hops,
6826                        max_hops,
6827                        optional,
6828                        target_filter: new_filter,
6829                        path_variable,
6830                        edge_properties,
6831                        is_variable_length,
6832                        optional_pattern_vars,
6833                        scope_match_variables,
6834                        edge_filter_expr,
6835                        path_mode,
6836                        qpp_steps,
6837                    }
6838                } else {
6839                    // Recurse into input
6840                    LogicalPlan::Traverse {
6841                        input: Box::new(Self::push_predicate_to_traverse(
6842                            *input, variable, predicate,
6843                        )),
6844                        edge_type_ids,
6845                        direction,
6846                        source_variable,
6847                        target_variable,
6848                        target_label_id,
6849                        step_variable,
6850                        min_hops,
6851                        max_hops,
6852                        optional,
6853                        target_filter,
6854                        path_variable,
6855                        edge_properties,
6856                        is_variable_length,
6857                        optional_pattern_vars,
6858                        scope_match_variables,
6859                        edge_filter_expr,
6860                        path_mode,
6861                        qpp_steps,
6862                    }
6863                }
6864            }
6865            LogicalPlan::Filter {
6866                input,
6867                predicate: p,
6868                optional_variables: opt_vars,
6869            } => LogicalPlan::Filter {
6870                input: Box::new(Self::push_predicate_to_traverse(
6871                    *input, variable, predicate,
6872                )),
6873                predicate: p,
6874                optional_variables: opt_vars,
6875            },
6876            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
6877                input: Box::new(Self::push_predicate_to_traverse(
6878                    *input, variable, predicate,
6879                )),
6880                projections,
6881            },
6882            LogicalPlan::CrossJoin { left, right } => {
6883                // Check which side has the variable
6884                if Self::is_traverse_target(&left, variable) {
6885                    LogicalPlan::CrossJoin {
6886                        left: Box::new(Self::push_predicate_to_traverse(
6887                            *left, variable, predicate,
6888                        )),
6889                        right,
6890                    }
6891                } else {
6892                    LogicalPlan::CrossJoin {
6893                        left,
6894                        right: Box::new(Self::push_predicate_to_traverse(
6895                            *right, variable, predicate,
6896                        )),
6897                    }
6898                }
6899            }
6900            other => other,
6901        }
6902    }
6903
    /// Plan a WITH clause, handling aggregations and projections.
    ///
    /// The incoming `plan` is wrapped, in this order, with:
    /// 1. an `Aggregate` followed by a renaming `Project` when any item
    ///    contains an aggregate, otherwise a plain `Project` of the items;
    /// 2. a `Filter` for the WITH's `WHERE`, if present;
    /// 3. a `Sort` for `ORDER BY`, if present;
    /// 4. a `Limit` for `SKIP` / `LIMIT`, if either is present;
    /// 5. a cleanup `Project` that drops variables carried through only for
    ///    `WHERE` / `ORDER BY`;
    /// 6. a `Distinct` when the clause is `WITH DISTINCT`.
    ///
    /// Returns the new plan together with the variables visible after the WITH
    /// (everything in scope for `WITH *`, aliased items, and bare pass-through
    /// variables).
    ///
    /// # Errors
    ///
    /// Returns an error when an item or ORDER BY expression fails validation,
    /// a WITH item contains a pattern predicate, two items produce the same
    /// column name, a compound aggregate expression references something that
    /// is not a grouping key, ORDER BY uses an aggregate in a non-aggregating
    /// WITH, a non-variable expression is left unaliased, or SKIP / LIMIT is
    /// rejected by `parse_non_negative_integer`.
    fn plan_with_clause(
        &self,
        with_clause: &WithClause,
        plan: LogicalPlan,
        vars_in_scope: &[VariableInfo],
    ) -> Result<(LogicalPlan, Vec<VariableInfo>)> {
        let mut plan = plan;
        // Non-aggregate items; become the grouping keys if an Aggregate is built.
        let mut group_by: Vec<Expr> = Vec::new();
        // Aggregate calls to compute (bare items plus those nested in compound items).
        let mut aggregates: Vec<Expr> = Vec::new();
        // Items that wrap or combine aggregates; validated against group_by below.
        let mut compound_agg_exprs: Vec<Expr> = Vec::new();
        let mut has_agg = false;
        // Every WITH item as (expression, alias), in clause order.
        let mut projections = Vec::new();
        // Variables visible to clauses that follow this WITH.
        let mut new_vars: Vec<VariableInfo> = Vec::new();
        // String reprs / names recorded for ORDER BY aggregate validation.
        let mut projected_aggregate_reprs: HashSet<String> = HashSet::new();
        let mut projected_simple_reprs: HashSet<String> = HashSet::new();
        // Output column names seen so far; used to detect duplicates.
        let mut projected_aliases: HashSet<String> = HashSet::new();
        // Deferred error flag: reported only after ORDER BY validation (see below).
        let mut has_unaliased_non_variable_expr = false;

        for item in &with_clause.items {
            match item {
                ReturnItem::All => {
                    // WITH * - add all variables in scope
                    for v in vars_in_scope {
                        projections.push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
                        projected_aliases.insert(v.name.clone());
                        projected_simple_reprs.insert(v.name.clone());
                    }
                    new_vars.extend(vars_in_scope.iter().cloned());
                }
                ReturnItem::Expr { expr, alias, .. } => {
                    if matches!(expr, Expr::Wildcard) {
                        // A wildcard expression item is expanded exactly like `ReturnItem::All`.
                        for v in vars_in_scope {
                            projections
                                .push((Expr::Variable(v.name.clone()), Some(v.name.clone())));
                            projected_aliases.insert(v.name.clone());
                            projected_simple_reprs.insert(v.name.clone());
                        }
                        new_vars.extend(vars_in_scope.iter().cloned());
                    } else {
                        // Validate expression variables and syntax
                        validate_expression_variables(expr, vars_in_scope)?;
                        validate_expression(expr, vars_in_scope)?;
                        // Pattern predicates are not allowed in WITH
                        if contains_pattern_predicate(expr) {
                            return Err(anyhow!(
                                "SyntaxError: UnexpectedSyntax - Pattern predicates are not allowed in WITH"
                            ));
                        }

                        projections.push((expr.clone(), alias.clone()));
                        // Classify the item: bare aggregate, compound aggregate
                        // (window functions excluded), or grouping key.
                        if expr.is_aggregate() && !is_compound_aggregate(expr) {
                            // Bare aggregate — push directly
                            has_agg = true;
                            aggregates.push(expr.clone());
                            projected_aggregate_reprs.insert(expr.to_string_repr());
                        } else if !is_window_function(expr)
                            && (expr.is_aggregate() || contains_aggregate_recursive(expr))
                        {
                            // Compound aggregate or expression containing aggregates
                            has_agg = true;
                            compound_agg_exprs.push(expr.clone());
                            // Register each inner aggregate once, keyed by its string repr.
                            for inner in extract_inner_aggregates(expr) {
                                let repr = inner.to_string_repr();
                                if !projected_aggregate_reprs.contains(&repr) {
                                    aggregates.push(inner);
                                    projected_aggregate_reprs.insert(repr);
                                }
                            }
                        } else if !group_by.contains(expr) {
                            group_by.push(expr.clone());
                            if matches!(expr, Expr::Variable(_) | Expr::Property(_, _)) {
                                projected_simple_reprs.insert(expr.to_string_repr());
                            }
                        }

                        // Preserve non-scalar type information when WITH aliases
                        // entity/path-capable expressions.
                        if let Some(a) = alias {
                            if projected_aliases.contains(a) {
                                return Err(anyhow!(
                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
                                    a
                                ));
                            }
                            let inferred = infer_with_output_type(expr, vars_in_scope);
                            new_vars.push(VariableInfo::new(a.clone(), inferred));
                            projected_aliases.insert(a.clone());
                        } else if let Expr::Variable(v) = expr {
                            if projected_aliases.contains(v) {
                                return Err(anyhow!(
                                    "SyntaxError: ColumnNameConflict - Duplicate column name '{}' in WITH",
                                    v
                                ));
                            }
                            // Preserve the original type if the variable is just passed through
                            if let Some(existing) = find_var_in_scope(vars_in_scope, v) {
                                new_vars.push(existing.clone());
                            } else {
                                new_vars.push(VariableInfo::new(v.clone(), VariableType::Scalar));
                            }
                            projected_aliases.insert(v.clone());
                        } else {
                            // Unaliased non-variable expression: it adds no entry to
                            // `new_vars`; the error is raised after ORDER BY validation.
                            has_unaliased_non_variable_expr = true;
                        }
                    }
                }
            }
        }

        // Collect extra variables that need to survive the projection stage
        // for later WHERE / ORDER BY evaluation, then strip them afterwards.
        let projected_names: HashSet<&str> = new_vars.iter().map(|v| v.name.as_str()).collect();
        let mut passthrough_extras: Vec<String> = Vec::new();
        // Guards against adding the same extra twice while keeping insertion order.
        let mut seen_passthrough: HashSet<String> = HashSet::new();

        if let Some(predicate) = &with_clause.where_clause {
            for name in collect_expr_variables(predicate) {
                // Only pre-WITH variables that the WITH itself does not project.
                if !projected_names.contains(name.as_str())
                    && find_var_in_scope(vars_in_scope, &name).is_some()
                    && seen_passthrough.insert(name.clone())
                {
                    passthrough_extras.push(name);
                }
            }
        }

        // Non-aggregating WITH allows ORDER BY to reference incoming variables.
        // Carry those variables through the projection so Sort can resolve them.
        if !has_agg && let Some(order_by) = &with_clause.order_by {
            for item in order_by {
                for name in collect_expr_variables(&item.expr) {
                    if !projected_names.contains(name.as_str())
                        && find_var_in_scope(vars_in_scope, &name).is_some()
                        && seen_passthrough.insert(name.clone())
                    {
                        passthrough_extras.push(name);
                    }
                }
            }
        }

        let needs_cleanup = !passthrough_extras.is_empty();
        // Extras ride along as ordinary self-aliased projections until the cleanup Project.
        for extra in &passthrough_extras {
            projections.push((Expr::Variable(extra.clone()), Some(extra.clone())));
        }

        // Validate compound aggregate expressions: non-aggregate refs must be
        // individually present in the group_by as simple variables or properties.
        if has_agg {
            let group_by_reprs: HashSet<String> =
                group_by.iter().map(|e| e.to_string_repr()).collect();
            for expr in &compound_agg_exprs {
                let mut refs = Vec::new();
                collect_non_aggregate_refs(expr, false, &mut refs);
                for r in &refs {
                    let is_covered = match r {
                        NonAggregateRef::Var(v) => group_by_reprs.contains(v),
                        NonAggregateRef::Property { repr, .. } => group_by_reprs.contains(repr),
                    };
                    if !is_covered {
                        return Err(anyhow!(
                            "SyntaxError: AmbiguousAggregationExpression - Expression mixes aggregation with non-grouped reference"
                        ));
                    }
                }
            }
        }

        if has_agg {
            plan = LogicalPlan::Aggregate {
                input: Box::new(plan),
                group_by,
                aggregates,
            };

            // Insert a renaming Project so downstream clauses (WHERE, RETURN)
            // can reference the WITH aliases instead of raw column names.
            let rename_projections: Vec<(Expr, Option<String>)> = projections
                .iter()
                .map(|(expr, alias)| {
                    if expr.is_aggregate() && !is_compound_aggregate(expr) {
                        // Bare aggregate — reference by column name
                        (Expr::Variable(aggregate_column_name(expr)), alias.clone())
                    } else if is_compound_aggregate(expr)
                        || (!expr.is_aggregate() && contains_aggregate_recursive(expr))
                    {
                        // Compound aggregate — replace inner aggregates with
                        // column references, keep outer expression
                        (replace_aggregates_with_columns(expr), alias.clone())
                    } else {
                        // Grouping key (or passthrough extra): referenced by its string repr.
                        (Expr::Variable(expr.to_string_repr()), alias.clone())
                    }
                })
                .collect();
            plan = LogicalPlan::Project {
                input: Box::new(plan),
                projections: rename_projections,
            };
        } else if !projections.is_empty() {
            plan = LogicalPlan::Project {
                input: Box::new(plan),
                // Cloned because `projections` is read again when building the ORDER BY alias map.
                projections: projections.clone(),
            };
        }

        // Apply the WHERE filter (post-projection, with extras still visible).
        if let Some(predicate) = &with_clause.where_clause {
            plan = LogicalPlan::Filter {
                input: Box::new(plan),
                predicate: predicate.clone(),
                optional_variables: HashSet::new(),
            };
        }

        // Validate and apply ORDER BY for WITH clause.
        // Keep pre-WITH vars in scope for parser compatibility, then apply
        // stricter checks for aggregate-containing ORDER BY items.
        if let Some(order_by) = &with_clause.order_by {
            // Build a mapping from aliases and projected expression reprs to
            // output columns of the preceding Project/Aggregate pipeline.
            let with_order_aliases: HashMap<String, Expr> = projections
                .iter()
                .flat_map(|(expr, alias)| {
                    let output_col = if let Some(a) = alias {
                        a.clone()
                    } else if expr.is_aggregate() && !is_compound_aggregate(expr) {
                        aggregate_column_name(expr)
                    } else {
                        expr.to_string_repr()
                    };

                    let mut entries = Vec::new();
                    // ORDER BY alias
                    if let Some(a) = alias {
                        entries.push((a.clone(), Expr::Variable(output_col.clone())));
                    }
                    // ORDER BY projected expression (e.g. me.age)
                    entries.push((expr.to_string_repr(), Expr::Variable(output_col)));
                    entries
                })
                .collect();

            // Validation scope: projected variables first, then any pre-WITH
            // variable whose name is not already taken by a projected one.
            let order_by_scope: Vec<VariableInfo> = {
                let mut scope = new_vars.clone();
                for v in vars_in_scope {
                    if !is_var_in_scope(&scope, &v.name) {
                        scope.push(v.clone());
                    }
                }
                scope
            };
            for item in order_by {
                validate_expression_variables(&item.expr, &order_by_scope)?;
                validate_expression(&item.expr, &order_by_scope)?;
                let has_aggregate_in_item = contains_aggregate_recursive(&item.expr);
                if has_aggregate_in_item && !has_agg {
                    return Err(anyhow!(
                        "SyntaxError: InvalidAggregation - Aggregation functions not allowed in ORDER BY of WITH"
                    ));
                }
                if has_agg && has_aggregate_in_item {
                    validate_with_order_by_aggregate_item(
                        &item.expr,
                        &projected_aggregate_reprs,
                        &projected_simple_reprs,
                        &projected_aliases,
                    )?;
                }
            }
            let rewritten_order_by: Vec<SortItem> = order_by
                .iter()
                .map(|item| {
                    let mut expr =
                        rewrite_order_by_expr_with_aliases(&item.expr, &with_order_aliases);
                    if has_agg {
                        // Rewrite any aggregate calls to the aggregate output
                        // columns produced by Aggregate.
                        expr = replace_aggregates_with_columns(&expr);
                        // Then re-map projected property expressions to aliases
                        // from the WITH projection.
                        expr = rewrite_order_by_expr_with_aliases(&expr, &with_order_aliases);
                    }
                    SortItem {
                        expr,
                        ascending: item.ascending,
                    }
                })
                .collect();
            plan = LogicalPlan::Sort {
                input: Box::new(plan),
                order_by: rewritten_order_by,
            };
        }

        // Non-variable expressions in WITH must be aliased.
        // This check is intentionally placed after ORDER BY validation so
        // higher-priority semantic errors (e.g., ambiguous aggregation in
        // ORDER BY) can surface first.
        if has_unaliased_non_variable_expr {
            return Err(anyhow!(
                "SyntaxError: NoExpressionAlias - All non-variable expressions in WITH must be aliased"
            ));
        }

        // Validate and apply SKIP/LIMIT for WITH clause
        // `map` + `transpose` propagates a parse error via `?`; `flatten` then
        // collapses the case where parsing succeeds but yields no value.
        let skip = with_clause
            .skip
            .as_ref()
            .map(|e| {
                self.note_folded_limit_skip(e);
                parse_non_negative_integer(e, "SKIP", &self.params)
            })
            .transpose()?
            .flatten();
        let fetch = with_clause
            .limit
            .as_ref()
            .map(|e| {
                self.note_folded_limit_skip(e);
                parse_non_negative_integer(e, "LIMIT", &self.params)
            })
            .transpose()?
            .flatten();

        if skip.is_some() || fetch.is_some() {
            plan = LogicalPlan::Limit {
                input: Box::new(plan),
                skip,
                fetch,
            };
        }

        // Strip passthrough columns that were only needed by WHERE / ORDER BY.
        if needs_cleanup {
            let cleanup_projections: Vec<(Expr, Option<String>)> = new_vars
                .iter()
                .map(|v| (Expr::Variable(v.name.clone()), Some(v.name.clone())))
                .collect();
            plan = LogicalPlan::Project {
                input: Box::new(plan),
                projections: cleanup_projections,
            };
        }

        // NOTE(review): Distinct wraps the plan after Sort and Limit, so
        // SKIP / LIMIT count rows before deduplication and the sorted order has
        // to survive the Distinct operator. Confirm this matches the intended
        // `WITH DISTINCT ... ORDER BY ... SKIP ... LIMIT` semantics.
        if with_clause.distinct {
            plan = LogicalPlan::Distinct {
                input: Box::new(plan),
            };
        }

        Ok((plan, new_vars))
    }
7257
7258    fn plan_with_recursive(
7259        &self,
7260        with_recursive: &WithRecursiveClause,
7261        _prev_plan: LogicalPlan,
7262        vars_in_scope: &[VariableInfo],
7263    ) -> Result<LogicalPlan> {
7264        // WITH RECURSIVE requires a UNION query with anchor and recursive parts
7265        match &*with_recursive.query {
7266            Query::Union { left, right, .. } => {
7267                // Plan the anchor (initial) query with current scope
7268                let initial_plan = self.rewrite_and_plan_typed(*left.clone(), vars_in_scope)?;
7269
7270                // Plan the recursive query with the CTE name added to scope
7271                // so it can reference itself
7272                let mut recursive_scope = vars_in_scope.to_vec();
7273                recursive_scope.push(VariableInfo::new(
7274                    with_recursive.name.clone(),
7275                    VariableType::Scalar,
7276                ));
7277                let recursive_plan =
7278                    self.rewrite_and_plan_typed(*right.clone(), &recursive_scope)?;
7279
7280                Ok(LogicalPlan::RecursiveCTE {
7281                    cte_name: with_recursive.name.clone(),
7282                    initial: Box::new(initial_plan),
7283                    recursive: Box::new(recursive_plan),
7284                })
7285            }
7286            _ => Err(anyhow::anyhow!(
7287                "WITH RECURSIVE requires a UNION query with anchor and recursive parts"
7288            )),
7289        }
7290    }
7291
7292    pub fn properties_to_expr(&self, variable: &str, properties: &Option<Expr>) -> Option<Expr> {
7293        let entries = match properties {
7294            Some(Expr::Map(entries)) => entries,
7295            _ => return None,
7296        };
7297
7298        if entries.is_empty() {
7299            return None;
7300        }
7301        let mut final_expr = None;
7302        for (prop, val_expr) in entries {
7303            let eq_expr = Expr::BinaryOp {
7304                left: Box::new(Expr::Property(
7305                    Box::new(Expr::Variable(variable.to_string())),
7306                    prop.clone(),
7307                )),
7308                op: BinaryOp::Eq,
7309                right: Box::new(val_expr.clone()),
7310            };
7311
7312            if let Some(e) = final_expr {
7313                final_expr = Some(Expr::BinaryOp {
7314                    left: Box::new(e),
7315                    op: BinaryOp::And,
7316                    right: Box::new(eq_expr),
7317                });
7318            } else {
7319                final_expr = Some(eq_expr);
7320            }
7321        }
7322        final_expr
7323    }
7324
7325    /// Build a filter expression from node properties and labels.
7326    ///
7327    /// This is used for TraverseMainByType where we need to filter target nodes
7328    /// by both labels and properties. Label checks use hasLabel(variable, 'label').
7329    pub fn node_filter_expr(
7330        &self,
7331        variable: &str,
7332        labels: &[String],
7333        properties: &Option<Expr>,
7334    ) -> Option<Expr> {
7335        let mut final_expr = None;
7336
7337        // Add label checks using hasLabel(variable, 'label')
7338        for label in labels {
7339            let label_check = Expr::FunctionCall {
7340                name: "hasLabel".to_string(),
7341                args: vec![
7342                    Expr::Variable(variable.to_string()),
7343                    Expr::Literal(CypherLiteral::String(label.clone())),
7344                ],
7345                distinct: false,
7346                window_spec: None,
7347            };
7348
7349            final_expr = match final_expr {
7350                Some(e) => Some(Expr::BinaryOp {
7351                    left: Box::new(e),
7352                    op: BinaryOp::And,
7353                    right: Box::new(label_check),
7354                }),
7355                None => Some(label_check),
7356            };
7357        }
7358
7359        // Add property checks
7360        if let Some(prop_expr) = self.properties_to_expr(variable, properties) {
7361            final_expr = match final_expr {
7362                Some(e) => Some(Expr::BinaryOp {
7363                    left: Box::new(e),
7364                    op: BinaryOp::And,
7365                    right: Box::new(prop_expr),
7366                }),
7367                None => Some(prop_expr),
7368            };
7369        }
7370
7371        final_expr
7372    }
7373
7374    /// Create a filter plan that ensures traversed target matches a bound variable.
7375    ///
7376    /// Used in EXISTS subquery patterns where the target is already bound.
7377    /// Compares the target's VID against the bound variable's VID.
7378    fn wrap_with_bound_target_filter(plan: LogicalPlan, target_variable: &str) -> LogicalPlan {
7379        // Compare the traverse-discovered target's VID against the bound variable's VID.
7380        // Left side: Property access on the variable from current scope.
7381        // Right side: Variable column "{var}._vid" from traverse output (outer scope).
7382        // We use Variable("{var}._vid") to access the VID column from the traverse output,
7383        // not Property(Variable("{var}"), "_vid") because the column is already flattened.
7384        let bound_check = Expr::BinaryOp {
7385            left: Box::new(Expr::Property(
7386                Box::new(Expr::Variable(target_variable.to_string())),
7387                "_vid".to_string(),
7388            )),
7389            op: BinaryOp::Eq,
7390            right: Box::new(Expr::Variable(format!("{}._vid", target_variable))),
7391        };
7392        LogicalPlan::Filter {
7393            input: Box::new(plan),
7394            predicate: bound_check,
7395            optional_variables: HashSet::new(),
7396        }
7397    }
7398
7399    /// Replace a Scan node matching the variable with a VectorKnn node
7400    fn replace_scan_with_knn(
7401        plan: LogicalPlan,
7402        variable: &str,
7403        property: &str,
7404        query: Expr,
7405        threshold: Option<f32>,
7406    ) -> LogicalPlan {
7407        match plan {
7408            LogicalPlan::Scan {
7409                label_id,
7410                labels,
7411                variable: scan_var,
7412                filter,
7413                optional,
7414            } => {
7415                if scan_var == variable {
7416                    // Inject any existing scan filter into VectorKnn?
7417                    // VectorKnn doesn't support pre-filtering natively in logical plan yet (except threshold).
7418                    // Typically filter is applied post-Knn or during Knn if supported.
7419                    // For now, we assume filter is residual or handled by `extract_vector_similarity` which separates residual.
7420                    // If `filter` is present on Scan, it must be preserved.
7421                    // We can wrap VectorKnn in Filter if Scan had filter.
7422
7423                    let knn = LogicalPlan::VectorKnn {
7424                        label_id,
7425                        variable: variable.to_string(),
7426                        property: property.to_string(),
7427                        query,
7428                        k: 100, // Default K, should push down LIMIT
7429                        threshold,
7430                    };
7431
7432                    if let Some(f) = filter {
7433                        LogicalPlan::Filter {
7434                            input: Box::new(knn),
7435                            predicate: f,
7436                            optional_variables: HashSet::new(),
7437                        }
7438                    } else {
7439                        knn
7440                    }
7441                } else {
7442                    LogicalPlan::Scan {
7443                        label_id,
7444                        labels,
7445                        variable: scan_var,
7446                        filter,
7447                        optional,
7448                    }
7449                }
7450            }
7451            LogicalPlan::Filter {
7452                input,
7453                predicate,
7454                optional_variables,
7455            } => LogicalPlan::Filter {
7456                input: Box::new(Self::replace_scan_with_knn(
7457                    *input, variable, property, query, threshold,
7458                )),
7459                predicate,
7460                optional_variables,
7461            },
7462            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7463                input: Box::new(Self::replace_scan_with_knn(
7464                    *input, variable, property, query, threshold,
7465                )),
7466                projections,
7467            },
7468            LogicalPlan::Limit { input, skip, fetch } => {
7469                // If we encounter Limit, we should ideally push K down to VectorKnn
7470                // But replace_scan_with_knn is called from plan_where_clause which is inside plan_match.
7471                // Limit comes later.
7472                // To support Limit pushdown, we need a separate optimizer pass or do it in plan_single.
7473                LogicalPlan::Limit {
7474                    input: Box::new(Self::replace_scan_with_knn(
7475                        *input, variable, property, query, threshold,
7476                    )),
7477                    skip,
7478                    fetch,
7479                }
7480            }
7481            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7482                left: Box::new(Self::replace_scan_with_knn(
7483                    *left,
7484                    variable,
7485                    property,
7486                    query.clone(),
7487                    threshold,
7488                )),
7489                right: Box::new(Self::replace_scan_with_knn(
7490                    *right, variable, property, query, threshold,
7491                )),
7492            },
7493            other => other,
7494        }
7495    }
7496
7497    /// Find the label_id for a Scan node matching the given variable
7498    fn find_scan_label_id(plan: &LogicalPlan, variable: &str) -> Option<u16> {
7499        match plan {
7500            LogicalPlan::Scan {
7501                label_id,
7502                variable: var,
7503                ..
7504            } if var == variable => Some(*label_id),
7505            LogicalPlan::ScanAll { variable: var, .. } if var == variable => Some(0),
7506            LogicalPlan::Filter { input, .. }
7507            | LogicalPlan::Project { input, .. }
7508            | LogicalPlan::Sort { input, .. }
7509            | LogicalPlan::Limit { input, .. }
7510            | LogicalPlan::Aggregate { input, .. }
7511            | LogicalPlan::Apply { input, .. } => Self::find_scan_label_id(input, variable),
7512            LogicalPlan::CrossJoin { left, right } => Self::find_scan_label_id(left, variable)
7513                .or_else(|| Self::find_scan_label_id(right, variable)),
7514            LogicalPlan::Traverse { input, .. } => Self::find_scan_label_id(input, variable),
7515            _ => None,
7516        }
7517    }
7518
7519    /// Push a predicate into a Scan's filter for the specified variable
7520    fn push_predicate_to_scan(plan: LogicalPlan, variable: &str, predicate: Expr) -> LogicalPlan {
7521        match plan {
7522            LogicalPlan::Scan {
7523                label_id,
7524                labels,
7525                variable: var,
7526                filter,
7527                optional,
7528            } if var == variable => {
7529                // Merge the predicate with existing filter
7530                let new_filter = match filter {
7531                    Some(existing) => Some(Expr::BinaryOp {
7532                        left: Box::new(existing),
7533                        op: BinaryOp::And,
7534                        right: Box::new(predicate),
7535                    }),
7536                    None => Some(predicate),
7537                };
7538                LogicalPlan::Scan {
7539                    label_id,
7540                    labels,
7541                    variable: var,
7542                    filter: new_filter,
7543                    optional,
7544                }
7545            }
7546            LogicalPlan::ScanAll {
7547                variable: var,
7548                filter,
7549                optional,
7550            } if var == variable => {
7551                let new_filter = match filter {
7552                    Some(existing) => Some(Expr::BinaryOp {
7553                        left: Box::new(existing),
7554                        op: BinaryOp::And,
7555                        right: Box::new(predicate),
7556                    }),
7557                    None => Some(predicate),
7558                };
7559                LogicalPlan::ScanAll {
7560                    variable: var,
7561                    filter: new_filter,
7562                    optional,
7563                }
7564            }
7565            LogicalPlan::Filter {
7566                input,
7567                predicate: p,
7568                optional_variables: opt_vars,
7569            } => LogicalPlan::Filter {
7570                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7571                predicate: p,
7572                optional_variables: opt_vars,
7573            },
7574            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7575                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7576                projections,
7577            },
7578            LogicalPlan::CrossJoin { left, right } => {
7579                // Check which side has the variable
7580                if Self::find_scan_label_id(&left, variable).is_some() {
7581                    LogicalPlan::CrossJoin {
7582                        left: Box::new(Self::push_predicate_to_scan(*left, variable, predicate)),
7583                        right,
7584                    }
7585                } else {
7586                    LogicalPlan::CrossJoin {
7587                        left,
7588                        right: Box::new(Self::push_predicate_to_scan(*right, variable, predicate)),
7589                    }
7590                }
7591            }
7592            LogicalPlan::Traverse {
7593                input,
7594                edge_type_ids,
7595                direction,
7596                source_variable,
7597                target_variable,
7598                target_label_id,
7599                step_variable,
7600                min_hops,
7601                max_hops,
7602                optional,
7603                target_filter,
7604                path_variable,
7605                edge_properties,
7606                is_variable_length,
7607                optional_pattern_vars,
7608                scope_match_variables,
7609                edge_filter_expr,
7610                path_mode,
7611                qpp_steps,
7612            } => LogicalPlan::Traverse {
7613                input: Box::new(Self::push_predicate_to_scan(*input, variable, predicate)),
7614                edge_type_ids,
7615                direction,
7616                source_variable,
7617                target_variable,
7618                target_label_id,
7619                step_variable,
7620                min_hops,
7621                max_hops,
7622                optional,
7623                target_filter,
7624                path_variable,
7625                edge_properties,
7626                is_variable_length,
7627                optional_pattern_vars,
7628                scope_match_variables,
7629                edge_filter_expr,
7630                path_mode,
7631                qpp_steps,
7632            },
7633            other => other,
7634        }
7635    }
7636
7637    /// Extract predicates that reference only the specified variable
7638    fn extract_variable_predicates(predicate: &Expr, variable: &str) -> (Vec<Expr>, Option<Expr>) {
7639        let analyzer = PredicateAnalyzer::new();
7640        let analysis = analyzer.analyze(predicate, variable);
7641
7642        // Return pushable predicates and combined residual
7643        let residual = if analysis.residual.is_empty() {
7644            None
7645        } else {
7646            let mut iter = analysis.residual.into_iter();
7647            let first = iter.next().unwrap();
7648            Some(iter.fold(first, |acc, e| Expr::BinaryOp {
7649                left: Box::new(acc),
7650                op: BinaryOp::And,
7651                right: Box::new(e),
7652            }))
7653        };
7654
7655        (analysis.pushable, residual)
7656    }
7657
7658    // =====================================================================
7659    // Apply Predicate Pushdown - Helper Functions
7660    // =====================================================================
7661
7662    /// Split AND-connected predicates into a list.
7663    fn split_and_conjuncts(expr: &Expr) -> Vec<Expr> {
7664        match expr {
7665            Expr::BinaryOp {
7666                left,
7667                op: BinaryOp::And,
7668                right,
7669            } => {
7670                let mut result = Self::split_and_conjuncts(left);
7671                result.extend(Self::split_and_conjuncts(right));
7672                result
7673            }
7674            _ => vec![expr.clone()],
7675        }
7676    }
7677
7678    /// Combine predicates with AND.
7679    fn combine_predicates(predicates: Vec<Expr>) -> Option<Expr> {
7680        if predicates.is_empty() {
7681            return None;
7682        }
7683        let mut result = predicates[0].clone();
7684        for pred in predicates.iter().skip(1) {
7685            result = Expr::BinaryOp {
7686                left: Box::new(result),
7687                op: BinaryOp::And,
7688                right: Box::new(pred.clone()),
7689            };
7690        }
7691        Some(result)
7692    }
7693
7694    /// Collect all variable names referenced in an expression.
7695    fn collect_expr_variables(expr: &Expr) -> HashSet<String> {
7696        let mut vars = HashSet::new();
7697        Self::collect_expr_variables_impl(expr, &mut vars);
7698        vars
7699    }
7700
7701    fn collect_expr_variables_impl(expr: &Expr, vars: &mut HashSet<String>) {
7702        match expr {
7703            Expr::Variable(name) => {
7704                vars.insert(name.clone());
7705            }
7706            Expr::Property(inner, _) => {
7707                if let Expr::Variable(name) = inner.as_ref() {
7708                    vars.insert(name.clone());
7709                } else {
7710                    Self::collect_expr_variables_impl(inner, vars);
7711                }
7712            }
7713            Expr::BinaryOp { left, right, .. } => {
7714                Self::collect_expr_variables_impl(left, vars);
7715                Self::collect_expr_variables_impl(right, vars);
7716            }
7717            Expr::UnaryOp { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7718            Expr::IsNull(e) | Expr::IsNotNull(e) => Self::collect_expr_variables_impl(e, vars),
7719            Expr::FunctionCall { args, .. } => {
7720                for arg in args {
7721                    Self::collect_expr_variables_impl(arg, vars);
7722                }
7723            }
7724            Expr::List(items) => {
7725                for item in items {
7726                    Self::collect_expr_variables_impl(item, vars);
7727                }
7728            }
7729            Expr::Case {
7730                expr,
7731                when_then,
7732                else_expr,
7733            } => {
7734                if let Some(e) = expr {
7735                    Self::collect_expr_variables_impl(e, vars);
7736                }
7737                for (w, t) in when_then {
7738                    Self::collect_expr_variables_impl(w, vars);
7739                    Self::collect_expr_variables_impl(t, vars);
7740                }
7741                if let Some(e) = else_expr {
7742                    Self::collect_expr_variables_impl(e, vars);
7743                }
7744            }
7745            Expr::LabelCheck { expr, .. } => Self::collect_expr_variables_impl(expr, vars),
7746            // Skip Quantifier/Reduce/ListComprehension/PatternComprehension —
7747            // they introduce local variable bindings not in outer scope.
7748            _ => {}
7749        }
7750    }
7751
7752    /// Collect all variables produced by a logical plan.
7753    fn collect_plan_variables(plan: &LogicalPlan) -> HashSet<String> {
7754        let mut vars = HashSet::new();
7755        Self::collect_plan_variables_impl(plan, &mut vars);
7756        vars
7757    }
7758
7759    fn collect_plan_variables_impl(plan: &LogicalPlan, vars: &mut HashSet<String>) {
7760        match plan {
7761            LogicalPlan::Scan { variable, .. } => {
7762                vars.insert(variable.clone());
7763            }
7764            LogicalPlan::Traverse {
7765                target_variable,
7766                step_variable,
7767                input,
7768                path_variable,
7769                ..
7770            } => {
7771                vars.insert(target_variable.clone());
7772                if let Some(sv) = step_variable {
7773                    vars.insert(sv.clone());
7774                }
7775                if let Some(pv) = path_variable {
7776                    vars.insert(pv.clone());
7777                }
7778                Self::collect_plan_variables_impl(input, vars);
7779            }
7780            LogicalPlan::Filter { input, .. } => Self::collect_plan_variables_impl(input, vars),
7781            LogicalPlan::Project { input, projections } => {
7782                for (expr, alias) in projections {
7783                    if let Some(a) = alias {
7784                        vars.insert(a.clone());
7785                    } else if let Expr::Variable(v) = expr {
7786                        vars.insert(v.clone());
7787                    }
7788                }
7789                Self::collect_plan_variables_impl(input, vars);
7790            }
7791            LogicalPlan::Apply {
7792                input, subquery, ..
7793            } => {
7794                Self::collect_plan_variables_impl(input, vars);
7795                Self::collect_plan_variables_impl(subquery, vars);
7796            }
7797            LogicalPlan::CrossJoin { left, right } => {
7798                Self::collect_plan_variables_impl(left, vars);
7799                Self::collect_plan_variables_impl(right, vars);
7800            }
7801            LogicalPlan::Unwind {
7802                input, variable, ..
7803            } => {
7804                vars.insert(variable.clone());
7805                Self::collect_plan_variables_impl(input, vars);
7806            }
7807            LogicalPlan::Aggregate { input, .. } => {
7808                Self::collect_plan_variables_impl(input, vars);
7809            }
7810            LogicalPlan::Distinct { input } => {
7811                Self::collect_plan_variables_impl(input, vars);
7812            }
7813            LogicalPlan::Sort { input, .. } => {
7814                Self::collect_plan_variables_impl(input, vars);
7815            }
7816            LogicalPlan::Limit { input, .. } => {
7817                Self::collect_plan_variables_impl(input, vars);
7818            }
7819            LogicalPlan::VectorKnn { variable, .. } => {
7820                vars.insert(variable.clone());
7821            }
7822            LogicalPlan::ProcedureCall { yield_items, .. } => {
7823                for (name, alias) in yield_items {
7824                    vars.insert(alias.clone().unwrap_or_else(|| name.clone()));
7825                }
7826            }
7827            LogicalPlan::ShortestPath {
7828                input,
7829                path_variable,
7830                ..
7831            } => {
7832                vars.insert(path_variable.clone());
7833                Self::collect_plan_variables_impl(input, vars);
7834            }
7835            LogicalPlan::AllShortestPaths {
7836                input,
7837                path_variable,
7838                ..
7839            } => {
7840                vars.insert(path_variable.clone());
7841                Self::collect_plan_variables_impl(input, vars);
7842            }
7843            LogicalPlan::RecursiveCTE {
7844                initial, recursive, ..
7845            } => {
7846                Self::collect_plan_variables_impl(initial, vars);
7847                Self::collect_plan_variables_impl(recursive, vars);
7848            }
7849            LogicalPlan::SubqueryCall {
7850                input, subquery, ..
7851            } => {
7852                Self::collect_plan_variables_impl(input, vars);
7853                Self::collect_plan_variables_impl(subquery, vars);
7854            }
7855            _ => {}
7856        }
7857    }
7858
7859    /// Extract predicates that only reference variables from Apply's input.
7860    /// Returns (input_only_predicates, remaining_predicates).
7861    fn extract_apply_input_predicates(
7862        predicate: &Expr,
7863        input_variables: &HashSet<String>,
7864        subquery_new_variables: &HashSet<String>,
7865    ) -> (Vec<Expr>, Vec<Expr>) {
7866        let conjuncts = Self::split_and_conjuncts(predicate);
7867        let mut input_preds = Vec::new();
7868        let mut remaining = Vec::new();
7869
7870        for conj in conjuncts {
7871            let vars = Self::collect_expr_variables(&conj);
7872
7873            // Predicate only references input variables (none from subquery)
7874            let refs_input_only = vars.iter().all(|v| input_variables.contains(v));
7875            let refs_any_subquery = vars.iter().any(|v| subquery_new_variables.contains(v));
7876
7877            if refs_input_only && !refs_any_subquery && !vars.is_empty() {
7878                input_preds.push(conj);
7879            } else {
7880                remaining.push(conj);
7881            }
7882        }
7883
7884        (input_preds, remaining)
7885    }
7886
7887    /// Push eligible predicates into Apply.input_filter.
7888    /// This filters input rows BEFORE executing the correlated subquery.
7889    fn push_predicates_to_apply(plan: LogicalPlan, current_predicate: &mut Expr) -> LogicalPlan {
7890        match plan {
7891            LogicalPlan::Apply {
7892                input,
7893                subquery,
7894                input_filter,
7895            } => {
7896                // Collect variables from input plan
7897                let input_vars = Self::collect_plan_variables(&input);
7898
7899                // Collect NEW variables introduced by subquery (not in input)
7900                let subquery_vars = Self::collect_plan_variables(&subquery);
7901                let new_subquery_vars: HashSet<String> =
7902                    subquery_vars.difference(&input_vars).cloned().collect();
7903
7904                // Extract predicates that only reference input variables
7905                let (input_preds, remaining) = Self::extract_apply_input_predicates(
7906                    current_predicate,
7907                    &input_vars,
7908                    &new_subquery_vars,
7909                );
7910
7911                // Update current_predicate to only remaining predicates
7912                *current_predicate = if remaining.is_empty() {
7913                    Expr::TRUE
7914                } else {
7915                    Self::combine_predicates(remaining).unwrap()
7916                };
7917
7918                // Combine extracted predicates with existing input_filter
7919                let new_input_filter = if input_preds.is_empty() {
7920                    input_filter
7921                } else {
7922                    let extracted = Self::combine_predicates(input_preds).unwrap();
7923                    match input_filter {
7924                        Some(existing) => Some(Expr::BinaryOp {
7925                            left: Box::new(existing),
7926                            op: BinaryOp::And,
7927                            right: Box::new(extracted),
7928                        }),
7929                        None => Some(extracted),
7930                    }
7931                };
7932
7933                // Recurse into input plan
7934                let new_input = Self::push_predicates_to_apply(*input, current_predicate);
7935
7936                LogicalPlan::Apply {
7937                    input: Box::new(new_input),
7938                    subquery,
7939                    input_filter: new_input_filter,
7940                }
7941            }
7942            // Recurse into other plan nodes
7943            LogicalPlan::Filter {
7944                input,
7945                predicate,
7946                optional_variables,
7947            } => LogicalPlan::Filter {
7948                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7949                predicate,
7950                optional_variables,
7951            },
7952            LogicalPlan::Project { input, projections } => LogicalPlan::Project {
7953                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7954                projections,
7955            },
7956            LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
7957                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7958                order_by,
7959            },
7960            LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
7961                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7962                skip,
7963                fetch,
7964            },
7965            LogicalPlan::Aggregate {
7966                input,
7967                group_by,
7968                aggregates,
7969            } => LogicalPlan::Aggregate {
7970                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
7971                group_by,
7972                aggregates,
7973            },
7974            LogicalPlan::CrossJoin { left, right } => LogicalPlan::CrossJoin {
7975                left: Box::new(Self::push_predicates_to_apply(*left, current_predicate)),
7976                right: Box::new(Self::push_predicates_to_apply(*right, current_predicate)),
7977            },
7978            LogicalPlan::Traverse {
7979                input,
7980                edge_type_ids,
7981                direction,
7982                source_variable,
7983                target_variable,
7984                target_label_id,
7985                step_variable,
7986                min_hops,
7987                max_hops,
7988                optional,
7989                target_filter,
7990                path_variable,
7991                edge_properties,
7992                is_variable_length,
7993                optional_pattern_vars,
7994                scope_match_variables,
7995                edge_filter_expr,
7996                path_mode,
7997                qpp_steps,
7998            } => LogicalPlan::Traverse {
7999                input: Box::new(Self::push_predicates_to_apply(*input, current_predicate)),
8000                edge_type_ids,
8001                direction,
8002                source_variable,
8003                target_variable,
8004                target_label_id,
8005                step_variable,
8006                min_hops,
8007                max_hops,
8008                optional,
8009                target_filter,
8010                path_variable,
8011                edge_properties,
8012                is_variable_length,
8013                optional_pattern_vars,
8014                scope_match_variables,
8015                edge_filter_expr,
8016                path_mode,
8017                qpp_steps,
8018            },
8019            other => other,
8020        }
8021    }
8022}
8023
8024/// Get the expected column name for an aggregate expression.
8025///
8026/// This is the single source of truth for aggregate column naming, used by:
8027/// - Logical planner (to create column references)
8028/// - Physical planner (to rename DataFusion's auto-generated column names)
8029/// - Fallback executor (to name result columns)
8030pub fn aggregate_column_name(expr: &Expr) -> String {
8031    expr.to_string_repr()
8032}
8033
/// Output produced by `EXPLAIN` — a human-readable plan with index and cost info.
///
/// Built by `QueryPlanner::explain_logical_plan`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ExplainOutput {
    /// Pretty-printed (`{:#?}`) debug rendering of the logical plan tree.
    pub plan_text: String,
    /// Index usage entries collected while walking the plan
    /// (hash-index hits on filtered scans, vector KNN operators).
    pub index_usage: Vec<IndexUsage>,
    /// Rough row and cost estimates for the full plan.
    pub cost_estimates: CostEstimates,
    /// Planner warnings (e.g., missing index, forced full scan).
    ///
    /// `explain_logical_plan` currently always leaves this empty.
    pub warnings: Vec<String>,
    /// Suggested indexes that would improve this query.
    pub suggestions: Vec<IndexSuggestion>,
}
8048
/// Suggestion for creating an index to improve query performance.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexSuggestion {
    /// Label or edge type that would benefit from the index.
    ///
    /// Temporal suggestions do not resolve a concrete label and carry the
    /// placeholder text `"(detected from temporal query)"` here.
    pub label_or_type: String,
    /// Property to index.
    pub property: String,
    /// Recommended index type (e.g., `"SCALAR (BTree)"`).
    pub index_type: String,
    /// Human-readable explanation of the performance benefit.
    pub reason: String,
    /// Cypher statement template for creating the index.
    ///
    /// Temporal suggestions emit `YourLabel` as the label, so the statement
    /// must be edited before it can be executed.
    pub create_statement: String,
}
8063
/// Index availability report for a single scan operator.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexUsage {
    /// Name of the label the operator reads (`"?"` when a vector KNN
    /// operator's label id cannot be resolved in the schema).
    pub label_or_type: String,
    /// Property the index is defined on.
    pub property: String,
    /// Kind of index; the planner currently reports `"HASH"` and `"VECTOR"`.
    pub index_type: String,
    /// Whether the index was actually used for this scan.
    pub used: bool,
    /// Human-readable explanation of why the index was or was not used.
    pub reason: Option<String>,
}
8075
/// Rough cost and row count estimates for a complete logical plan.
///
/// `QueryPlanner::estimate_costs` currently fills in fixed placeholder
/// values (100 rows, cost 10) regardless of the plan.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CostEstimates {
    /// Estimated number of rows the plan will produce.
    pub estimated_rows: f64,
    /// Abstract cost units (lower is cheaper).
    pub estimated_cost: f64,
}
8084
8085impl QueryPlanner {
8086    /// Plan a query and produce an EXPLAIN report (plan text, index usage, costs).
8087    pub fn explain_plan(&self, ast: Query) -> Result<ExplainOutput> {
8088        let plan = self.plan(ast)?;
8089        self.explain_logical_plan(&plan)
8090    }
8091
8092    /// Produce an EXPLAIN report for an already-planned logical plan.
8093    pub fn explain_logical_plan(&self, plan: &LogicalPlan) -> Result<ExplainOutput> {
8094        let index_usage = self.analyze_index_usage(plan)?;
8095        let cost_estimates = self.estimate_costs(plan)?;
8096        let suggestions = self.collect_index_suggestions(plan);
8097        let warnings = Vec::new();
8098        let plan_text = format!("{:#?}", plan);
8099
8100        Ok(ExplainOutput {
8101            plan_text,
8102            index_usage,
8103            cost_estimates,
8104            warnings,
8105            suggestions,
8106        })
8107    }
8108
8109    fn analyze_index_usage(&self, plan: &LogicalPlan) -> Result<Vec<IndexUsage>> {
8110        let mut usage = Vec::new();
8111        self.collect_index_usage(plan, &mut usage);
8112        Ok(usage)
8113    }
8114
8115    fn collect_index_usage(&self, plan: &LogicalPlan, usage: &mut Vec<IndexUsage>) {
8116        match plan {
8117            LogicalPlan::Scan {
8118                label_id,
8119                filter: Some(filter),
8120                ..
8121            } => {
8122                // Detect indexed-property pushdown — issue #57. Run the same
8123                // analyzer the physical planner uses; if it reports a
8124                // hash-index hit, surface it in EXPLAIN.
8125                if let Some(label_name) = self.schema.label_name_by_id(*label_id) {
8126                    let analyzer = crate::query::pushdown::IndexAwareAnalyzer::new(&self.schema);
8127                    // The variable name is the scan's binding variable; we
8128                    // reach for it via the Scan node directly.
8129                    if let LogicalPlan::Scan { variable, .. } = plan {
8130                        let strategy = analyzer.analyze(filter, variable, *label_id);
8131                        for prop in strategy.hash_index_columns {
8132                            usage.push(IndexUsage {
8133                                label_or_type: label_name.to_string(),
8134                                property: prop,
8135                                index_type: "HASH".to_string(),
8136                                used: true,
8137                                reason: Some(
8138                                    "Hash index point lookup pushed into Lance scan".to_string(),
8139                                ),
8140                            });
8141                        }
8142                    }
8143                }
8144            }
8145            LogicalPlan::Scan { .. } => {}
8146            LogicalPlan::VectorKnn {
8147                label_id, property, ..
8148            } => {
8149                let label_name = self.schema.label_name_by_id(*label_id).unwrap_or("?");
8150                usage.push(IndexUsage {
8151                    label_or_type: label_name.to_string(),
8152                    property: property.clone(),
8153                    index_type: "VECTOR".to_string(),
8154                    used: true,
8155                    reason: None,
8156                });
8157            }
8158            LogicalPlan::Explain { plan } => self.collect_index_usage(plan, usage),
8159            LogicalPlan::Filter { input, .. } => self.collect_index_usage(input, usage),
8160            LogicalPlan::Project { input, .. } => self.collect_index_usage(input, usage),
8161            LogicalPlan::Limit { input, .. } => self.collect_index_usage(input, usage),
8162            LogicalPlan::Sort { input, .. } => self.collect_index_usage(input, usage),
8163            LogicalPlan::Aggregate { input, .. } => self.collect_index_usage(input, usage),
8164            LogicalPlan::Traverse { input, .. } => self.collect_index_usage(input, usage),
8165            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8166                self.collect_index_usage(left, usage);
8167                self.collect_index_usage(right, usage);
8168            }
8169            _ => {}
8170        }
8171    }
8172
8173    fn estimate_costs(&self, _plan: &LogicalPlan) -> Result<CostEstimates> {
8174        Ok(CostEstimates {
8175            estimated_rows: 100.0,
8176            estimated_cost: 10.0,
8177        })
8178    }
8179
8180    /// Collect index suggestions based on query patterns.
8181    ///
8182    /// Currently detects:
8183    /// - Temporal predicates from `uni.validAt()` function calls
8184    /// - Temporal predicates from `VALID_AT` macro expansion
8185    fn collect_index_suggestions(&self, plan: &LogicalPlan) -> Vec<IndexSuggestion> {
8186        let mut suggestions = Vec::new();
8187        self.collect_temporal_suggestions(plan, &mut suggestions);
8188        suggestions
8189    }
8190
8191    /// Recursively collect temporal index suggestions from the plan.
8192    fn collect_temporal_suggestions(
8193        &self,
8194        plan: &LogicalPlan,
8195        suggestions: &mut Vec<IndexSuggestion>,
8196    ) {
8197        match plan {
8198            LogicalPlan::Filter {
8199                input, predicate, ..
8200            } => {
8201                // Check for temporal patterns in the predicate
8202                self.detect_temporal_pattern(predicate, suggestions);
8203                // Recurse into input
8204                self.collect_temporal_suggestions(input, suggestions);
8205            }
8206            LogicalPlan::Explain { plan } => self.collect_temporal_suggestions(plan, suggestions),
8207            LogicalPlan::Project { input, .. } => {
8208                self.collect_temporal_suggestions(input, suggestions)
8209            }
8210            LogicalPlan::Limit { input, .. } => {
8211                self.collect_temporal_suggestions(input, suggestions)
8212            }
8213            LogicalPlan::Sort { input, .. } => {
8214                self.collect_temporal_suggestions(input, suggestions)
8215            }
8216            LogicalPlan::Aggregate { input, .. } => {
8217                self.collect_temporal_suggestions(input, suggestions)
8218            }
8219            LogicalPlan::Traverse { input, .. } => {
8220                self.collect_temporal_suggestions(input, suggestions)
8221            }
8222            LogicalPlan::Union { left, right, .. } | LogicalPlan::CrossJoin { left, right } => {
8223                self.collect_temporal_suggestions(left, suggestions);
8224                self.collect_temporal_suggestions(right, suggestions);
8225            }
8226            _ => {}
8227        }
8228    }
8229
8230    /// Detect temporal predicate patterns and suggest indexes.
8231    ///
8232    /// Detects two patterns:
8233    /// 1. `uni.validAt(node, 'start_prop', 'end_prop', time)` function call
8234    /// 2. `node.valid_from <= time AND (node.valid_to IS NULL OR node.valid_to > time)` from VALID_AT macro
8235    fn detect_temporal_pattern(&self, expr: &Expr, suggestions: &mut Vec<IndexSuggestion>) {
8236        match expr {
8237            // Pattern 1: uni.temporal.validAt() function call
8238            Expr::FunctionCall { name, args, .. }
8239                if (name.eq_ignore_ascii_case("uni.temporal.validAt")
8240                    || name.eq_ignore_ascii_case("validAt"))
8241                    && args.len() >= 2 =>
8242            {
8243                // args[0] = node, args[1] = start_prop, args[2] = end_prop, args[3] = time
8244                let start_prop = if let Some(Expr::Literal(CypherLiteral::String(s))) = args.get(1)
8245                {
8246                    s.clone()
8247                } else {
8248                    "valid_from".to_string()
8249                };
8250
8251                // Try to extract label from the node expression
8252                if let Some(var) = args.first().and_then(|e| e.extract_variable()) {
8253                    self.suggest_temporal_index(&var, &start_prop, suggestions);
8254                }
8255            }
8256
8257            // Pattern 2: VALID_AT macro expansion - look for property <= time pattern
8258            Expr::BinaryOp {
8259                left,
8260                op: BinaryOp::And,
8261                right,
8262            } => {
8263                // Check left side for `prop <= time` pattern (temporal start condition)
8264                if let Expr::BinaryOp {
8265                    left: prop_expr,
8266                    op: BinaryOp::LtEq,
8267                    ..
8268                } = left.as_ref()
8269                    && let Expr::Property(base, prop_name) = prop_expr.as_ref()
8270                    && (prop_name == "valid_from"
8271                        || prop_name.contains("start")
8272                        || prop_name.contains("from")
8273                        || prop_name.contains("begin"))
8274                    && let Some(var) = base.extract_variable()
8275                {
8276                    self.suggest_temporal_index(&var, prop_name, suggestions);
8277                }
8278
8279                // Recurse into both sides of AND
8280                self.detect_temporal_pattern(left.as_ref(), suggestions);
8281                self.detect_temporal_pattern(right.as_ref(), suggestions);
8282            }
8283
8284            // Recurse into other binary ops
8285            Expr::BinaryOp { left, right, .. } => {
8286                self.detect_temporal_pattern(left.as_ref(), suggestions);
8287                self.detect_temporal_pattern(right.as_ref(), suggestions);
8288            }
8289
8290            _ => {}
8291        }
8292    }
8293
8294    /// Suggest a scalar index for a temporal property if one doesn't already exist.
8295    fn suggest_temporal_index(
8296        &self,
8297        _variable: &str,
8298        property: &str,
8299        suggestions: &mut Vec<IndexSuggestion>,
8300    ) {
8301        // Check if a scalar index already exists for this property
8302        // We need to check all labels since we may not know the exact label from the variable
8303        let mut has_index = false;
8304
8305        for index in &self.schema.indexes {
8306            if let IndexDefinition::Scalar(config) = index
8307                && config.properties.contains(&property.to_string())
8308            {
8309                has_index = true;
8310                break;
8311            }
8312        }
8313
8314        if !has_index {
8315            // Avoid duplicate suggestions
8316            let already_suggested = suggestions.iter().any(|s| s.property == property);
8317            if !already_suggested {
8318                suggestions.push(IndexSuggestion {
8319                    label_or_type: "(detected from temporal query)".to_string(),
8320                    property: property.to_string(),
8321                    index_type: "SCALAR (BTree)".to_string(),
8322                    reason: format!(
8323                        "Temporal queries using '{}' can benefit from a scalar index for range scans",
8324                        property
8325                    ),
8326                    create_statement: format!(
8327                        "CREATE INDEX idx_{} FOR (n:YourLabel) ON (n.{})",
8328                        property, property
8329                    ),
8330                });
8331            }
8332        }
8333    }
8334
8335    /// Helper functions for expression normalization
8336    /// Normalize an expression for storage: strip variable prefixes
8337    /// For simple property: u.email -> "email"
8338    /// For expressions: lower(u.email) -> "lower(email)"
8339    fn normalize_expression_for_storage(expr: &Expr) -> String {
8340        match expr {
8341            Expr::Property(base, prop) if matches!(**base, Expr::Variable(_)) => prop.clone(),
8342            _ => {
8343                // Serialize expression and strip variable prefix
8344                let expr_str = expr.to_string_repr();
8345                Self::strip_variable_prefix(&expr_str)
8346            }
8347        }
8348    }
8349
8350    /// Strip variable references like "u.prop" from expression strings
8351    /// Converts "lower(u.email)" to "lower(email)"
8352    fn strip_variable_prefix(expr_str: &str) -> String {
8353        use regex::Regex;
8354        // Match patterns like "word.property" and replace with just "property"
8355        let re = Regex::new(r"\b\w+\.(\w+)").unwrap();
8356        re.replace_all(expr_str, "$1").to_string()
8357    }
8358
8359    /// Plan a schema command from the new AST
8360    fn plan_schema_command(&self, cmd: SchemaCommand) -> Result<LogicalPlan> {
8361        match cmd {
8362            SchemaCommand::CreateVectorIndex(c) => {
8363                // Parse index type from options (default: IvfPq)
8364                let opt = |key: &str| {
8365                    c.options
8366                        .get(key)
8367                        .and_then(|v| v.as_str())
8368                        .and_then(|s| s.parse::<u32>().ok())
8369                };
8370                let opt_u8 = |key: &str| -> Option<u8> {
8371                    c.options
8372                        .get(key)
8373                        .and_then(|v| v.as_str())
8374                        .and_then(|s| s.parse::<u8>().ok())
8375                };
8376                let index_type = match c.options.get("type").and_then(|v| v.as_str()) {
8377                    Some("flat") => VectorIndexType::Flat,
8378                    Some("ivf_flat") => VectorIndexType::IvfFlat {
8379                        num_partitions: opt("partitions").unwrap_or(256),
8380                    },
8381                    Some("ivf_sq") => VectorIndexType::IvfSq {
8382                        num_partitions: opt("partitions").unwrap_or(256),
8383                    },
8384                    Some("ivf_rq") => VectorIndexType::IvfRq {
8385                        num_partitions: opt("partitions").unwrap_or(256),
8386                        num_bits: opt_u8("num_bits"),
8387                    },
8388                    Some("hnsw_flat") => VectorIndexType::HnswFlat {
8389                        m: opt("m").unwrap_or(16),
8390                        ef_construction: opt("ef_construction").unwrap_or(200),
8391                        num_partitions: opt("partitions"),
8392                    },
8393                    Some("hnsw") | Some("hnsw_sq") => VectorIndexType::HnswSq {
8394                        m: opt("m").unwrap_or(16),
8395                        ef_construction: opt("ef_construction").unwrap_or(200),
8396                        num_partitions: opt("partitions"),
8397                    },
8398                    Some("hnsw_pq") => VectorIndexType::HnswPq {
8399                        m: opt("m").unwrap_or(16),
8400                        ef_construction: opt("ef_construction").unwrap_or(200),
8401                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8402                        num_partitions: opt("partitions"),
8403                    },
8404                    _ => VectorIndexType::IvfPq {
8405                        num_partitions: opt("partitions").unwrap_or(256),
8406                        num_sub_vectors: opt("sub_vectors").unwrap_or(16),
8407                        bits_per_subvector: opt_u8("num_bits").unwrap_or(8),
8408                    },
8409                };
8410
8411                // Parse embedding config from options
8412                let embedding_config = if let Some(emb_val) = c.options.get("embedding") {
8413                    Self::parse_embedding_config(emb_val)?
8414                } else {
8415                    None
8416                };
8417
8418                let config = VectorIndexConfig {
8419                    name: c.name,
8420                    label: c.label,
8421                    property: c.property,
8422                    metric: DistanceMetric::Cosine,
8423                    index_type,
8424                    embedding_config,
8425                    metadata: Default::default(),
8426                };
8427                Ok(LogicalPlan::CreateVectorIndex {
8428                    config,
8429                    if_not_exists: c.if_not_exists,
8430                })
8431            }
8432            SchemaCommand::CreateFullTextIndex(cfg) => Ok(LogicalPlan::CreateFullTextIndex {
8433                config: FullTextIndexConfig {
8434                    name: cfg.name,
8435                    label: cfg.label,
8436                    properties: cfg.properties,
8437                    tokenizer: TokenizerConfig::Standard,
8438                    with_positions: true,
8439                    metadata: Default::default(),
8440                },
8441                if_not_exists: cfg.if_not_exists,
8442            }),
8443            SchemaCommand::CreateScalarIndex(cfg) => {
8444                // Convert expressions to storage strings (strip variable prefix)
8445                let properties: Vec<String> = cfg
8446                    .expressions
8447                    .iter()
8448                    .map(Self::normalize_expression_for_storage)
8449                    .collect();
8450
8451                Ok(LogicalPlan::CreateScalarIndex {
8452                    config: ScalarIndexConfig {
8453                        name: cfg.name,
8454                        label: cfg.label,
8455                        properties,
8456                        index_type: ScalarIndexType::BTree,
8457                        where_clause: cfg.where_clause.map(|e| e.to_string_repr()),
8458                        metadata: Default::default(),
8459                    },
8460                    if_not_exists: cfg.if_not_exists,
8461                })
8462            }
8463            SchemaCommand::CreateJsonFtsIndex(cfg) => {
8464                let with_positions = cfg
8465                    .options
8466                    .get("with_positions")
8467                    .and_then(|v| v.as_bool())
8468                    .unwrap_or(false);
8469                Ok(LogicalPlan::CreateJsonFtsIndex {
8470                    config: JsonFtsIndexConfig {
8471                        name: cfg.name,
8472                        label: cfg.label,
8473                        column: cfg.column,
8474                        paths: Vec::new(),
8475                        with_positions,
8476                        metadata: Default::default(),
8477                    },
8478                    if_not_exists: cfg.if_not_exists,
8479                })
8480            }
8481            SchemaCommand::DropIndex(drop) => Ok(LogicalPlan::DropIndex {
8482                name: drop.name,
8483                if_exists: false, // new AST doesn't have if_exists for DROP INDEX yet
8484            }),
8485            SchemaCommand::CreateConstraint(c) => Ok(LogicalPlan::CreateConstraint(c)),
8486            SchemaCommand::DropConstraint(c) => Ok(LogicalPlan::DropConstraint(c)),
8487            SchemaCommand::CreateLabel(c) => Ok(LogicalPlan::CreateLabel(c)),
8488            SchemaCommand::CreateEdgeType(c) => Ok(LogicalPlan::CreateEdgeType(c)),
8489            SchemaCommand::AlterLabel(c) => Ok(LogicalPlan::AlterLabel(c)),
8490            SchemaCommand::AlterEdgeType(c) => Ok(LogicalPlan::AlterEdgeType(c)),
8491            SchemaCommand::DropLabel(c) => Ok(LogicalPlan::DropLabel(c)),
8492            SchemaCommand::DropEdgeType(c) => Ok(LogicalPlan::DropEdgeType(c)),
8493            SchemaCommand::ShowConstraints(c) => Ok(LogicalPlan::ShowConstraints(c)),
8494            SchemaCommand::ShowIndexes(c) => Ok(LogicalPlan::ShowIndexes { filter: c.filter }),
8495            SchemaCommand::ShowDatabase => Ok(LogicalPlan::ShowDatabase),
8496            SchemaCommand::ShowConfig => Ok(LogicalPlan::ShowConfig),
8497            SchemaCommand::ShowStatistics => Ok(LogicalPlan::ShowStatistics),
8498            SchemaCommand::Vacuum => Ok(LogicalPlan::Vacuum),
8499            SchemaCommand::Checkpoint => Ok(LogicalPlan::Checkpoint),
8500            SchemaCommand::Backup { path } => Ok(LogicalPlan::Backup {
8501                destination: path,
8502                options: HashMap::new(),
8503            }),
8504            SchemaCommand::CopyTo(cmd) => Ok(LogicalPlan::CopyTo {
8505                label: cmd.label,
8506                path: cmd.path,
8507                format: cmd.format,
8508                options: cmd.options,
8509            }),
8510            SchemaCommand::CopyFrom(cmd) => Ok(LogicalPlan::CopyFrom {
8511                label: cmd.label,
8512                path: cmd.path,
8513                format: cmd.format,
8514                options: cmd.options,
8515            }),
8516        }
8517    }
8518
8519    fn parse_embedding_config(emb_val: &Value) -> Result<Option<EmbeddingConfig>> {
8520        let obj = emb_val
8521            .as_object()
8522            .ok_or_else(|| anyhow!("embedding option must be an object"))?;
8523
8524        // Parse alias (required)
8525        let alias = obj
8526            .get("alias")
8527            .and_then(|v| v.as_str())
8528            .ok_or_else(|| anyhow!("embedding.alias is required"))?;
8529
8530        // Parse source properties (required)
8531        let source_properties = obj
8532            .get("source")
8533            .and_then(|v| v.as_array())
8534            .ok_or_else(|| anyhow!("embedding.source is required and must be an array"))?
8535            .iter()
8536            .filter_map(|v| v.as_str().map(|s| s.to_string()))
8537            .collect::<Vec<_>>();
8538
8539        if source_properties.is_empty() {
8540            return Err(anyhow!(
8541                "embedding.source must contain at least one property"
8542            ));
8543        }
8544
8545        let batch_size = obj
8546            .get("batch_size")
8547            .and_then(|v| v.as_u64())
8548            .map(|v| v as usize)
8549            .unwrap_or(32);
8550
8551        let document_prefix = obj
8552            .get("document_prefix")
8553            .and_then(|v| v.as_str())
8554            .map(|s| s.to_string());
8555
8556        let query_prefix = obj
8557            .get("query_prefix")
8558            .and_then(|v| v.as_str())
8559            .map(|s| s.to_string());
8560
8561        Ok(Some(EmbeddingConfig {
8562            alias: alias.to_string(),
8563            source_properties,
8564            batch_size,
8565            document_prefix,
8566            query_prefix,
8567        }))
8568    }
8569}
8570
8571/// Collect all properties referenced anywhere in the LogicalPlan tree.
8572///
8573/// This is critical for window functions: properties must be materialized
8574/// at the Scan node so they're available for window operations later.
8575///
8576/// Returns a mapping of variable name → property names (e.g., "e" → {"dept", "salary"}).
8577pub fn collect_properties_from_plan(plan: &LogicalPlan) -> HashMap<String, HashSet<String>> {
8578    let mut properties: HashMap<String, HashSet<String>> = HashMap::new();
8579    collect_properties_recursive(plan, &mut properties);
8580    properties
8581}
8582
8583/// Recursively walk the LogicalPlan tree and collect all property references.
8584fn collect_properties_recursive(
8585    plan: &LogicalPlan,
8586    properties: &mut HashMap<String, HashSet<String>>,
8587) {
8588    match plan {
8589        LogicalPlan::Window {
8590            input,
8591            window_exprs,
8592        } => {
8593            // Collect from window expressions
8594            for expr in window_exprs {
8595                collect_properties_from_expr_into(expr, properties);
8596            }
8597            collect_properties_recursive(input, properties);
8598        }
8599        LogicalPlan::Project { input, projections } => {
8600            for (expr, _alias) in projections {
8601                collect_properties_from_expr_into(expr, properties);
8602            }
8603            collect_properties_recursive(input, properties);
8604        }
8605        LogicalPlan::Sort { input, order_by } => {
8606            for sort_item in order_by {
8607                collect_properties_from_expr_into(&sort_item.expr, properties);
8608            }
8609            collect_properties_recursive(input, properties);
8610        }
8611        LogicalPlan::Filter {
8612            input, predicate, ..
8613        } => {
8614            collect_properties_from_expr_into(predicate, properties);
8615            collect_properties_recursive(input, properties);
8616        }
8617        LogicalPlan::Aggregate {
8618            input,
8619            group_by,
8620            aggregates,
8621        } => {
8622            for expr in group_by {
8623                collect_properties_from_expr_into(expr, properties);
8624            }
8625            for expr in aggregates {
8626                collect_properties_from_expr_into(expr, properties);
8627            }
8628            collect_properties_recursive(input, properties);
8629        }
8630        LogicalPlan::Scan {
8631            filter: Some(expr), ..
8632        } => {
8633            collect_properties_from_expr_into(expr, properties);
8634        }
8635        LogicalPlan::Scan { filter: None, .. } => {}
8636        LogicalPlan::ExtIdLookup {
8637            filter: Some(expr), ..
8638        } => {
8639            collect_properties_from_expr_into(expr, properties);
8640        }
8641        LogicalPlan::ExtIdLookup { filter: None, .. } => {}
8642        LogicalPlan::ScanAll {
8643            filter: Some(expr), ..
8644        } => {
8645            collect_properties_from_expr_into(expr, properties);
8646        }
8647        LogicalPlan::ScanAll { filter: None, .. } => {}
8648        LogicalPlan::ScanMainByLabels {
8649            filter: Some(expr), ..
8650        } => {
8651            collect_properties_from_expr_into(expr, properties);
8652        }
8653        LogicalPlan::ScanMainByLabels { filter: None, .. } => {}
8654        LogicalPlan::TraverseMainByType {
8655            input,
8656            target_filter,
8657            ..
8658        } => {
8659            if let Some(expr) = target_filter {
8660                collect_properties_from_expr_into(expr, properties);
8661            }
8662            collect_properties_recursive(input, properties);
8663        }
8664        LogicalPlan::Traverse {
8665            input,
8666            target_filter,
8667            step_variable: _,
8668            ..
8669        } => {
8670            if let Some(expr) = target_filter {
8671                collect_properties_from_expr_into(expr, properties);
8672            }
8673            // Note: Edge properties (step_variable) will be collected from expressions
8674            // that reference them. The edge_properties field in LogicalPlan is populated
8675            // later during physical planning based on this collected map.
8676            collect_properties_recursive(input, properties);
8677        }
8678        LogicalPlan::Unwind { input, expr, .. } => {
8679            collect_properties_from_expr_into(expr, properties);
8680            collect_properties_recursive(input, properties);
8681        }
8682        LogicalPlan::Create { input, pattern } => {
8683            // Mark variables referenced in CREATE patterns with "*" so plan_scan
8684            // adds structural projections (bare entity columns). Without this,
8685            // execute_create_pattern() can't find bound variables and creates
8686            // spurious new nodes instead of using existing MATCH'd ones.
8687            mark_pattern_variables(pattern, properties);
8688            collect_properties_recursive(input, properties);
8689        }
8690        LogicalPlan::CreateBatch { input, patterns } => {
8691            for pattern in patterns {
8692                mark_pattern_variables(pattern, properties);
8693            }
8694            collect_properties_recursive(input, properties);
8695        }
8696        LogicalPlan::Merge {
8697            input,
8698            pattern,
8699            on_match,
8700            on_create,
8701        } => {
8702            mark_pattern_variables(pattern, properties);
8703            if let Some(set_clause) = on_match {
8704                mark_set_item_variables(&set_clause.items, properties);
8705            }
8706            if let Some(set_clause) = on_create {
8707                mark_set_item_variables(&set_clause.items, properties);
8708            }
8709            collect_properties_recursive(input, properties);
8710        }
8711        LogicalPlan::Set { input, items } => {
8712            mark_set_item_variables(items, properties);
8713            collect_properties_recursive(input, properties);
8714        }
8715        LogicalPlan::Remove { input, items } => {
8716            for item in items {
8717                match item {
8718                    RemoveItem::Property(expr) => {
8719                        // REMOVE n.prop — collect the property and mark the variable
8720                        // with "*" so full structural projection is applied.
8721                        collect_properties_from_expr_into(expr, properties);
8722                        if let Expr::Property(base, _) = expr
8723                            && let Expr::Variable(var) = base.as_ref()
8724                        {
8725                            properties
8726                                .entry(var.clone())
8727                                .or_default()
8728                                .insert("*".to_string());
8729                        }
8730                    }
8731                    RemoveItem::Labels { variable, .. } => {
8732                        // REMOVE n:Label — mark n with "*"
8733                        properties
8734                            .entry(variable.clone())
8735                            .or_default()
8736                            .insert("*".to_string());
8737                    }
8738                }
8739            }
8740            collect_properties_recursive(input, properties);
8741        }
8742        LogicalPlan::Delete { input, items, .. } => {
8743            for expr in items {
8744                collect_properties_from_expr_into(expr, properties);
8745            }
8746            collect_properties_recursive(input, properties);
8747        }
8748        LogicalPlan::Foreach {
8749            input, list, body, ..
8750        } => {
8751            collect_properties_from_expr_into(list, properties);
8752            for plan in body {
8753                collect_properties_recursive(plan, properties);
8754            }
8755            collect_properties_recursive(input, properties);
8756        }
8757        LogicalPlan::Limit { input, .. } => {
8758            collect_properties_recursive(input, properties);
8759        }
8760        LogicalPlan::CrossJoin { left, right } => {
8761            collect_properties_recursive(left, properties);
8762            collect_properties_recursive(right, properties);
8763        }
8764        LogicalPlan::Apply {
8765            input,
8766            subquery,
8767            input_filter,
8768        } => {
8769            if let Some(expr) = input_filter {
8770                collect_properties_from_expr_into(expr, properties);
8771            }
8772            collect_properties_recursive(input, properties);
8773            collect_properties_recursive(subquery, properties);
8774        }
8775        LogicalPlan::Union { left, right, .. } => {
8776            collect_properties_recursive(left, properties);
8777            collect_properties_recursive(right, properties);
8778        }
8779        LogicalPlan::RecursiveCTE {
8780            initial, recursive, ..
8781        } => {
8782            collect_properties_recursive(initial, properties);
8783            collect_properties_recursive(recursive, properties);
8784        }
8785        LogicalPlan::ProcedureCall { arguments, .. } => {
8786            for arg in arguments {
8787                collect_properties_from_expr_into(arg, properties);
8788            }
8789        }
8790        LogicalPlan::VectorKnn { query, .. } => {
8791            collect_properties_from_expr_into(query, properties);
8792        }
8793        LogicalPlan::InvertedIndexLookup { terms, .. } => {
8794            collect_properties_from_expr_into(terms, properties);
8795        }
8796        LogicalPlan::ShortestPath { input, .. } => {
8797            collect_properties_recursive(input, properties);
8798        }
8799        LogicalPlan::AllShortestPaths { input, .. } => {
8800            collect_properties_recursive(input, properties);
8801        }
8802        LogicalPlan::Distinct { input } => {
8803            collect_properties_recursive(input, properties);
8804        }
8805        LogicalPlan::QuantifiedPattern {
8806            input,
8807            pattern_plan,
8808            ..
8809        } => {
8810            collect_properties_recursive(input, properties);
8811            collect_properties_recursive(pattern_plan, properties);
8812        }
8813        LogicalPlan::BindZeroLengthPath { input, .. } => {
8814            collect_properties_recursive(input, properties);
8815        }
8816        LogicalPlan::BindPath { input, .. } => {
8817            collect_properties_recursive(input, properties);
8818        }
8819        LogicalPlan::SubqueryCall { input, subquery } => {
8820            collect_properties_recursive(input, properties);
8821            collect_properties_recursive(subquery, properties);
8822        }
8823        LogicalPlan::LocyProject {
8824            input, projections, ..
8825        } => {
8826            for (expr, _alias) in projections {
8827                match expr {
8828                    // Bare variable in LocyProject: only need _vid for node variables
8829                    // (plan_locy_project extracts VID directly). Adding "*" would create
8830                    // a structural Struct column that conflicts with derived scan columns.
8831                    Expr::Variable(name) if !name.contains('.') => {
8832                        properties
8833                            .entry(name.clone())
8834                            .or_default()
8835                            .insert("_vid".to_string());
8836                    }
8837                    _ => collect_properties_from_expr_into(expr, properties),
8838                }
8839            }
8840            collect_properties_recursive(input, properties);
8841        }
8842        LogicalPlan::LocyFold {
8843            input,
8844            fold_bindings,
8845            ..
8846        } => {
8847            for (_name, expr) in fold_bindings {
8848                collect_properties_from_expr_into(expr, properties);
8849            }
8850            collect_properties_recursive(input, properties);
8851        }
8852        LogicalPlan::LocyBestBy {
8853            input, criteria, ..
8854        } => {
8855            for (expr, _asc) in criteria {
8856                collect_properties_from_expr_into(expr, properties);
8857            }
8858            collect_properties_recursive(input, properties);
8859        }
8860        LogicalPlan::LocyPriority { input, .. } => {
8861            collect_properties_recursive(input, properties);
8862        }
8863        LogicalPlan::LocyModelInvoke { input, .. } => {
8864            // Model invocations don't introduce new property accesses
8865            // — feature expressions are lifted to hidden YIELD items
8866            // by `extract_model_invocations` (uni-locy typecheck) and
8867            // their property refs are already collected via the
8868            // wrapped LocyProject's projection walk.
8869            collect_properties_recursive(input, properties);
8870        }
8871        // DDL and other plans don't reference properties
8872        _ => {}
8873    }
8874}
8875
8876/// Mark target variables from SET items with "*" and collect value expressions.
8877fn mark_set_item_variables(items: &[SetItem], properties: &mut HashMap<String, HashSet<String>>) {
8878    for item in items {
8879        match item {
8880            SetItem::Property { expr, value } => {
8881                // SET n.prop = val — mark n with STRUCT_ONLY_SENTINEL so the
8882                // scan builds the bare `n` struct column (needed for executor
8883                // `row.get(var_name)`) WITHOUT pulling the full schema. The
8884                // explicit `prop` is collected via `collect_properties_from_expr_into`
8885                // below and joins the variable's HashSet alongside the sentinel.
8886                //
8887                // If the same variable is also referenced bare elsewhere
8888                // (e.g. `SET n.x = 1 RETURN n`), `collect_properties_from_expr_into`
8889                // inserts "*" through the bare-Variable path; "*" dominates
8890                // the sentinel in `resolve_properties`, so the full schema
8891                // is still pulled when actually required.
8892                collect_properties_from_expr_into(expr, properties);
8893                collect_properties_from_expr_into(value, properties);
8894                if let Expr::Property(base, _) = expr
8895                    && let Expr::Variable(var) = base.as_ref()
8896                {
8897                    properties
8898                        .entry(var.clone())
8899                        .or_default()
8900                        .insert(STRUCT_ONLY_SENTINEL.to_string());
8901                }
8902            }
8903            SetItem::Labels { variable, .. } => {
8904                // SET n:Label — need full access to n
8905                properties
8906                    .entry(variable.clone())
8907                    .or_default()
8908                    .insert("*".to_string());
8909            }
8910            SetItem::Variable { variable, value } | SetItem::VariablePlus { variable, value } => {
8911                // SET n = {props} or SET n += {props}
8912                properties
8913                    .entry(variable.clone())
8914                    .or_default()
8915                    .insert("*".to_string());
8916                collect_properties_from_expr_into(value, properties);
8917            }
8918        }
8919    }
8920}
8921
8922/// Mark all variables in a CREATE/MERGE pattern with "*" so that plan_scan
8923/// adds structural projections (bare entity Struct columns) for them.
8924/// This is needed so that execute_create_pattern() can find bound variables
8925/// in the row HashMap and reuse existing nodes instead of creating new ones.
8926fn mark_pattern_variables(pattern: &Pattern, properties: &mut HashMap<String, HashSet<String>>) {
8927    for path in &pattern.paths {
8928        if let Some(ref v) = path.variable {
8929            properties
8930                .entry(v.clone())
8931                .or_default()
8932                .insert("*".to_string());
8933        }
8934        for element in &path.elements {
8935            match element {
8936                PatternElement::Node(n) => {
8937                    if let Some(ref v) = n.variable {
8938                        properties
8939                            .entry(v.clone())
8940                            .or_default()
8941                            .insert("*".to_string());
8942                    }
8943                    // Also collect properties from inline property expressions
8944                    if let Some(ref props) = n.properties {
8945                        collect_properties_from_expr_into(props, properties);
8946                    }
8947                }
8948                PatternElement::Relationship(r) => {
8949                    if let Some(ref v) = r.variable {
8950                        properties
8951                            .entry(v.clone())
8952                            .or_default()
8953                            .insert("*".to_string());
8954                    }
8955                    if let Some(ref props) = r.properties {
8956                        collect_properties_from_expr_into(props, properties);
8957                    }
8958                }
8959                PatternElement::Parenthesized { pattern, .. } => {
8960                    let sub = Pattern {
8961                        paths: vec![pattern.as_ref().clone()],
8962                    };
8963                    mark_pattern_variables(&sub, properties);
8964                }
8965            }
8966        }
8967    }
8968}
8969
8970/// Collect properties from an expression into a HashMap.
8971fn collect_properties_from_expr_into(
8972    expr: &Expr,
8973    properties: &mut HashMap<String, HashSet<String>>,
8974) {
8975    match expr {
8976        Expr::PatternComprehension {
8977            where_clause,
8978            map_expr,
8979            ..
8980        } => {
8981            // Collect properties from the WHERE clause and map expression.
8982            // The pattern itself creates local bindings that don't need
8983            // property collection from the outer scope.
8984            if let Some(where_expr) = where_clause {
8985                collect_properties_from_expr_into(where_expr, properties);
8986            }
8987            collect_properties_from_expr_into(map_expr, properties);
8988        }
8989        Expr::Variable(name) => {
8990            // Handle transformed property expressions like "e.dept" (after transform_window_expr_properties)
8991            if let Some((var, prop)) = name.split_once('.') {
8992                properties
8993                    .entry(var.to_string())
8994                    .or_default()
8995                    .insert(prop.to_string());
8996            } else {
8997                // Bare variable (e.g., RETURN n) — needs all properties materialized
8998                properties
8999                    .entry(name.clone())
9000                    .or_default()
9001                    .insert("*".to_string());
9002            }
9003        }
9004        Expr::Property(base, name) => {
9005            // Extract variable name from the base expression
9006            if let Expr::Variable(var) = base.as_ref() {
9007                properties
9008                    .entry(var.clone())
9009                    .or_default()
9010                    .insert(name.clone());
9011                // Don't recurse into Variable — that would mark it as a bare
9012                // variable reference (adding "*") when it's just a property base.
9013            } else {
9014                // Recurse for complex base expressions (nested property, function call, etc.)
9015                collect_properties_from_expr_into(base, properties);
9016            }
9017        }
9018        Expr::BinaryOp { left, right, .. } => {
9019            collect_properties_from_expr_into(left, properties);
9020            collect_properties_from_expr_into(right, properties);
9021        }
9022        Expr::FunctionCall {
9023            name,
9024            args,
9025            window_spec,
9026            ..
9027        } => {
9028            // Analyze function for property requirements (pushdown hydration)
9029            analyze_function_property_requirements(name, args, properties);
9030
9031            // Collect from arguments
9032            for arg in args {
9033                collect_properties_from_expr_into(arg, properties);
9034            }
9035
9036            // Collect from window spec (PARTITION BY, ORDER BY)
9037            if let Some(spec) = window_spec {
9038                for part_expr in &spec.partition_by {
9039                    collect_properties_from_expr_into(part_expr, properties);
9040                }
9041                for sort_item in &spec.order_by {
9042                    collect_properties_from_expr_into(&sort_item.expr, properties);
9043                }
9044            }
9045        }
9046        Expr::UnaryOp { expr, .. } => {
9047            collect_properties_from_expr_into(expr, properties);
9048        }
9049        Expr::List(items) => {
9050            for item in items {
9051                collect_properties_from_expr_into(item, properties);
9052            }
9053        }
9054        Expr::Map(entries) => {
9055            for (_key, value) in entries {
9056                collect_properties_from_expr_into(value, properties);
9057            }
9058        }
9059        Expr::ListComprehension {
9060            list,
9061            where_clause,
9062            map_expr,
9063            ..
9064        } => {
9065            collect_properties_from_expr_into(list, properties);
9066            if let Some(where_expr) = where_clause {
9067                collect_properties_from_expr_into(where_expr, properties);
9068            }
9069            collect_properties_from_expr_into(map_expr, properties);
9070        }
9071        Expr::Case {
9072            expr,
9073            when_then,
9074            else_expr,
9075        } => {
9076            if let Some(scrutinee_expr) = expr {
9077                collect_properties_from_expr_into(scrutinee_expr, properties);
9078            }
9079            for (when, then) in when_then {
9080                collect_properties_from_expr_into(when, properties);
9081                collect_properties_from_expr_into(then, properties);
9082            }
9083            if let Some(default_expr) = else_expr {
9084                collect_properties_from_expr_into(default_expr, properties);
9085            }
9086        }
9087        Expr::Quantifier {
9088            list, predicate, ..
9089        } => {
9090            collect_properties_from_expr_into(list, properties);
9091            collect_properties_from_expr_into(predicate, properties);
9092        }
9093        Expr::Reduce {
9094            init, list, expr, ..
9095        } => {
9096            collect_properties_from_expr_into(init, properties);
9097            collect_properties_from_expr_into(list, properties);
9098            collect_properties_from_expr_into(expr, properties);
9099        }
9100        Expr::Exists { query, .. } => {
9101            // Walk into EXISTS body to collect property references for outer-scope variables.
9102            // This ensures correlated properties (e.g., a.city inside EXISTS where a is outer)
9103            // are included in the outer scan's property list. Extra properties collected for
9104            // inner-only variables are harmless — the outer scan ignores unknown variable names.
9105            collect_properties_from_subquery(query, properties);
9106        }
9107        Expr::CountSubquery(query) | Expr::CollectSubquery(query) => {
9108            collect_properties_from_subquery(query, properties);
9109        }
9110        Expr::IsNull(expr) | Expr::IsNotNull(expr) | Expr::IsUnique(expr) => {
9111            collect_properties_from_expr_into(expr, properties);
9112        }
9113        Expr::In { expr, list } => {
9114            collect_properties_from_expr_into(expr, properties);
9115            collect_properties_from_expr_into(list, properties);
9116        }
9117        Expr::ArrayIndex { array, index } => {
9118            if let Expr::Variable(var) = array.as_ref() {
9119                if let Expr::Literal(CypherLiteral::String(prop_name)) = index.as_ref() {
9120                    // Static string key: e['name'] → only need that specific property
9121                    properties
9122                        .entry(var.clone())
9123                        .or_default()
9124                        .insert(prop_name.clone());
9125                } else {
9126                    // Dynamic property access: e[prop] → need all properties
9127                    properties
9128                        .entry(var.clone())
9129                        .or_default()
9130                        .insert("*".to_string());
9131                }
9132            }
9133            collect_properties_from_expr_into(array, properties);
9134            collect_properties_from_expr_into(index, properties);
9135        }
9136        Expr::ArraySlice { array, start, end } => {
9137            collect_properties_from_expr_into(array, properties);
9138            if let Some(start_expr) = start {
9139                collect_properties_from_expr_into(start_expr, properties);
9140            }
9141            if let Some(end_expr) = end {
9142                collect_properties_from_expr_into(end_expr, properties);
9143            }
9144        }
9145        Expr::ValidAt {
9146            entity,
9147            timestamp,
9148            start_prop,
9149            end_prop,
9150        } => {
9151            // Extract property requirements from ValidAt expression
9152            if let Expr::Variable(var) = entity.as_ref() {
9153                if let Some(prop) = start_prop {
9154                    properties
9155                        .entry(var.clone())
9156                        .or_default()
9157                        .insert(prop.clone());
9158                }
9159                if let Some(prop) = end_prop {
9160                    properties
9161                        .entry(var.clone())
9162                        .or_default()
9163                        .insert(prop.clone());
9164                }
9165            }
9166            collect_properties_from_expr_into(entity, properties);
9167            collect_properties_from_expr_into(timestamp, properties);
9168        }
9169        Expr::MapProjection { base, items } => {
9170            collect_properties_from_expr_into(base, properties);
9171            for item in items {
9172                match item {
9173                    uni_cypher::ast::MapProjectionItem::Property(prop) => {
9174                        if let Expr::Variable(var) = base.as_ref() {
9175                            properties
9176                                .entry(var.clone())
9177                                .or_default()
9178                                .insert(prop.clone());
9179                        }
9180                    }
9181                    uni_cypher::ast::MapProjectionItem::AllProperties => {
9182                        if let Expr::Variable(var) = base.as_ref() {
9183                            properties
9184                                .entry(var.clone())
9185                                .or_default()
9186                                .insert("*".to_string());
9187                        }
9188                    }
9189                    uni_cypher::ast::MapProjectionItem::LiteralEntry(_, expr) => {
9190                        collect_properties_from_expr_into(expr, properties);
9191                    }
9192                    uni_cypher::ast::MapProjectionItem::Variable(_) => {}
9193                }
9194            }
9195        }
9196        Expr::LabelCheck { expr, .. } => {
9197            collect_properties_from_expr_into(expr, properties);
9198        }
9199        // Parameters reference outer-scope variables (e.g., $p in correlated subqueries).
9200        // Mark them with "*" so the outer scan produces structural projections that
9201        // extract_row_params can resolve.
9202        Expr::Parameter(name) => {
9203            properties
9204                .entry(name.clone())
9205                .or_default()
9206                .insert("*".to_string());
9207        }
9208        // Literals and wildcard don't reference properties
9209        Expr::Literal(_) | Expr::Wildcard => {}
9210    }
9211}
9212
9213/// Walk a subquery (EXISTS/COUNT/COLLECT body) and collect property references.
9214///
9215/// This is needed so that correlated property accesses like `a.city` inside
9216/// `WHERE EXISTS { (a)-[:KNOWS]->(b) WHERE b.city = a.city }` cause the outer
9217/// scan to include `a.city` in its projected columns.
9218fn collect_properties_from_subquery(
9219    query: &Query,
9220    properties: &mut HashMap<String, HashSet<String>>,
9221) {
9222    match query {
9223        Query::Single(stmt) => {
9224            for clause in &stmt.clauses {
9225                match clause {
9226                    Clause::Match(m) => {
9227                        if let Some(ref wc) = m.where_clause {
9228                            collect_properties_from_expr_into(wc, properties);
9229                        }
9230                    }
9231                    Clause::With(w) => {
9232                        for item in &w.items {
9233                            if let ReturnItem::Expr { expr, .. } = item {
9234                                collect_properties_from_expr_into(expr, properties);
9235                            }
9236                        }
9237                        if let Some(ref wc) = w.where_clause {
9238                            collect_properties_from_expr_into(wc, properties);
9239                        }
9240                    }
9241                    Clause::Return(r) => {
9242                        for item in &r.items {
9243                            if let ReturnItem::Expr { expr, .. } = item {
9244                                collect_properties_from_expr_into(expr, properties);
9245                            }
9246                        }
9247                    }
9248                    _ => {}
9249                }
9250            }
9251        }
9252        Query::Union { left, right, .. } => {
9253            collect_properties_from_subquery(left, properties);
9254            collect_properties_from_subquery(right, properties);
9255        }
9256        _ => {}
9257    }
9258}
9259
9260/// Analyze function calls to extract property requirements for pushdown hydration
9261///
9262/// This function examines function calls and their arguments to determine which properties
9263/// need to be loaded for entity arguments. For example:
9264/// - validAt(e, 'start', 'end', ts) -> e needs {start, end}
9265/// - keys(n) -> n needs all properties (*)
9266///
9267/// The extracted requirements are added to the properties map for later use during
9268/// scan planning.
9269fn analyze_function_property_requirements(
9270    name: &str,
9271    args: &[Expr],
9272    properties: &mut HashMap<String, HashSet<String>>,
9273) {
9274    use crate::query::function_props::get_function_spec;
9275
9276    /// Helper to mark a variable as needing all properties.
9277    fn mark_wildcard(var: &str, properties: &mut HashMap<String, HashSet<String>>) {
9278        properties
9279            .entry(var.to_string())
9280            .or_default()
9281            .insert("*".to_string());
9282    }
9283
9284    // System-managed timestamp functions: require only the corresponding
9285    // `_created_at` / `_updated_at` column, not full entity materialization.
9286    if name.eq_ignore_ascii_case("created_at") || name.eq_ignore_ascii_case("updated_at") {
9287        if let Some(Expr::Variable(var)) = args.first() {
9288            let col = if name.eq_ignore_ascii_case("created_at") {
9289                "_created_at"
9290            } else {
9291                "_updated_at"
9292            };
9293            properties
9294                .entry(var.clone())
9295                .or_default()
9296                .insert(col.to_string());
9297        }
9298        return;
9299    }
9300
9301    let Some(spec) = get_function_spec(name) else {
9302        // Unknown function: conservatively require all properties for variable args
9303        for arg in args {
9304            if let Expr::Variable(var) = arg {
9305                mark_wildcard(var, properties);
9306            }
9307        }
9308        return;
9309    };
9310
9311    // Extract property names from string literal arguments
9312    for &(prop_arg_idx, entity_arg_idx) in spec.property_name_args {
9313        let entity_arg = args.get(entity_arg_idx);
9314        let prop_arg = args.get(prop_arg_idx);
9315
9316        match (entity_arg, prop_arg) {
9317            (Some(Expr::Variable(var)), Some(Expr::Literal(CypherLiteral::String(prop)))) => {
9318                properties
9319                    .entry(var.clone())
9320                    .or_default()
9321                    .insert(prop.clone());
9322            }
9323            (Some(Expr::Variable(var)), Some(Expr::Parameter(_))) => {
9324                // Parameter property name: need all properties
9325                mark_wildcard(var, properties);
9326            }
9327            _ => {}
9328        }
9329    }
9330
9331    // Handle full entity requirement (keys(), properties())
9332    if spec.needs_full_entity {
9333        for &idx in spec.entity_args {
9334            if let Some(Expr::Variable(var)) = args.get(idx) {
9335                mark_wildcard(var, properties);
9336            }
9337        }
9338    }
9339}
9340
9341// ============================================================================
9342// Phase 5a-impl — fork-aware fusion rewrite
9343// ============================================================================
9344
/// Trait that exposes the per-fork "is there a fork-local index for
/// `(label, column)`?" lookup. Implemented for `StorageManager` so
/// callers don't need to depend on the fork module directly; tests
/// can mock by implementing it on a `HashMap`.
///
/// Both methods return the kind of the fork-local index when one is found
/// and `None` otherwise.
pub trait ForkIndexLookup {
    /// Look up a fork-local index by label *name* and column name.
    ///
    /// This is the only required method.
    fn fork_index_for(
        &self,
        label: &str,
        column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind>;

    /// Phase 5b followup: resolve a label id, then dispatch to
    /// `fork_index_for`. Used by the rewrite when wrapping
    /// `VectorKnn` and `InvertedIndexLookup` nodes which carry
    /// `label_id: u16` rather than the label name. Default returns
    /// `None`; the `StorageManager` impl resolves via its
    /// `schema_manager`.
    ///
    /// Implementors that cannot map ids to names may keep the default, in
    /// which case lookups by label id never find an index.
    fn fork_index_for_label_id(
        &self,
        _label_id: u16,
        _column: &str,
    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
        None
    }
}
9370
9371impl ForkIndexLookup for uni_store::storage::StorageManager {
9372    fn fork_index_for(
9373        &self,
9374        label: &str,
9375        column: &str,
9376    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9377        self.fork_index_exists(label, column)
9378    }
9379
9380    fn fork_index_for_label_id(
9381        &self,
9382        label_id: u16,
9383        column: &str,
9384    ) -> Option<uni_store::fork::ForkLocalIndexKind> {
9385        let schema = self.schema_manager().schema();
9386        let label_name = schema.label_name_by_id(label_id)?;
9387        self.fork_index_exists(label_name, column)
9388    }
9389}
9390
9391/// Fold a trailing `SET var.prop = value` into the freshly-created entity's
9392/// inline property map, eliminating the separate `Set` write pass.
9393///
9394/// Rewrites `CREATE (a)-[r:T]->(b) SET r.x = e.v` into the equivalent of
9395/// `CREATE (a)-[r:T {x: e.v}]->(b)`, so the plan collapses from `Set → Create`
9396/// to a single `Create`. This removes an entire read-modify-write operator
9397/// (`MutationSetExec`) — measured at ~38% of per-edge `UNWIND … CREATE … SET`
9398/// execution — that the bulk write path never pays.
9399///
9400/// # Examples
9401///
9402/// ```ignore
9403/// // CREATE (a)-[r:LINK]->(b) SET r.role = e.role   ==>
9404/// // CREATE (a)-[r:LINK {role: e.role}]->(b)
9405/// let fused = fuse_create_set(plan);
9406/// ```
9407///
9408/// The fold is **all-or-nothing per `SET` clause** and only fires when every
9409/// item is safe:
9410/// - the item is the simple `Variable.property = value` form (not `+=`, label
9411///   set `SET n:L`, or whole-entity map assignment `SET n = {...}`),
9412/// - the target variable is introduced by the immediately-preceding
9413///   `Create`/`CreateBatch` (a MATCHed variable is left untouched),
9414/// - the target element's inline properties are absent or a map literal (a
9415///   parameter-map form such as `CREATE (n $props)` cannot be merged),
9416/// - the value references no variable created in the same statement, so
9417///   evaluating it at create time is observably identical to SET time.
9418///
9419/// When any item fails these checks the whole `Set` node is preserved, keeping
9420/// semantics unchanged. The pass is idempotent: a plan with no fusable
9421/// `Set`/`Create` adjacency passes through untouched.
9422#[must_use]
9423pub fn fuse_create_set(plan: LogicalPlan) -> LogicalPlan {
9424    match plan {
9425        LogicalPlan::Set { input, items } => {
9426            // Fuse any deeper adjacency first so chained
9427            // `CREATE … SET … CREATE … SET` collapses bottom-up.
9428            let input = fuse_create_set(*input);
9429            match input {
9430                LogicalPlan::Create {
9431                    input: child,
9432                    pattern,
9433                } => {
9434                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9435                    match try_fuse_set_items(std::slice::from_ref(&pattern), &items, &bound_vars) {
9436                        Some(mut patterns) => LogicalPlan::Create {
9437                            input: child,
9438                            // try_fuse_set_items returns exactly as many patterns
9439                            // as it was given (one here).
9440                            pattern: patterns
9441                                .pop()
9442                                .expect("one pattern in yields one pattern out"),
9443                        },
9444                        None => LogicalPlan::Set {
9445                            input: Box::new(LogicalPlan::Create {
9446                                input: child,
9447                                pattern,
9448                            }),
9449                            items,
9450                        },
9451                    }
9452                }
9453                LogicalPlan::CreateBatch {
9454                    input: child,
9455                    patterns,
9456                } => {
9457                    let bound_vars = crate::query::df_planner::collect_plan_variables(&child);
9458                    match try_fuse_set_items(&patterns, &items, &bound_vars) {
9459                        Some(fused) => LogicalPlan::CreateBatch {
9460                            input: child,
9461                            patterns: fused,
9462                        },
9463                        None => LogicalPlan::Set {
9464                            input: Box::new(LogicalPlan::CreateBatch {
9465                                input: child,
9466                                patterns,
9467                            }),
9468                            items,
9469                        },
9470                    }
9471                }
9472                other => LogicalPlan::Set {
9473                    input: Box::new(other),
9474                    items,
9475                },
9476            }
9477        }
9478        // Recurse through the operators that can sit above a write clause so a
9479        // `Set` under RETURN/ORDER BY/LIMIT is still reached. This mirrors the
9480        // pragmatic recursion of `rewrite_for_fork_fusion`: variants that never
9481        // sit above a write clause fall through `other => other` unchanged.
9482        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9483            input: Box::new(fuse_create_set(*input)),
9484            projections,
9485        },
9486        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9487            input: Box::new(fuse_create_set(*input)),
9488            skip,
9489            fetch,
9490        },
9491        LogicalPlan::Sort { input, order_by } => LogicalPlan::Sort {
9492            input: Box::new(fuse_create_set(*input)),
9493            order_by,
9494        },
9495        LogicalPlan::Filter {
9496            input,
9497            predicate,
9498            optional_variables,
9499        } => LogicalPlan::Filter {
9500            input: Box::new(fuse_create_set(*input)),
9501            predicate,
9502            optional_variables,
9503        },
9504        LogicalPlan::Create { input, pattern } => LogicalPlan::Create {
9505            input: Box::new(fuse_create_set(*input)),
9506            pattern,
9507        },
9508        LogicalPlan::CreateBatch { input, patterns } => LogicalPlan::CreateBatch {
9509            input: Box::new(fuse_create_set(*input)),
9510            patterns,
9511        },
9512        other => other,
9513    }
9514}
9515
9516/// Try to fold every `SET` item into the given CREATE patterns.
9517///
9518/// Returns the rewritten patterns when *all* items fuse safely (see
9519/// [`fuse_create_set`] for the conditions); returns `None` the moment any item
9520/// is unfusable, so the caller can keep the original `Set` node untouched.
9521///
9522/// `bound_vars` are the variables produced by the CREATE's input plan (e.g. an
9523/// upstream MATCH). A CREATE pattern may *reuse* such a variable as an endpoint
9524/// (`MATCH (a) CREATE (a)-[r:T]->(b)`), so `pattern_variable_names` alone cannot
9525/// tell a freshly-created variable from a reused one. Reused variables are
9526/// excluded from `owner`: a `SET` on them must not fuse, because the executor
9527/// skips inline properties on already-bound elements (which would silently drop
9528/// the write).
9529fn try_fuse_set_items(
9530    patterns: &[Pattern],
9531    items: &[SetItem],
9532    bound_vars: &HashSet<String>,
9533) -> Option<Vec<Pattern>> {
9534    // Map each freshly-created variable to the index of the pattern that
9535    // introduces it, skipping any variable already bound upstream.
9536    let mut owner: HashMap<String, usize> = HashMap::new();
9537    for (idx, pattern) in patterns.iter().enumerate() {
9538        for var in crate::query::df_graph::mutation_common::pattern_variable_names(pattern) {
9539            if bound_vars.contains(&var) {
9540                continue;
9541            }
9542            owner.entry(var).or_insert(idx);
9543        }
9544    }
9545
9546    let mut out = patterns.to_vec();
9547    for item in items {
9548        let SetItem::Property { expr, value } = item else {
9549            return None; // `+=`, label set, or whole-entity map assignment
9550        };
9551        let Expr::Property(base, prop) = expr else {
9552            return None; // not a property target
9553        };
9554        let Expr::Variable(var) = base.as_ref() else {
9555            return None; // e.g. `n[expr].x` or a deeper path
9556        };
9557        let Some(&idx) = owner.get(var) else {
9558            return None; // target is a MATCHed (not created) variable
9559        };
9560        // Evaluating the value at create time must equal evaluating it at SET
9561        // time: reject any reference to a variable created in this statement
9562        // (its value may not yet exist when the element is constructed).
9563        if collect_expr_variables(value)
9564            .iter()
9565            .any(|referenced| owner.contains_key(referenced))
9566        {
9567            return None;
9568        }
9569        if !merge_pattern_property(&mut out[idx], var, prop, value) {
9570            return None; // element absent or has a non-map property form
9571        }
9572    }
9573    Some(out)
9574}
9575
9576/// Merge `var.prop = value` into the matching element's inline property map.
9577///
9578/// Returns `false` (leaving the pattern unchanged) when the variable's element
9579/// is not found or its existing properties are a non-map expression that cannot
9580/// be merged. Any pre-existing entry for `prop` is replaced so the SET's
9581/// last-write-wins precedence is preserved.
9582fn merge_pattern_property(pattern: &mut Pattern, var: &str, prop: &str, value: &Expr) -> bool {
9583    for path in &mut pattern.paths {
9584        if merge_into_elements(&mut path.elements, var, prop, value) {
9585            return true;
9586        }
9587    }
9588    false
9589}
9590
9591/// Recursive worker for [`merge_pattern_property`] over a list of elements.
9592fn merge_into_elements(
9593    elements: &mut [PatternElement],
9594    var: &str,
9595    prop: &str,
9596    value: &Expr,
9597) -> bool {
9598    for element in elements {
9599        match element {
9600            PatternElement::Node(n) if n.variable.as_deref() == Some(var) => {
9601                return set_map_property(&mut n.properties, prop, value.clone());
9602            }
9603            PatternElement::Relationship(r) if r.variable.as_deref() == Some(var) => {
9604                return set_map_property(&mut r.properties, prop, value.clone());
9605            }
9606            PatternElement::Parenthesized { pattern, .. } => {
9607                if merge_into_elements(&mut pattern.elements, var, prop, value) {
9608                    return true;
9609                }
9610            }
9611            _ => {}
9612        }
9613    }
9614    false
9615}
9616
9617/// Set `prop = value` on an optional inline property map, last-write-wins.
9618///
9619/// Returns `false` without mutating when the properties are present but are not
9620/// a map literal (e.g. `CREATE (n $params)`), which cannot accept a single key.
9621fn set_map_property(props: &mut Option<Expr>, prop: &str, value: Expr) -> bool {
9622    match props {
9623        None => {
9624            *props = Some(Expr::Map(vec![(prop.to_string(), value)]));
9625            true
9626        }
9627        Some(Expr::Map(entries)) => {
9628            entries.retain(|(k, _)| k != prop);
9629            entries.push((prop.to_string(), value));
9630            true
9631        }
9632        Some(_) => false,
9633    }
9634}
9635
/// Walk a [`LogicalPlan`] tree and rewrite each `Scan` whose target
/// `(label, column)` has a registered fork-local index into the
/// matching `FusedIndexScan` variant.
///
/// Phase 5a-impl Step 4 covers `VidUidForkFirst`; Steps 5 and 6 add
/// `BtreeUnion` and `SortedKWayMerge` by extending `kind_for_filter`.
///
/// This is the public entry point of the pass: it consumes `plan`, hands it
/// to the recursive worker `rewrite_node` together with `lookup`, and returns
/// the rewritten tree. `lookup` is the [`ForkIndexLookup`] used to decide
/// whether a fork-local index exists for a given label/column.
///
/// Idempotent: a tree that already contains `FusedIndexScan` nodes
/// passes through unchanged.
#[must_use]
pub fn rewrite_for_fork_fusion<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
    // All of the work happens in the recursive helper.
    rewrite_node(plan, lookup)
}
9649
9650fn rewrite_node<L: ForkIndexLookup>(plan: LogicalPlan, lookup: &L) -> LogicalPlan {
9651    match plan {
9652        LogicalPlan::Scan {
9653            label_id,
9654            labels,
9655            variable,
9656            filter,
9657            optional,
9658        } => {
9659            // VidUid fusion only fires on a single-label scan with an
9660            // equality filter on a registered UID column. BTree and
9661            // Sorted will extend this match in Steps 5 and 6.
9662            let kind = if labels.len() == 1
9663                && let Some(col) = filter
9664                    .as_ref()
9665                    .and_then(|f| equality_target_column(f, &variable))
9666                && let Some(idx_kind) = lookup.fork_index_for(&labels[0], &col)
9667            {
9668                into_fusion_kind(idx_kind)
9669            } else {
9670                None
9671            };
9672            match kind {
9673                Some(kind) => LogicalPlan::FusedIndexScan {
9674                    label_id,
9675                    labels,
9676                    variable,
9677                    filter,
9678                    optional,
9679                    kind,
9680                },
9681                None => LogicalPlan::Scan {
9682                    label_id,
9683                    labels,
9684                    variable,
9685                    filter,
9686                    optional,
9687                },
9688            }
9689        }
9690        // Phase 5b followup: wrap lossy leaf operators when a
9691        // matching fork-local index has been registered. The wrap
9692        // preserves the original node's fields (the physical
9693        // planner unwraps and recurses); only the explain-plan
9694        // surface and runtime-stats operator name change. The
9695        // actual fusion still happens at the `BranchedBackend`
9696        // layer via Lance's per-branch reads.
9697        //
9698        // The CALL-style vector/FTS queries land as `ProcedureCall`
9699        // (not the dedicated `VectorKnn`/`InvertedIndexLookup`
9700        // operators); recognize those by procedure name and the
9701        // shape of their first two arguments (`label, column, ...`).
9702        LogicalPlan::ProcedureCall {
9703            procedure_name,
9704            arguments,
9705            yield_items,
9706        } => {
9707            let kind = procedure_call_fusion_kind(&procedure_name, &arguments, lookup);
9708            let inner = LogicalPlan::ProcedureCall {
9709                procedure_name,
9710                arguments,
9711                yield_items,
9712            };
9713            match kind {
9714                Some(kind) => LogicalPlan::FusedIndexScanWrapped {
9715                    inner: Box::new(inner),
9716                    kind,
9717                },
9718                None => inner,
9719            }
9720        }
9721        LogicalPlan::VectorKnn {
9722            label_id,
9723            variable,
9724            property,
9725            query,
9726            k,
9727            threshold,
9728        } => {
9729            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9730                && let Some(kind) = into_fusion_kind(idx_kind)
9731            {
9732                LogicalPlan::FusedIndexScanWrapped {
9733                    inner: Box::new(LogicalPlan::VectorKnn {
9734                        label_id,
9735                        variable,
9736                        property,
9737                        query,
9738                        k,
9739                        threshold,
9740                    }),
9741                    kind,
9742                }
9743            } else {
9744                LogicalPlan::VectorKnn {
9745                    label_id,
9746                    variable,
9747                    property,
9748                    query,
9749                    k,
9750                    threshold,
9751                }
9752            }
9753        }
9754        LogicalPlan::InvertedIndexLookup {
9755            label_id,
9756            variable,
9757            property,
9758            terms,
9759        } => {
9760            if let Some(idx_kind) = lookup.fork_index_for_label_id(label_id, &property)
9761                && let Some(kind) = into_fusion_kind(idx_kind)
9762            {
9763                LogicalPlan::FusedIndexScanWrapped {
9764                    inner: Box::new(LogicalPlan::InvertedIndexLookup {
9765                        label_id,
9766                        variable,
9767                        property,
9768                        terms,
9769                    }),
9770                    kind,
9771                }
9772            } else {
9773                LogicalPlan::InvertedIndexLookup {
9774                    label_id,
9775                    variable,
9776                    property,
9777                    terms,
9778                }
9779            }
9780        }
9781        // Tree-recursive variants — only the ones that can carry a
9782        // Scan in their subtree need to recurse here. Adding more is
9783        // safe (a missing recursion just means fusion doesn't fire
9784        // for that nested context, not incorrect results).
9785        LogicalPlan::Filter {
9786            input,
9787            predicate,
9788            optional_variables,
9789        } => LogicalPlan::Filter {
9790            input: Box::new(rewrite_node(*input, lookup)),
9791            predicate,
9792            optional_variables,
9793        },
9794        LogicalPlan::Project { input, projections } => LogicalPlan::Project {
9795            input: Box::new(rewrite_node(*input, lookup)),
9796            projections,
9797        },
9798        LogicalPlan::Limit { input, skip, fetch } => LogicalPlan::Limit {
9799            input: Box::new(rewrite_node(*input, lookup)),
9800            skip,
9801            fetch,
9802        },
9803        LogicalPlan::Sort { input, order_by } => {
9804            // Phase 5a-impl Sorted fusion: when the immediate child
9805            // is a single-label Scan AND the sole sort key is a
9806            // single-column property reference on that scan's
9807            // variable AND the column has a fork-local Sorted index
9808            // registered, rewrite to FusedIndexScan { SortedKWayMerge }.
9809            // Otherwise recurse normally.
9810            let new_input = match (*input, &order_by[..]) {
9811                (
9812                    LogicalPlan::Scan {
9813                        label_id,
9814                        labels,
9815                        variable,
9816                        filter,
9817                        optional,
9818                    },
9819                    [single_sort],
9820                ) if labels.len() == 1
9821                    && let Some(col) = column_of_scan_variable(&single_sort.expr, &variable)
9822                    && let Some(uni_store::fork::ForkLocalIndexKind::Sorted) =
9823                        lookup.fork_index_for(&labels[0], &col) =>
9824                {
9825                    LogicalPlan::FusedIndexScan {
9826                        label_id,
9827                        labels,
9828                        variable,
9829                        filter,
9830                        optional,
9831                        kind: FusionKind::SortedKWayMerge,
9832                    }
9833                }
9834                (other_input, _) => rewrite_node(other_input, lookup),
9835            };
9836            LogicalPlan::Sort {
9837                input: Box::new(new_input),
9838                order_by,
9839            }
9840        }
9841        LogicalPlan::Union { left, right, all } => LogicalPlan::Union {
9842            left: Box::new(rewrite_node(*left, lookup)),
9843            right: Box::new(rewrite_node(*right, lookup)),
9844            all,
9845        },
9846        // Everything else passes through unchanged. Adding more
9847        // arms is purely additive — fusion just doesn't fire inside
9848        // un-recursed-into subtrees.
9849        other => other,
9850    }
9851}
9852
9853/// Phase 5b followup: inspect a CALL-style procedure invocation
9854/// for a `(label, column)` pair and check whether a fork-local
9855/// index has been registered for it.
9856///
9857/// Recognizes:
9858/// - `uni.vector.query(label, column, query_vec, k)` → `AnnRerank`
9859///   when a `Vector` fork-local index exists.
9860/// - `uni.fts.query(label, column, query, k)` → `Bm25Rrf` when a
9861///   `FullText` fork-local index exists.
9862///
9863/// Returns `None` for any other procedure (no rewrite) or when the
9864/// registry has no matching entry.
9865fn procedure_call_fusion_kind<L: ForkIndexLookup>(
9866    procedure_name: &str,
9867    arguments: &[Expr],
9868    lookup: &L,
9869) -> Option<FusionKind> {
9870    if arguments.len() < 2 {
9871        return None;
9872    }
9873    let label = match &arguments[0] {
9874        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9875        _ => return None,
9876    };
9877    let column = match &arguments[1] {
9878        Expr::Literal(uni_cypher::ast::CypherLiteral::String(s)) => s.as_str(),
9879        _ => return None,
9880    };
9881    let expected = match procedure_name {
9882        "uni.vector.query" => uni_store::fork::ForkLocalIndexKind::Vector,
9883        "uni.fts.query" => uni_store::fork::ForkLocalIndexKind::FullText,
9884        _ => return None,
9885    };
9886    let registered = lookup.fork_index_for(label, column)?;
9887    if registered != expected {
9888        return None;
9889    }
9890    into_fusion_kind(registered)
9891}
9892
9893/// Map a fork-local index kind to its planner-side fusion variant.
9894/// Returns `None` for any future `ForkLocalIndexKind` we don't yet
9895/// know how to fuse — the caller falls back to a regular Scan.
9896fn into_fusion_kind(kind: uni_store::fork::ForkLocalIndexKind) -> Option<FusionKind> {
9897    use uni_store::fork::ForkLocalIndexKind as K;
9898    match kind {
9899        K::VidUid => Some(FusionKind::VidUidForkFirst),
9900        K::ScalarBtree => Some(FusionKind::BtreeUnion),
9901        K::Sorted => Some(FusionKind::SortedKWayMerge),
9902        K::Vector => Some(FusionKind::AnnRerank),
9903        K::FullText => Some(FusionKind::Bm25Rrf),
9904        // `ForkLocalIndexKind` is `#[non_exhaustive]`; future kinds
9905        // we don't yet handle are silently passed through as a
9906        // regular Scan so a forward-incompatible binary doesn't
9907        // panic — just misses the fusion opportunity.
9908        _ => None,
9909    }
9910}
9911
9912/// Inspect a Scan filter `Expr` for a single-column equality predicate
9913/// against the scan's variable. Returns the column name if the
9914/// predicate matches the shape `variable.column = <literal_or_param>`
9915/// (or its commuted form). Returns `None` for any other shape — fusion
9916/// only fires on the simple case in Phase 5a-impl.
9917fn equality_target_column(filter: &Expr, scan_variable: &str) -> Option<String> {
9918    let (lhs, rhs) = match filter {
9919        Expr::BinaryOp {
9920            left,
9921            op: uni_cypher::ast::BinaryOp::Eq,
9922            right,
9923        } => (left.as_ref(), right.as_ref()),
9924        _ => return None,
9925    };
9926    // Try lhs = column-of-scan-var, rhs = literal/param; or commuted.
9927    if let Some(col) = column_of_scan_variable(lhs, scan_variable)
9928        && is_constant_or_param(rhs)
9929    {
9930        return Some(col);
9931    }
9932    if let Some(col) = column_of_scan_variable(rhs, scan_variable)
9933        && is_constant_or_param(lhs)
9934    {
9935        return Some(col);
9936    }
9937    None
9938}
9939
9940fn column_of_scan_variable(expr: &Expr, scan_variable: &str) -> Option<String> {
9941    if let Expr::Property(base, prop) = expr
9942        && let Expr::Variable(v) = base.as_ref()
9943        && v == scan_variable
9944    {
9945        return Some(prop.clone());
9946    }
9947    None
9948}
9949
9950fn is_constant_or_param(expr: &Expr) -> bool {
9951    matches!(expr, Expr::Literal(_) | Expr::Parameter(_))
9952}
9953
#[cfg(test)]
mod pushdown_tests {
    use super::*;

    /// Shorthand for a variable reference expression.
    fn var(name: &str) -> Expr {
        Expr::Variable(name.to_string())
    }

    /// Shorthand for a string literal expression.
    fn string_literal(text: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(text.to_string()))
    }

    /// Shorthand for a query parameter expression.
    fn param(name: &str) -> Expr {
        Expr::Parameter(name.to_string())
    }

    /// Owned property-name set to compare against the analyzer's output.
    fn property_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    #[test]
    fn test_validat_extracts_property_names() {
        // validAt(e, 'start', 'end', ts) → e: {start, end}
        let args = vec![
            var("e"),
            string_literal("start"),
            string_literal("end"),
            var("ts"),
        ];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], property_set(&["start", "end"]));
    }

    #[test]
    fn test_keys_requires_wildcard() {
        // keys(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("keys", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], property_set(&["*"]));
    }

    #[test]
    fn test_properties_requires_wildcard() {
        // properties(n) → n: {*}
        let args = vec![var("n")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("properties", &args, &mut properties);

        assert!(properties.contains_key("n"));
        assert_eq!(properties["n"], property_set(&["*"]));
    }

    #[test]
    fn test_unknown_function_conservative() {
        // customUdf(e) → e: {*}
        let args = vec![var("e")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("customUdf", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert_eq!(properties["e"], property_set(&["*"]));
    }

    #[test]
    fn test_parameter_property_name() {
        // validAt(e, $start, $end, ts) → e: {*}
        let args = vec![var("e"), param("start"), param("end"), var("ts")];
        let mut properties = HashMap::new();

        analyze_function_property_requirements("uni.temporal.validAt", &args, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_validat_expr_extracts_properties() {
        // Expr::ValidAt carries its property names directly; both must be
        // recorded for the entity variable.
        let validat_expr = Expr::ValidAt {
            entity: Box::new(var("e")),
            timestamp: Box::new(var("ts")),
            start_prop: Some("valid_from".to_string()),
            end_prop: Some("valid_to".to_string()),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&validat_expr, &mut properties);

        assert!(properties.contains_key("e"));
        let entity_props = &properties["e"];
        assert!(entity_props.contains("valid_from"));
        assert!(entity_props.contains("valid_to"));
    }

    #[test]
    fn test_array_index_requires_wildcard() {
        // e[prop] → e: {*}
        let array_index_expr = Expr::ArrayIndex {
            array: Box::new(var("e")),
            index: Box::new(var("prop")),
        };
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&array_index_expr, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("*"));
    }

    #[test]
    fn test_property_access_extraction() {
        // e.name → e: {name}
        let prop_access = Expr::Property(Box::new(var("e")), "name".to_string());
        let mut properties = HashMap::new();

        collect_properties_from_expr_into(&prop_access, &mut properties);

        assert!(properties.contains_key("e"));
        assert!(properties["e"].contains("name"));
    }
}